// This file implements a wrapper class for MPI functions
#include "common/MPI.h"
#include "common/Utilities.h"
#include "common/Utilities.hpp"
#include "ProfilerApp.h"
#include "StackTrace/ErrorHandlers.h"
#include "StackTrace/StackTrace.h"
// Include all other headers
#include <algorithm>
#include <array>
#include <atomic>
#include <climits>
#include <complex>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <map>
#include <random>
#include <string>
#include <thread>
#include <vector>
// Include OS specific headers
#undef USE_WINDOWS
#undef USE_LINUX
#undef USE_MAC
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// We are using windows
#define USE_WINDOWS
#include <process.h>
#include <windows.h>
#define sched_yield() Sleep(0)
#elif defined(__APPLE__)
// Using MAC
#define USE_MAC
#include <sched.h>
#elif defined(__linux) || defined(__linux__) || defined(__unix) || \
    defined(__posix)
// We are using linux
#define USE_LINUX
#include <sched.h>
#include <unistd.h>
#else
#error Unknown OS
#endif
// Convenience defines
#define MPI_ERROR ERROR
#define MPI_ASSERT ASSERT
#define MPI_INSIST INSIST
#define MPI_WARNING WARNING
#define MPI_CLASS_COMM_NULL MPI_COMM_NULL
#define MPI_CLASS_COMM_SELF MPI_COMM_SELF
#define MPI_CLASS_COMM_WORLD MPI_COMM_WORLD
// Global variable used to create new unique comms (dup and split)
#ifndef USE_MPI
MPI_Comm uniqueGlobalComm = 11;
#endif
#if defined(USE_SAMRAI) && defined(USE_PETSC) && !defined(USE_MPI)
int MPI_REQUEST_NULL = 3;
int MPI_ERR_IN_STATUS = 4;
#endif
namespace Utilities {
// Some special structs to work with MPI
#ifdef USE_MPI
struct IntIntStruct {
    int j;
    int i;
};
struct LongIntStruct {
    long int j;
    int i;
};
struct FloatIntStruct {
    float f;
    int i;
};
struct DoubleIntStruct {
    double d;
    int i;
};
#endif
// Initialize the static member variables
volatile unsigned int MPI_CLASS::N_MPI_Comm_created = 0;
volatile unsigned int MPI_CLASS::N_MPI_Comm_destroyed = 0;
short MPI_CLASS::profile_level = 127;
// Define a type for use with size_t
#ifdef USE_MPI
static MPI_Datatype MPI_SIZE_T = 0x0;
static MPI_Datatype getSizeTDataType() {
    int size_int, size_long, size_longlong, size_longlong2;
    MPI_Type_size(MPI_UNSIGNED, &size_int);
    MPI_Type_size(MPI_UNSIGNED_LONG, &size_long);
    MPI_Type_size(MPI_UNSIGNED_LONG_LONG, &size_longlong);
    MPI_Type_size(MPI_LONG_LONG_INT, &size_longlong2);
    if (sizeof(size_t) == size_int) {
        return MPI_UNSIGNED;
    } else if (sizeof(size_t) == size_long) {
        return MPI_UNSIGNED_LONG;
    } else if (sizeof(size_t) == size_longlong) {
        return MPI_UNSIGNED_LONG_LONG;
    } else if (sizeof(size_t) == size_longlong2) {
        MPI_WARNING("Using signed long long datatype for size_t in MPI");
        return MPI_LONG_LONG_INT; // Note: this is not unsigned
    } else {
        MPI_ERROR("No suitable datatype found");
    }
    return 0;
}
#endif
// Static data for asynchronous communication without MPI
// Note: these routines may not be thread-safe yet
#ifndef USE_MPI
static const int mpi_max_tag = 0x003FFFFF;
struct Isendrecv_struct {
    const char *data; // Pointer to data
    int status;       // Status: 1-sending, 2-receiving
};
std::map<MPI_Request, Isendrecv_struct> global_isendrecv_list;
static MPI_Request getRequest(MPI_Comm comm, int tag) {
    MPI_ASSERT(tag >= 0 && tag <= mpi_max_tag);
    // Use hashing function: 2^64*0.5*(sqrt(5)-1)
    uint64_t a = static_cast<uint64_t>(comm) * 0x9E3779B97F4A7C15;
    uint64_t b = static_cast<uint64_t>(tag) * 0x9E3779B97F4A7C15;
    uint64_t hash = a ^ b;
    MPI_Request request;
    memcpy(&request, &hash, sizeof(MPI_Request));
    return request;
}
#endif
// Check the mpi error code
#ifdef USE_MPI
inline void check_MPI(int error) {
    if (error != MPI_SUCCESS)
        MPI_ERROR("Error calling MPI routine");
}
#endif
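// Illustrative sketch (comment only, not part of the build): in the serial
// (no-MPI) build the getRequest() hash above presumably lets a send and the
// matching receive locate each other through global_isendrecv_list, since the
// same (comm, tag) pair always produces the same pseudo-request:
//
//     MPI_Request r1 = getRequest( comm, tag );  // computed on the sending side
//     MPI_Request r2 = getRequest( comm, tag );  // computed on the receiving side
//     // r1 == r2, so both sides index the same entry of global_isendrecv_list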
/******************************************************************
 * Some helper functions to convert between signed/unsigned types *
 ******************************************************************/
DISABLE_WARNINGS
static inline constexpr unsigned int offset_int() {
    return ~static_cast<unsigned int>(std::numeric_limits<int>::min()) + 1;
}
static inline constexpr unsigned long int offset_long() {
    return ~static_cast<unsigned long int>(
               std::numeric_limits<long int>::min()) +
           1;
}
static inline constexpr unsigned long long int offset_long_long() {
    return ~static_cast<unsigned long long int>(
               std::numeric_limits<long long int>::min()) +
           1;
}
ENABLE_WARNINGS
static inline unsigned int signed_to_unsigned(int x) {
    const auto offset = offset_int();
    return (x >= 0) ? static_cast<unsigned int>(x) + offset
                    : offset - static_cast<unsigned int>(-x);
}
static inline unsigned long int signed_to_unsigned(long int x) {
    const auto offset = offset_long();
    return (x >= 0) ? static_cast<unsigned long int>(x) + offset
                    : offset - static_cast<unsigned long int>(-x);
}
static inline unsigned long long int signed_to_unsigned(long long int x) {
    const auto offset = offset_long_long();
    return (x >= 0) ? static_cast<unsigned long long int>(x) + offset
                    : offset - static_cast<unsigned long long int>(-x);
}
static inline int unsigned_to_signed(unsigned int x) {
    const auto offset = offset_int();
    return (x >= offset) ? static_cast<int>(x - offset)
                         : -static_cast<int>(offset - x);
}
static inline long int unsigned_to_signed(unsigned long int x) {
    const auto offset = offset_long();
    return (x >= offset) ? static_cast<long int>(x - offset)
                         : -static_cast<long int>(offset - x);
}
static inline long long int unsigned_to_signed(unsigned long long int x) {
    const auto offset = offset_long_long();
    return (x >= offset) ? static_cast<long long int>(x - offset)
                         : -static_cast<long long int>(offset - x);
}
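// The helpers above give an order-preserving mapping between the unsigned and
// signed ranges, which lets the unsigned MINLOC/MAXLOC reductions further down
// reuse the signed MPI datatypes.  Intended mapping (sketch, 32-bit int assumed):
//
//     unsigned_to_signed( 0u )          == INT_MIN
//     unsigned_to_signed( 0x7FFFFFFFu ) == -1
//     unsigned_to_signed( 0x80000000u ) == 0
//     unsigned_to_signed( 0xFFFFFFFFu ) == INT_MAX
//     signed_to_unsigned( unsigned_to_signed( x ) ) == x   // round trip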
/************************************************************************
 * Get the MPI version                                                   *
 ************************************************************************/
std::array<int, 2> MPI_CLASS::version() {
#ifdef USE_MPI
    int MPI_version;
    int MPI_subversion;
    MPI_Get_version(&MPI_version, &MPI_subversion);
    return {MPI_version, MPI_subversion};
#else
    return {0, 0};
#endif
}
std::string MPI_CLASS::info() {
#ifdef USE_MPI
#if MPI_VERSION >= 3
    int MPI_version_length = 0;
    char MPI_version_string[MPI_MAX_LIBRARY_VERSION_STRING];
    MPI_Get_library_version(MPI_version_string, &MPI_version_length);
    if (MPI_version_length > 0) {
        std::string MPI_info(MPI_version_string, MPI_version_length);
        size_t pos = MPI_info.find('\n');
        while (pos != std::string::npos) {
            MPI_info.insert(pos + 1, " ");
            pos = MPI_info.find('\n', pos + 1);
        }
        return MPI_info;
    }
#endif
    auto tmp = version();
    return std::to_string(tmp[0]) + "." + std::to_string(tmp[1]);
#else
    return std::string();
#endif
}
/************************************************************************
 * Functions to get/set the process affinities                           *
 ************************************************************************/
int MPI_CLASS::getNumberOfProcessors() {
    return std::thread::hardware_concurrency();
}
std::vector<int> MPI_CLASS::getProcessAffinity() {
    std::vector<int> procs;
#ifdef USE_LINUX
    cpu_set_t mask;
    int error = sched_getaffinity(getpid(), sizeof(cpu_set_t), &mask);
    if (error != 0)
        MPI_ERROR("Error getting process affinity");
    for (int i = 0; i < (int)sizeof(cpu_set_t) * CHAR_BIT; i++) {
        if (CPU_ISSET(i, &mask))
            procs.push_back(i);
    }
#elif defined(USE_MAC)
    // MAC does not support getting or setting the affinity
    printf("Warning: MAC does not support getting the process affinity\n");
    procs.clear();
#elif defined(USE_WINDOWS)
    HANDLE hProc = GetCurrentProcess();
    size_t procMask;
    size_t sysMask;
    PDWORD_PTR procMaskPtr = reinterpret_cast<PDWORD_PTR>(&procMask);
    PDWORD_PTR sysMaskPtr = reinterpret_cast<PDWORD_PTR>(&sysMask);
    GetProcessAffinityMask(hProc, procMaskPtr, sysMaskPtr);
    for (int i = 0; i < (int)sizeof(size_t) * CHAR_BIT; i++) {
        if ((procMask & 0x1) != 0)
            procs.push_back(i);
        procMask >>= 1;
    }
#else
#error Unknown OS
#endif
    return procs;
}
void MPI_CLASS::setProcessAffinity(const std::vector<int> &procs) {
#ifdef USE_LINUX
    cpu_set_t mask;
    CPU_ZERO(&mask);
    for (auto cpu : procs)
        CPU_SET(cpu, &mask);
    int error = sched_setaffinity(getpid(), sizeof(cpu_set_t), &mask);
    if (error != 0)
        MPI_ERROR("Error setting process affinity");
#elif defined(USE_MAC)
    // MAC does not support getting or setting the affinity
    NULL_USE(procs);
#elif defined(USE_WINDOWS)
    DWORD mask = 0;
    for (size_t i = 0; i < procs.size(); i++)
        mask |= ((DWORD)1) << procs[i];
    HANDLE hProc = GetCurrentProcess();
    SetProcessAffinityMask(hProc, mask);
#else
#error Unknown OS
#endif
}
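// Illustrative usage (hypothetical call site, Linux build): restrict the calling
// process to the first two processors it is currently allowed to run on.
//
//     auto cpus = MPI_CLASS::getProcessAffinity();
//     if ( cpus.size() > 2 )
//         cpus.resize( 2 );
//     MPI_CLASS::setProcessAffinity( cpus );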
/************************************************************************
 * Function to check if MPI is active                                    *
 ************************************************************************/
bool MPI_CLASS::MPI_active() {
#ifdef USE_MPI
    int initialized = 0, finalized = 0;
    MPI_Initialized(&initialized);
    MPI_Finalized(&finalized);
    return initialized != 0 && finalized == 0;
#else
    return true;
#endif
}
MPI_CLASS::ThreadSupport MPI_CLASS::queryThreadSupport() {
#ifdef USE_MPI
    int provided = 0;
    MPI_Query_thread(&provided);
    if (provided == MPI_THREAD_SINGLE)
        return ThreadSupport::SINGLE;
    if (provided == MPI_THREAD_FUNNELED)
        return ThreadSupport::FUNNELED;
    if (provided == MPI_THREAD_SERIALIZED)
        return ThreadSupport::SERIALIZED;
    if (provided == MPI_THREAD_MULTIPLE)
        return ThreadSupport::MULTIPLE;
    return ThreadSupport::SINGLE;
#else
    return ThreadSupport::MULTIPLE;
#endif
}
/************************************************************************
 * Function to perform a load balance of the given processes             *
 ************************************************************************/
void MPI_CLASS::balanceProcesses(const MPI_CLASS &globalComm, const int method,
                                 const std::vector<int> &procs,
                                 const int N_min_in, const int N_max_in) {
    // Build the list of processors to use
    std::vector<int> cpus = procs;
    if (cpus.empty()) {
        for (int i = 0; i < getNumberOfProcessors(); i++)
            cpus.push_back(i);
    }
    // Handle the "easy cases"
    if (method == 1) {
        // Trivial case where we do not need any communication
        setProcessAffinity(cpus);
        return;
    }
    // Get the sub-communicator for the current node
    MPI_CLASS nodeComm = globalComm.splitByNode();
    int N_min = std::min<int>(std::max(N_min_in, 1), cpus.size());
    int N_max = N_max_in;
    if (N_max == -1)
        N_max = cpus.size();
    N_max = std::min<int>(N_max, cpus.size());
    MPI_ASSERT(N_max >= N_min);
    // Perform the load balance within the node
    if (method == 2) {
        int N_proc = cpus.size() / nodeComm.getSize();
        N_proc = std::max(N_proc, N_min);
        N_proc = std::min(N_proc, N_max);
        std::vector<int> cpus2(N_proc, -1);
        for (int i = 0; i < N_proc; i++)
            cpus2[i] = cpus[(nodeComm.getRank() * N_proc + i) % cpus.size()];
        setProcessAffinity(cpus2);
    } else {
        MPI_ERROR("Unknown method for load balance");
    }
}
/************************************************************************
 * Empty constructor                                                     *
 ************************************************************************/
MPI_CLASS::MPI_CLASS() {
    // Initialize the data members to a default communicator of self
#ifdef USE_MPI
    communicator = MPI_COMM_NULL;
    d_maxTag = 0x7FFFFFFF;
#else
    communicator = MPI_CLASS_COMM_NULL;
    d_maxTag = mpi_max_tag;
#endif
    d_count = nullptr;
    d_manage = false;
    comm_rank = 0;
    comm_size = 1;
    d_isNull = true;
    d_currentTag = nullptr;
    d_call_abort = true;
    tmp_alignment = -1;
}
/************************************************************************
 * Empty destructor                                                      *
 ************************************************************************/
MPI_CLASS::~MPI_CLASS() { reset(); }
void MPI_CLASS::reset() {
    // Decrement the count if used
    int count = -1;
    if (d_count != nullptr)
        count = --(*d_count);
    if (count == 0) {
        // We are holding the last reference to the MPI_Comm object, we need to free it
        if (d_manage) {
#ifdef USE_MPI
            MPI_Comm_set_errhandler(communicator, MPI_ERRORS_ARE_FATAL);
            int err = MPI_Comm_free(&communicator);
            if (err != MPI_SUCCESS)
                MPI_ERROR("Problem free'ing MPI_Comm object");
            communicator = MPI_CLASS_COMM_NULL;
            ++N_MPI_Comm_destroyed;
#endif
        }
        delete d_count;
    }
    if (d_currentTag == nullptr) {
        // No tag index
    } else if (d_currentTag[1] > 1) {
        --(d_currentTag[1]);
    } else {
        delete[] d_currentTag;
    }
    d_manage = false;
    d_count = nullptr;
    comm_rank = 0;
    comm_size = 1;
    d_maxTag = 0;
    d_isNull = true;
    d_currentTag = nullptr;
    d_call_abort = true;
}
/************************************************************************
 * Copy constructors                                                     *
 ************************************************************************/
MPI_CLASS::MPI_CLASS(const MPI_CLASS &comm)
    : communicator(comm.communicator), d_isNull(comm.d_isNull),
      d_manage(comm.d_manage), comm_rank(comm.comm_rank),
      comm_size(comm.comm_size), d_maxTag(comm.d_maxTag),
      d_currentTag(comm.d_currentTag) {
    // Initialize the data members to the existing comm object
    if (d_currentTag != nullptr)
        ++d_currentTag[1];
    d_call_abort = comm.d_call_abort;
    // Set and increment the count
    d_count = comm.d_count;
    if (d_count != nullptr)
        ++(*d_count);
    tmp_alignment = -1;
}
MPI_CLASS::MPI_CLASS(MPI_CLASS &&rhs) : MPI_CLASS() {
    std::swap(communicator, rhs.communicator);
    std::swap(d_isNull, rhs.d_isNull);
    std::swap(d_manage, rhs.d_manage);
    std::swap(d_call_abort, rhs.d_call_abort);
    std::swap(profile_level, rhs.profile_level);
    std::swap(comm_rank, rhs.comm_rank);
    std::swap(comm_size, rhs.comm_size);
    std::swap(d_maxTag, rhs.d_maxTag);
    std::swap(d_currentTag, rhs.d_currentTag);
    std::swap(d_count, rhs.d_count);
    std::swap(tmp_alignment, rhs.tmp_alignment);
}
/************************************************************************
 * Assignment operators                                                  *
 ************************************************************************/
MPI_CLASS &MPI_CLASS::operator=(const MPI_CLASS &comm) {
    if (this == &comm) // protect against invalid self-assignment
        return *this;
    // Destroy
the previous object this->reset(); // Initialize the data members to the existing object this->communicator = comm.communicator; this->comm_rank = comm.comm_rank; this->comm_size = comm.comm_size; this->d_isNull = comm.d_isNull; this->d_manage = comm.d_manage; this->d_maxTag = comm.d_maxTag; this->d_call_abort = comm.d_call_abort; this->d_currentTag = comm.d_currentTag; if (this->d_currentTag != nullptr) ++(this->d_currentTag[1]); // Set and increment the count this->d_count = comm.d_count; if (this->d_count != nullptr) ++(*d_count); this->tmp_alignment = -1; return *this; } MPI_CLASS &MPI_CLASS::operator=(MPI_CLASS &&rhs) { if (this == &rhs) // protect against invalid self-assignment return *this; std::swap(communicator, rhs.communicator); std::swap(d_isNull, rhs.d_isNull); std::swap(d_manage, rhs.d_manage); std::swap(d_call_abort, rhs.d_call_abort); std::swap(profile_level, rhs.profile_level); std::swap(comm_rank, rhs.comm_rank); std::swap(comm_size, rhs.comm_size); std::swap(d_maxTag, rhs.d_maxTag); std::swap(d_currentTag, rhs.d_currentTag); std::swap(d_count, rhs.d_count); std::swap(tmp_alignment, rhs.tmp_alignment); return *this; } /************************************************************************ * Constructor from existing MPI communicator * ************************************************************************/ int d_global_currentTag_world1[2] = {1, 1}; int d_global_currentTag_world2[2] = {1, 1}; int d_global_currentTag_self[2] = {1, 1}; #ifdef USE_MPI std::atomic_int d_global_count_world1 = {1}; std::atomic_int d_global_count_world2 = {1}; std::atomic_int d_global_count_self = {1}; #endif MPI_CLASS::MPI_CLASS(MPI_Comm comm, bool manage) { d_count = nullptr; d_manage = false; tmp_alignment = -1; // Check if we are using our version of comm_world if (comm == MPI_CLASS_COMM_WORLD) { communicator = MPI_COMM_WORLD; } else if (comm == MPI_CLASS_COMM_SELF) { communicator = MPI_COMM_SELF; } else if (comm == MPI_CLASS_COMM_NULL) { communicator = MPI_COMM_NULL; } else { communicator = comm; } #ifdef USE_MPI // We are using MPI, use the MPI communicator to initialize the data if (communicator != MPI_COMM_NULL) { // Set the MPI_SIZE_T datatype if it has not been set if (MPI_SIZE_T == 0x0) MPI_SIZE_T = getSizeTDataType(); // Attach the error handler StackTrace::setMPIErrorHandler(communicator); // Get the communicator properties MPI_Comm_rank(communicator, &comm_rank); MPI_Comm_size(communicator, &comm_size); int flag, *val; int ierr = MPI_Comm_get_attr(communicator, MPI_TAG_UB, &val, &flag); MPI_ASSERT(ierr == MPI_SUCCESS); if (flag == 0) { d_maxTag = 0x7FFFFFFF; // The tag is not a valid attribute (set to 2^31-1) } else { d_maxTag = *val; if (d_maxTag < 0) { d_maxTag = 0x7FFFFFFF; } // The maximum tag is > a signed int (set to 2^31-1) MPI_INSIST(d_maxTag >= 0x7FFF, "maximum tag size is < MPI standard"); } } else { comm_rank = 1; comm_size = 0; d_maxTag = 0x7FFFFFFF; } d_isNull = communicator == MPI_COMM_NULL; if (manage && communicator != MPI_COMM_NULL && communicator != MPI_COMM_SELF && communicator != MPI_COMM_WORLD) d_manage = true; // Create the count (Note: we do not need to worry about thread safety) if (communicator == MPI_CLASS_COMM_WORLD) { d_count = &d_global_count_world1; ++(*d_count); } else if (communicator == MPI_COMM_WORLD) { d_count = &d_global_count_world2; ++(*d_count); } else if (communicator == MPI_COMM_SELF) { d_count = &d_global_count_self; ++(*d_count); } else if (communicator == MPI_COMM_NULL) { d_count = nullptr; } else { d_count = new std::atomic_int; 
        *d_count = 1;
    }
    if (d_manage)
        ++N_MPI_Comm_created;
#else
    // We are not using MPI, initialize based on the communicator
    NULL_USE(manage);
    comm_rank = 0;
    comm_size = 1;
    d_maxTag = mpi_max_tag;
    d_isNull = communicator == MPI_COMM_NULL;
    if (d_isNull)
        comm_size = 0;
#endif
    if (communicator == MPI_CLASS_COMM_WORLD) {
        d_currentTag = d_global_currentTag_world1;
        ++(this->d_currentTag[1]);
    } else if (communicator == MPI_COMM_WORLD) {
        d_currentTag = d_global_currentTag_world2;
        ++(this->d_currentTag[1]);
    } else if (communicator == MPI_COMM_SELF) {
        d_currentTag = d_global_currentTag_self;
        ++(this->d_currentTag[1]);
    } else if (communicator == MPI_COMM_NULL) {
        d_currentTag = nullptr;
    } else {
        d_currentTag = new int[2];
        d_currentTag[0] = (d_maxTag <= 0x10000) ? 1 : 0x1FFF;
        d_currentTag[1] = 1;
    }
    d_call_abort = true;
}
/************************************************************************
 * Return the ranks of the communicator in the global comm               *
 ************************************************************************/
std::vector<int> MPI_CLASS::globalRanks() const {
    if (d_isNull)
        return std::vector<int>();
#ifdef USE_MPI
    // Get my global rank and size if it has not been set
    static int globalRank = -1;
    static int globalSize = -1;
    if (globalRank == -1 && MPI_active()) {
        MPI_Comm_rank(MPI_CLASS_COMM_WORLD, &globalRank);
        MPI_Comm_size(MPI_CLASS_COMM_WORLD, &globalSize);
    }
    // Check if we are dealing with a serial or global communicator
    if (comm_size == 1)
        return std::vector<int>(1, globalRank);
    if (comm_size == globalSize) {
        std::vector<int> ranks(globalSize);
        for (int i = 0; i < globalSize; i++)
            ranks[i] = i;
        return ranks;
    }
    // Get the global rank from each rank in the communicator
    auto ranks = allGather(globalRank);
    std::sort(ranks.begin(), ranks.end());
    return ranks;
#else
    return std::vector<int>(1, 1);
#endif
}
/************************************************************************
 * Generate a random number                                              *
 ************************************************************************/
size_t MPI_CLASS::rand() const {
    size_t val = 0;
    if (getRank() == 0) {
        static std::random_device rd;
        static std::mt19937 gen(rd());
        static std::uniform_int_distribution<size_t> dist;
        val = dist(gen);
    }
    val = bcast(val, 0);
    return val;
}
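// Illustrative usage (hypothetical): rand() draws the value on rank 0 and then
// broadcasts it, so every rank in the communicator sees the same number.
//
//     MPI_CLASS comm( MPI_COMM_WORLD );
//     size_t seed = comm.rand();   // identical on all ranks of comm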
/************************************************************************
 * Intersect two communicators                                           *
 ************************************************************************/
#ifdef USE_MPI
static inline void MPI_Group_free2(MPI_Group *group) {
    if (*group != MPI_GROUP_EMPTY) {
        // MPICH is fine with free'ing an empty group, OpenMPI crashes
        MPI_Group_free(group);
    }
}
MPI_CLASS MPI_CLASS::intersect(const MPI_CLASS &comm1, const MPI_CLASS &comm2) {
    MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY;
    if (!comm1.isNull()) {
        MPI_Group_free2(&group1);
        MPI_Comm_group(comm1.communicator, &group1);
    }
    if (!comm2.isNull()) {
        MPI_Group_free2(&group2);
        MPI_Comm_group(comm2.communicator, &group2);
    }
    MPI_Group group12;
    MPI_Group_intersection(group1, group2, &group12);
    int compare1, compare2;
    MPI_Group_compare(group1, group12, &compare1);
    MPI_Group_compare(group2, group12, &compare2);
    MPI_CLASS new_comm(MPI_CLASS_COMM_NULL);
    int size;
    MPI_Group_size(group12, &size);
    if (compare1 != MPI_UNEQUAL && size != 0) {
        // The intersection matches comm1
        new_comm = comm1;
    } else if (compare2 != MPI_UNEQUAL && size != 0) {
        // The intersection matches comm2
        new_comm = comm2;
    } else if (comm1.isNull()) {
        // comm1 is null, we can return safely (comm1 is needed for communication)
    } else {
        // The intersection is smaller than comm1 or comm2
        // Check if the new comm is null for all processors
        int max_size = 0;
        MPI_Allreduce(&size, &max_size, 1, MPI_INT, MPI_MAX, comm1.communicator);
        if (max_size == 0) {
            // We are dealing with completely disjoint sets
            new_comm = MPI_CLASS(MPI_CLASS_COMM_NULL, false);
        } else {
            // Create the new comm
            // Note: OpenMPI crashes if the intersection group is EMPTY for any processors
            // We will set it to SELF for the EMPTY processors, then create a null comm later
            if (group12 == MPI_GROUP_EMPTY) {
                MPI_Group_free2(&group12);
                MPI_Comm_group(MPI_COMM_SELF, &group12);
            }
            MPI_Comm new_MPI_comm;
            MPI_Comm_create(comm1.communicator, group12, &new_MPI_comm);
            if (size > 0) {
                // This is the valid case where we create a new intersection comm
                new_comm = MPI_CLASS(new_MPI_comm, true);
            } else {
                // We actually want a null comm for this communicator
                new_comm = MPI_CLASS(MPI_CLASS_COMM_NULL, false);
                MPI_Comm_free(&new_MPI_comm);
            }
        }
    }
    MPI_Group_free2(&group1);
    MPI_Group_free2(&group2);
    MPI_Group_free2(&group12);
    return new_comm;
}
#else
MPI_CLASS MPI_CLASS::intersect(const MPI_CLASS &comm1, const MPI_CLASS &comm2) {
    if (comm1.isNull() || comm2.isNull())
        return MPI_CLASS(MPI_CLASS_COMM_NULL, false);
    MPI_ASSERT(comm1.comm_size == 1 && comm2.comm_size == 1);
    return comm1;
}
#endif
/************************************************************************
 * Split a comm                                                          *
 ************************************************************************/
MPI_CLASS MPI_CLASS::split(int color, int key) const {
    if (d_isNull) {
        return MPI_CLASS(MPI_CLASS_COMM_NULL);
    } else if (comm_size == 1) {
        if (color == -1)
            return MPI_CLASS(MPI_CLASS_COMM_NULL);
        return dup();
    }
    MPI_Comm new_MPI_comm = MPI_CLASS_COMM_NULL;
#ifdef USE_MPI
    // USE MPI to split the communicator
    if (color == -1) {
        check_MPI(
            MPI_Comm_split(communicator, MPI_UNDEFINED, key, &new_MPI_comm));
    } else {
        check_MPI(MPI_Comm_split(communicator, color, key, &new_MPI_comm));
    }
#endif
    // Create the new object
    NULL_USE(key);
    MPI_CLASS new_comm(new_MPI_comm, true);
    new_comm.d_call_abort = d_call_abort;
    return new_comm;
}
MPI_CLASS MPI_CLASS::splitByNode(int key) const {
    // Check if we are dealing with a single processor (trivial case)
    if (comm_size == 1)
        return this->split(0, 0);
    // Get the node name
    std::string name = MPI_CLASS::getNodeName();
    // Gather the names from all ranks
    std::vector<std::string> list(comm_size);
    allGather(name, &list[0]);
    // Create the colors
    std::vector<int> color(comm_size, -1);
    color[0] = 0;
    for (int i = 1; i < comm_size; i++) {
        const std::string tmp1 = list[i];
        for (int j = 0; j < i; j++) {
            const std::string tmp2 = list[j];
            if (tmp1 == tmp2) {
                color[i] = color[j];
                break;
            }
            color[i] = color[i - 1] + 1;
        }
    }
    MPI_CLASS new_comm = this->split(color[comm_rank], key);
    return new_comm;
}
/************************************************************************
 * Duplicate an existing comm object                                     *
 ************************************************************************/
MPI_CLASS MPI_CLASS::dup() const {
    if (d_isNull)
        return MPI_CLASS(MPI_CLASS_COMM_NULL);
    MPI_Comm new_MPI_comm = communicator;
#if defined(USE_MPI) || defined(USE_PETSC)
    // USE MPI to duplicate the communicator
    MPI_Comm_dup(communicator, &new_MPI_comm);
#else
    new_MPI_comm = uniqueGlobalComm;
    uniqueGlobalComm++;
#endif
    // Create the new comm object
    MPI_CLASS new_comm(new_MPI_comm, true);
    new_comm.d_isNull = d_isNull;
    new_comm.d_call_abort = d_call_abort;
    return new_comm;
}
/************************************************************************
 * Get the node name                                                     *
************************************************************************/ std::string MPI_CLASS::getNodeName() { #ifdef USE_MPI int length; char name[MPI_MAX_PROCESSOR_NAME + 1]; memset(name, 0, MPI_MAX_PROCESSOR_NAME + 1); MPI_Get_processor_name(name, &length); return std::string(name); #else return "Node0"; #endif } /************************************************************************ * Overload operator == * ************************************************************************/ bool MPI_CLASS::operator==(const MPI_CLASS &comm) const { return communicator == comm.communicator; } /************************************************************************ * Overload operator != * ************************************************************************/ bool MPI_CLASS::operator!=(const MPI_CLASS &comm) const { return communicator != comm.communicator; } /************************************************************************ * Overload operator < * ************************************************************************/ bool MPI_CLASS::operator<(const MPI_CLASS &comm) const { MPI_ASSERT(!this->d_isNull && !comm.d_isNull); bool flag = true; // First check if either communicator is NULL if (this->d_isNull) return false; if (comm.d_isNull) flag = false; // Use compare to check if the comms are equal if (compare(comm) != 0) return false; // Check that the size of the other communicator is > the current communicator size if (comm_size >= comm.comm_size) flag = false; // Check the union of the communicator groups // this is < comm iff this group is a subgroup of comm's group #ifdef USE_MPI MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; if (!d_isNull) MPI_Comm_group(communicator, &group1); if (!comm.d_isNull) MPI_Comm_group(comm.communicator, &group2); MPI_Group_union(group1, group2, &group12); int compare; MPI_Group_compare(group2, group12, &compare); if (compare == MPI_UNEQUAL) flag = false; MPI_Group_free(&group1); MPI_Group_free(&group2); MPI_Group_free(&group12); #endif // Perform a global reduce of the flag (equivalent to all operation) return allReduce(flag); } /************************************************************************ * Overload operator <= * ************************************************************************/ bool MPI_CLASS::operator<=(const MPI_CLASS &comm) const { MPI_ASSERT(!this->d_isNull && !comm.d_isNull); bool flag = true; // First check if either communicator is NULL if (this->d_isNull) return false; if (comm.d_isNull) flag = false; #ifdef USE_MPI int world_size = 0; MPI_Comm_size(MPI_COMM_WORLD, &world_size); if (comm.getSize() == world_size) return true; if (getSize() == 1 && !comm.d_isNull) return true; #endif // Use compare to check if the comms are equal if (compare(comm) != 0) return true; // Check that the size of the other communicator is > the current communicator size // this is <= comm iff this group is a subgroup of comm's group if (comm_size > comm.comm_size) flag = false; // Check the unnion of the communicator groups #ifdef USE_MPI MPI_Group group1, group2, group12; MPI_Comm_group(communicator, &group1); MPI_Comm_group(comm.communicator, &group2); MPI_Group_union(group1, group2, &group12); int compare; MPI_Group_compare(group2, group12, &compare); if (compare == MPI_UNEQUAL) flag = false; MPI_Group_free(&group1); MPI_Group_free(&group2); MPI_Group_free(&group12); #endif // Perform a global reduce of the flag (equivalent to all operation) return allReduce(flag); } 
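// Illustrative semantics of the comparison operators (hypothetical communicators):
// comm1 < comm2 holds when comm1's group is a proper subgroup of comm2's group,
// e.g. a per-node communicator compared against the world communicator:
//
//     MPI_CLASS world( MPI_COMM_WORLD );
//     MPI_CLASS node = world.splitByNode();
//     bool sub = ( node <= world );   // expected true on every rank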
/************************************************************************ * Overload operator > * ************************************************************************/ bool MPI_CLASS::operator>(const MPI_CLASS &comm) const { bool flag = true; // First check if either communicator is NULL if (this->d_isNull) return false; if (comm.d_isNull) flag = false; // Use compare to check if the comms are equal if (compare(comm) != 0) return false; // Check that the size of the other communicator is > the current communicator size if (comm_size <= comm.comm_size) flag = false; // Check the unnion of the communicator groups // this is > comm iff comm's group is a subgroup of this group #ifdef USE_MPI MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; if (!d_isNull) MPI_Comm_group(communicator, &group1); if (!comm.d_isNull) MPI_Comm_group(comm.communicator, &group2); MPI_Group_union(group1, group2, &group12); int compare; MPI_Group_compare(group1, group12, &compare); if (compare == MPI_UNEQUAL) flag = false; MPI_Group_free(&group1); MPI_Group_free(&group2); MPI_Group_free(&group12); #endif // Perform a global reduce of the flag (equivalent to all operation) return allReduce(flag); } /************************************************************************ * Overload operator >= * ************************************************************************/ bool MPI_CLASS::operator>=(const MPI_CLASS &comm) const { bool flag = true; // First check if either communicator is NULL if (this->d_isNull) return false; if (comm.d_isNull) flag = false; #ifdef USE_MPI int world_size = 0; MPI_Comm_size(MPI_COMM_WORLD, &world_size); if (getSize() == world_size) return true; if (comm.getSize() == 1 && !comm.d_isNull) return true; #endif // Use compare to check if the comms are equal if (compare(comm) != 0) return true; // Check that the size of the other communicator is > the current communicator size if (comm_size < comm.comm_size) flag = false; // Check the unnion of the communicator groups // this is >= comm iff comm's group is a subgroup of this group #ifdef USE_MPI MPI_Group group1 = MPI_GROUP_EMPTY, group2 = MPI_GROUP_EMPTY, group12 = MPI_GROUP_EMPTY; if (!d_isNull) MPI_Comm_group(communicator, &group1); if (!comm.d_isNull) MPI_Comm_group(comm.communicator, &group2); MPI_Group_union(group1, group2, &group12); int compare; MPI_Group_compare(group1, group12, &compare); if (compare == MPI_UNEQUAL) flag = false; MPI_Group_free(&group1); MPI_Group_free(&group2); MPI_Group_free(&group12); #endif // Perform a global reduce of the flag (equivalent to all operation) return allReduce(flag); } /************************************************************************ * Compare two comm objects * ************************************************************************/ int MPI_CLASS::compare(const MPI_CLASS &comm) const { if (communicator == comm.communicator) return 1; #ifdef USE_MPI if (d_isNull || comm.d_isNull) return 0; int result; check_MPI(MPI_Comm_compare(communicator, comm.communicator, &result)); if (result == MPI_IDENT) return 2; else if (result == MPI_CONGRUENT) return 3; else if (result == MPI_SIMILAR) return 4; else if (result == MPI_UNEQUAL) return 0; MPI_ERROR("Unknown results from comm compare"); #else if (comm.communicator == MPI_COMM_NULL || communicator == MPI_COMM_NULL) return 0; else return 3; #endif return 0; } /************************************************************************ * Abort the program. 
 ************************************************************************/
void MPI_CLASS::setCallAbortInSerialInsteadOfExit(bool flag) {
    d_call_abort = flag;
}
void MPI_CLASS::abort() const {
#ifdef USE_MPI
    MPI_Comm comm = communicator;
    if (comm == MPI_COMM_NULL)
        comm = MPI_COMM_WORLD;
    if (!MPI_active()) {
        // MPI is not available
        exit(-1);
    } else if (comm_size > 1) {
        MPI_Abort(comm, -1);
    } else if (d_call_abort) {
        MPI_Abort(comm, -1);
    } else {
        exit(-1);
    }
#else
    exit(-1);
#endif
}
/************************************************************************
 * newTag                                                                *
 ************************************************************************/
int MPI_CLASS::newTag() {
#ifdef USE_MPI
    // Synchronize the processes to ensure all ranks enter this call
    // Needed so the count will match
    barrier();
    // Return and increment the tag
    int tag = (*d_currentTag)++;
    MPI_INSIST(tag <= d_maxTag, "Maximum number of tags exceeded\n");
    return tag;
#else
    static int globalCurrentTag = 1;
    return globalCurrentTag++;
#endif
}
/************************************************************************
 * allReduce                                                             *
 ************************************************************************/
bool MPI_CLASS::allReduce(const bool value) const {
    bool ret = value;
    if (comm_size > 1) {
#ifdef USE_MPI
        MPI_Allreduce((void *)&value, (void *)&ret, 1, MPI_UNSIGNED_CHAR,
                      MPI_MIN, communicator);
#else
        MPI_ERROR("This shouldn't be possible");
#endif
    }
    return ret;
}
/************************************************************************
 * anyReduce                                                             *
 ************************************************************************/
bool MPI_CLASS::anyReduce(const bool value) const {
    bool ret = value;
    if (comm_size > 1) {
#ifdef USE_MPI
        MPI_Allreduce((void *)&value, (void *)&ret, 1, MPI_UNSIGNED_CHAR,
                      MPI_MAX, communicator);
#else
        MPI_ERROR("This shouldn't be possible");
#endif
    }
    return ret;
}
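// The two bool reductions above implement a logical AND / OR: allReduce() returns
// true only if every rank passed true (MPI_MIN over 0/1 values), while anyReduce()
// returns true if at least one rank did (MPI_MAX).  Illustrative usage (hypothetical):
//
//     bool ok_local  = checkLocalState();           // hypothetical per-rank check
//     bool ok_global = comm.allReduce( ok_local );  // true only if all ranks are ok
//     bool any_fail  = comm.anyReduce( !ok_local ); // true if any rank failed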
/************************************************************************
 * call_sumReduce                                                        *
 * Note: these specializations are only called when using MPI.           *
 ************************************************************************/
#ifdef USE_MPI
// unsigned char
template <>
void MPI_CLASS::call_sumReduce<unsigned char>(const unsigned char *send,
                                              unsigned char *recv, int n) const {
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x, int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new unsigned char[n];
    MPI_Allreduce(send, recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
// char
template <>
void MPI_CLASS::call_sumReduce<char>(const char *send, char *recv, int n) const {
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<char>(char *x, int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new char[n];
    MPI_Allreduce(send, recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
// unsigned int
template <>
void MPI_CLASS::call_sumReduce<unsigned int>(const unsigned int *send,
                                             unsigned int *recv, int n) const {
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x, int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new unsigned int[n];
    MPI_Allreduce(send, recv, n, MPI_UNSIGNED, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
// int
template <>
void MPI_CLASS::call_sumReduce<int>(const int *send, int *recv, int n) const {
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_INT, MPI_SUM, communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<int>(int *x, int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new int[n];
    MPI_Allreduce(send, recv, n, MPI_INT, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
// long int
template <>
void MPI_CLASS::call_sumReduce<long int>(const long int *send, long int *recv,
                                         int n) const {
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_LONG, MPI_SUM, communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<long int>(long int *x, int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new long int[n];
    MPI_Allreduce(send, recv, n, MPI_LONG, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
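// The in-place overloads above follow a common pattern: allocate a scratch
// receive buffer, reduce into it, then copy back.  A sketch of an equivalent
// call using MPI_IN_PLACE (not used here, shown for comparison only):
//
//     MPI_Allreduce( MPI_IN_PLACE, x, n, MPI_LONG, MPI_SUM, communicator );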
// unsigned long int
template <>
void MPI_CLASS::call_sumReduce<unsigned long>(const unsigned long *send,
                                              unsigned long *recv, int n) const {
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x, int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new unsigned long int[n];
    MPI_Allreduce(send, recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
// size_t
#ifdef USE_WINDOWS
template <>
void MPI_CLASS::call_sumReduce<size_t>(const size_t *send, size_t *recv,
                                       int n) const {
    MPI_ASSERT(MPI_SIZE_T != 0);
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<size_t>(size_t *x, int n) const {
    MPI_ASSERT(MPI_SIZE_T != 0);
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new size_t[n];
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_SUM,
                  communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
#endif
// float
template <>
void MPI_CLASS::call_sumReduce<float>(const float *send, float *recv, int n) const {
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_FLOAT, MPI_SUM, communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<float>(float *x, int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new float[n];
    MPI_Allreduce(send, recv, n, MPI_FLOAT, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
// double
template <>
void MPI_CLASS::call_sumReduce<double>(const double *send, double *recv,
                                       int n) const {
    PROFILE_START("sumReduce1", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_SUM, communicator);
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<double>(double *x, int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = x;
    auto recv = new double[n];
    MPI_Allreduce(send, recv, n, MPI_DOUBLE, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = recv[i];
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
// std::complex<double>
template <>
void MPI_CLASS::call_sumReduce<std::complex<double>>(
    const std::complex<double> *x, std::complex<double> *y, int n) const {
    PROFILE_START("sumReduce1", profile_level);
    auto send = new double[2 * n];
    auto recv = new double[2 * n];
    for (int i = 0; i < n; i++) {
        send[2 * i + 0] = real(x[i]);
        send[2 * i + 1] = imag(x[i]);
    }
    MPI_Allreduce((void *)send, (void *)recv, 2 * n, MPI_DOUBLE, MPI_SUM,
                  communicator);
    for (int i = 0; i < n; i++)
        y[i] = std::complex<double>(recv[2 * i + 0], recv[2 * i + 1]);
    delete[] send;
    delete[] recv;
    PROFILE_STOP("sumReduce1", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
                                                     int n) const {
    PROFILE_START("sumReduce2", profile_level);
    auto send = new double[2 * n];
    auto recv = new double[2 * n];
    for (int i = 0; i < n; i++) {
        send[2 * i + 0] = real(x[i]);
        send[2 * i + 1] = imag(x[i]);
    }
    MPI_Allreduce(send, recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator);
    for (int i = 0; i < n; i++)
        x[i] = std::complex<double>(recv[2 * i + 0], recv[2 * i + 1]);
    delete[] send;
    delete[] recv;
    PROFILE_STOP("sumReduce2", profile_level);
}
#endif
/************************************************************************
 * call_minReduce                                                        *
 * Note: these specializations are only called when using MPI.
* ************************************************************************/ #ifdef USE_MPI // unsigned char template <> void MPI_CLASS::call_minReduce(const unsigned char *send, unsigned char *recv, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator); PROFILE_STOP("minReduce1", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = send[i]; call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) recv[i] = static_cast(tmp[i]); delete[] tmp; } } template <> void MPI_CLASS::call_minReduce(unsigned char *x, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce2", profile_level); auto send = x; auto recv = new unsigned char[n]; MPI_Allreduce(send, recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; PROFILE_STOP("minReduce2", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = x[i]; call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) x[i] = static_cast(tmp[i]); delete[] tmp; } } // char template <> void MPI_CLASS::call_minReduce(const char *send, char *recv, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator); PROFILE_STOP("minReduce1", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = send[i]; call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) recv[i] = static_cast(tmp[i]); delete[] tmp; } } template <> void MPI_CLASS::call_minReduce(char *x, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce2", profile_level); auto send = x; auto recv = new char[n]; MPI_Allreduce(send, recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; PROFILE_STOP("minReduce2", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = x[i]; call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) x[i] = static_cast(tmp[i]); delete[] tmp; } } // unsigned int template <> void MPI_CLASS::call_minReduce(const unsigned int *send, unsigned int *recv, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_MIN, communicator); PROFILE_STOP("minReduce1", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = unsigned_to_signed(send[i]); call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) recv[i] = signed_to_unsigned(tmp[i]); delete[] tmp; } } template <> void MPI_CLASS::call_minReduce(unsigned int *x, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce2", profile_level); auto send = x; auto recv = new unsigned int[n]; MPI_Allreduce(send, recv, n, MPI_UNSIGNED, MPI_MIN, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; PROFILE_STOP("minReduce2", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = unsigned_to_signed(x[i]); call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) x[i] = signed_to_unsigned(tmp[i]); delete[] tmp; } } // int template <> void 
MPI_CLASS::call_minReduce(const int *x, int *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_INT, MPI_MIN, communicator); } else { auto recv = new IntIntStruct[n]; auto send = new IntIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_2INT, MPI_MINLOC, communicator); for (int i = 0; i < n; ++i) { y[i] = recv[i].j; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("minReduce1", profile_level); } template <> void MPI_CLASS::call_minReduce(int *x, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce2", profile_level); if (comm_rank_of_min == nullptr) { auto send = x; auto recv = new int[n]; MPI_Allreduce(send, recv, n, MPI_INT, MPI_MIN, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } else { auto recv = new IntIntStruct[n]; auto send = new IntIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_2INT, MPI_MINLOC, communicator); for (int i = 0; i < n; ++i) { x[i] = recv[i].j; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("minReduce2", profile_level); } // unsigned long int template <> void MPI_CLASS::call_minReduce(const unsigned long int *send, unsigned long int *recv, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator); PROFILE_STOP("minReduce1", profile_level); } else { auto tmp = new long int[n]; for (int i = 0; i < n; i++) tmp[i] = unsigned_to_signed(send[i]); call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) recv[i] = signed_to_unsigned(tmp[i]); delete[] tmp; } } template <> void MPI_CLASS::call_minReduce(unsigned long int *x, int n, int *comm_rank_of_min) const { if (comm_rank_of_min == nullptr) { PROFILE_START("minReduce2", profile_level); auto send = x; auto recv = new unsigned long int[n]; MPI_Allreduce(send, recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; PROFILE_STOP("minReduce2", profile_level); } else { auto tmp = new long int[n]; for (int i = 0; i < n; i++) tmp[i] = unsigned_to_signed(x[i]); call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) x[i] = signed_to_unsigned(tmp[i]); delete[] tmp; } } // long int template <> void MPI_CLASS::call_minReduce(const long int *x, long int *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_LONG, MPI_MIN, communicator); } else { auto recv = new LongIntStruct[n]; auto send = new LongIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator); for (int i = 0; i < n; ++i) { y[i] = recv[i].j; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("minReduce1", profile_level); } template <> void MPI_CLASS::call_minReduce(long int *x, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce2", profile_level); if (comm_rank_of_min == nullptr) { auto send = x; auto recv = new long int[n]; MPI_Allreduce(send, recv, n, MPI_LONG, MPI_MIN, communicator); for (long int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } else { 
auto recv = new LongIntStruct[n]; auto send = new LongIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_LONG_INT, MPI_MINLOC, communicator); for (int i = 0; i < n; ++i) { x[i] = recv[i].j; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("minReduce2", profile_level); } // unsigned long long int template <> void MPI_CLASS::call_minReduce( const unsigned long long int *send, unsigned long long int *recv, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { auto x = new long long int[n]; auto y = new long long int[n]; for (int i = 0; i < n; i++) x[i] = unsigned_to_signed(send[i]); MPI_Allreduce((void *)x, (void *)y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator); for (int i = 0; i < n; i++) recv[i] = signed_to_unsigned(y[i]); delete[] x; delete[] y; } else { printf("minReduce will use double\n"); auto tmp = new double[n]; for (int i = 0; i < n; i++) tmp[i] = static_cast(send[i]); call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) recv[i] = static_cast(tmp[i]); delete[] tmp; } PROFILE_STOP("minReduce1", profile_level); } template <> void MPI_CLASS::call_minReduce( unsigned long long int *x, int n, int *comm_rank_of_min) const { auto recv = new unsigned long long int[n]; call_minReduce(x, recv, n, comm_rank_of_min); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } // long long int template <> void MPI_CLASS::call_minReduce(const long long int *x, long long int *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_LONG_LONG_INT, MPI_MIN, communicator); } else { printf("minReduce will use double\n"); auto tmp = new double[n]; for (int i = 0; i < n; i++) tmp[i] = static_cast(x[i]); call_minReduce(tmp, n, comm_rank_of_min); for (int i = 0; i < n; i++) y[i] = static_cast(tmp[i]); delete[] tmp; } PROFILE_STOP("minReduce1", profile_level); } template <> void MPI_CLASS::call_minReduce(long long int *x, int n, int *comm_rank_of_min) const { auto recv = new long long int[n]; call_minReduce(x, recv, n, comm_rank_of_min); for (int i = 0; i < n; i++) x[i] = signed_to_unsigned(recv[i]); delete[] recv; } // float template <> void MPI_CLASS::call_minReduce(const float *x, float *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_INT, MPI_MIN, communicator); } else { auto recv = new FloatIntStruct[n]; auto send = new FloatIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].f = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator); for (int i = 0; i < n; ++i) { y[i] = recv[i].f; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("minReduce1", profile_level); } template <> void MPI_CLASS::call_minReduce(float *x, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce2", profile_level); if (comm_rank_of_min == nullptr) { auto send = x; auto recv = new float[n]; MPI_Allreduce(send, recv, n, MPI_FLOAT, MPI_MIN, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } else { auto recv = new FloatIntStruct[n]; auto send = new FloatIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].f = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_FLOAT_INT, MPI_MINLOC, communicator); for (int i = 0; i < n; 
++i) { x[i] = recv[i].f; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("minReduce2", profile_level); } // double template <> void MPI_CLASS::call_minReduce(const double *x, double *y, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce1", profile_level); if (comm_rank_of_min == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_DOUBLE, MPI_MIN, communicator); } else { auto recv = new DoubleIntStruct[n]; auto send = new DoubleIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].d = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator); for (int i = 0; i < n; ++i) { y[i] = recv[i].d; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("minReduce1", profile_level); } template <> void MPI_CLASS::call_minReduce(double *x, int n, int *comm_rank_of_min) const { PROFILE_START("minReduce2", profile_level); if (comm_rank_of_min == nullptr) { auto send = x; auto recv = new double[n]; MPI_Allreduce(send, recv, n, MPI_DOUBLE, MPI_MIN, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } else { auto recv = new DoubleIntStruct[n]; auto send = new DoubleIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].d = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_DOUBLE_INT, MPI_MINLOC, communicator); for (int i = 0; i < n; ++i) { x[i] = recv[i].d; comm_rank_of_min[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("minReduce2", profile_level); } #endif /************************************************************************ * call_maxReduce * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // unsigned char template <> void MPI_CLASS::call_maxReduce(const unsigned char *send, unsigned char *recv, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator); PROFILE_STOP("maxReduce1", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = send[i]; call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) recv[i] = static_cast(tmp[i]); delete[] tmp; } } template <> void MPI_CLASS::call_maxReduce(unsigned char *x, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce2", profile_level); auto send = x; auto recv = new unsigned char[n]; MPI_Allreduce(send, recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; PROFILE_STOP("maxReduce2", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = x[i]; call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) x[i] = static_cast(tmp[i]); delete[] tmp; } } // char template <> void MPI_CLASS::call_maxReduce(const char *send, char *recv, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator); PROFILE_STOP("maxReduce1", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = send[i]; call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) recv[i] = static_cast(tmp[i]); delete[] tmp; } } template <> void MPI_CLASS::call_maxReduce(char *x, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == 
nullptr) { PROFILE_START("maxReduce2", profile_level); auto send = x; auto recv = new char[n]; MPI_Allreduce(send, recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; PROFILE_STOP("maxReduce2", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = x[i]; call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) x[i] = static_cast(tmp[i]); delete[] tmp; } } // unsigned int template <> void MPI_CLASS::call_maxReduce(const unsigned int *send, unsigned int *recv, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_MAX, communicator); PROFILE_STOP("maxReduce1", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = unsigned_to_signed(send[i]); call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) recv[i] = signed_to_unsigned(tmp[i]); delete[] tmp; } } template <> void MPI_CLASS::call_maxReduce(unsigned int *x, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce2", profile_level); auto send = x; auto recv = new unsigned int[n]; MPI_Allreduce(send, recv, n, MPI_UNSIGNED, MPI_MAX, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; PROFILE_STOP("maxReduce2", profile_level); } else { auto tmp = new int[n]; for (int i = 0; i < n; i++) tmp[i] = unsigned_to_signed(x[i]); call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) x[i] = signed_to_unsigned(tmp[i]); delete[] tmp; } } // int template <> void MPI_CLASS::call_maxReduce(const int *x, int *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_INT, MPI_MAX, communicator); } else { auto recv = new IntIntStruct[n]; auto send = new IntIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_2INT, MPI_MAXLOC, communicator); for (int i = 0; i < n; ++i) { y[i] = recv[i].j; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("maxReduce1", profile_level); } template <> void MPI_CLASS::call_maxReduce(int *x, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce2", profile_level); if (comm_rank_of_max == nullptr) { int *send = x; auto recv = new int[n]; MPI_Allreduce(send, recv, n, MPI_INT, MPI_MAX, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } else { auto recv = new IntIntStruct[n]; auto send = new IntIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_2INT, MPI_MAXLOC, communicator); for (int i = 0; i < n; ++i) { x[i] = recv[i].j; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("maxReduce2", profile_level); } // long int template <> void MPI_CLASS::call_maxReduce(const long int *x, long int *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_LONG, MPI_MAX, communicator); } else { auto recv = new LongIntStruct[n]; auto send = new LongIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator); for (int i = 0; i < n; ++i) { y[i] = recv[i].j; comm_rank_of_max[i] = 
recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("maxReduce1", profile_level); } template <> void MPI_CLASS::call_maxReduce(long int *x, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce2", profile_level); if (comm_rank_of_max == nullptr) { auto send = x; auto recv = new long int[n]; MPI_Allreduce(send, recv, n, MPI_LONG, MPI_MAX, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } else { auto recv = new LongIntStruct[n]; auto send = new LongIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].j = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_LONG_INT, MPI_MAXLOC, communicator); for (int i = 0; i < n; ++i) { x[i] = recv[i].j; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("maxReduce2", profile_level); } // unsigned long int template <> void MPI_CLASS::call_maxReduce(const unsigned long int *send, unsigned long int *recv, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce1", profile_level); MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator); PROFILE_STOP("maxReduce1", profile_level); } else { auto tmp = new long int[n]; for (int i = 0; i < n; i++) tmp[i] = unsigned_to_signed(send[i]); call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) recv[i] = signed_to_unsigned(tmp[i]); delete[] tmp; } } template <> void MPI_CLASS::call_maxReduce(unsigned long int *x, int n, int *comm_rank_of_max) const { if (comm_rank_of_max == nullptr) { PROFILE_START("maxReduce2", profile_level); auto send = x; auto recv = new unsigned long int[n]; MPI_Allreduce(send, recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; PROFILE_STOP("maxReduce2", profile_level); } else { auto tmp = new long int[n]; for (int i = 0; i < n; i++) tmp[i] = unsigned_to_signed(x[i]); call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) x[i] = signed_to_unsigned(tmp[i]); delete[] tmp; } } // unsigned long long int template <> void MPI_CLASS::call_maxReduce( const unsigned long long int *send, unsigned long long int *recv, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { auto x = new long long int[n]; auto y = new long long int[n]; for (int i = 0; i < n; i++) x[i] = unsigned_to_signed(send[i]); MPI_Allreduce((void *)x, (void *)y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator); for (int i = 0; i < n; i++) recv[i] = signed_to_unsigned(y[i]); delete[] x; delete[] y; } else { printf("maxReduce will use double\n"); auto tmp = new double[n]; for (int i = 0; i < n; i++) tmp[i] = static_cast(send[i]); call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) recv[i] = static_cast(tmp[i]); delete[] tmp; } PROFILE_STOP("maxReduce1", profile_level); } template <> void MPI_CLASS::call_maxReduce( unsigned long long int *x, int n, int *comm_rank_of_max) const { auto recv = new unsigned long long int[n]; call_maxReduce(x, recv, n, comm_rank_of_max); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } // long long int template <> void MPI_CLASS::call_maxReduce(const long long int *x, long long int *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_LONG_LONG_INT, MPI_MAX, communicator); } else { printf("maxReduce will use double\n"); auto tmp = new double[n]; for (int i = 0; i < n; i++) tmp[i] = 
static_cast<double>(x[i]); call_maxReduce(tmp, n, comm_rank_of_max); for (int i = 0; i < n; i++) y[i] = static_cast<long long int>(tmp[i]); delete[] tmp; } PROFILE_STOP("maxReduce1", profile_level); }
template <> void MPI_CLASS::call_maxReduce(long long int *x, int n, int *comm_rank_of_max) const { auto recv = new long long int[n]; call_maxReduce(x, recv, n, comm_rank_of_max); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; }
// float
template <> void MPI_CLASS::call_maxReduce(const float *x, float *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_FLOAT, MPI_MAX, communicator); } else { auto recv = new FloatIntStruct[n]; auto send = new FloatIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].f = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator); for (int i = 0; i < n; ++i) { y[i] = recv[i].f; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("maxReduce1", profile_level); }
template <> void MPI_CLASS::call_maxReduce(float *x, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce2", profile_level); if (comm_rank_of_max == nullptr) { auto send = x; auto recv = new float[n]; MPI_Allreduce(send, recv, n, MPI_FLOAT, MPI_MAX, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } else { auto recv = new FloatIntStruct[n]; auto send = new FloatIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].f = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_FLOAT_INT, MPI_MAXLOC, communicator); for (int i = 0; i < n; ++i) { x[i] = recv[i].f; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("maxReduce2", profile_level); }
// double
template <> void MPI_CLASS::call_maxReduce(const double *x, double *y, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce1", profile_level); if (comm_rank_of_max == nullptr) { MPI_Allreduce((void *)x, (void *)y, n, MPI_DOUBLE, MPI_MAX, communicator); } else { auto recv = new DoubleIntStruct[n]; auto send = new DoubleIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].d = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator); for (int i = 0; i < n; ++i) { y[i] = recv[i].d; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("maxReduce1", profile_level); }
template <> void MPI_CLASS::call_maxReduce(double *x, int n, int *comm_rank_of_max) const { PROFILE_START("maxReduce2", profile_level); if (comm_rank_of_max == nullptr) { auto send = x; auto recv = new double[n]; MPI_Allreduce(send, recv, n, MPI_DOUBLE, MPI_MAX, communicator); for (int i = 0; i < n; i++) x[i] = recv[i]; delete[] recv; } else { auto recv = new DoubleIntStruct[n]; auto send = new DoubleIntStruct[n]; for (int i = 0; i < n; ++i) { send[i].d = x[i]; send[i].i = comm_rank; } MPI_Allreduce(send, recv, n, MPI_DOUBLE_INT, MPI_MAXLOC, communicator); for (int i = 0; i < n; ++i) { x[i] = recv[i].d; comm_rank_of_max[i] = recv[i].i; } delete[] recv; delete[] send; } PROFILE_STOP("maxReduce2", profile_level); }
#endif
/************************************************************************ * bcast * * Note: these specializations are only called when using MPI.
* ************************************************************************/ #ifdef USE_MPI // char template <> void MPI_CLASS::call_bcast(unsigned char *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_UNSIGNED_CHAR, root, communicator); PROFILE_STOP("bcast", profile_level); } template <> void MPI_CLASS::call_bcast(char *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_CHAR, root, communicator); PROFILE_STOP("bcast", profile_level); } // int template <> void MPI_CLASS::call_bcast(unsigned int *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_UNSIGNED, root, communicator); PROFILE_STOP("bcast", profile_level); } template <> void MPI_CLASS::call_bcast(int *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_INT, root, communicator); PROFILE_STOP("bcast", profile_level); } // float template <> void MPI_CLASS::call_bcast(float *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_FLOAT, root, communicator); PROFILE_STOP("bcast", profile_level); } // double template <> void MPI_CLASS::call_bcast(double *x, int n, int root) const { PROFILE_START("bcast", profile_level); MPI_Bcast(x, n, MPI_DOUBLE, root, communicator); PROFILE_STOP("bcast", profile_level); } #else // We need a concrete instantiation of bcast(x,n,root); template <> void MPI_CLASS::call_bcast(char *, int, int) const {} #endif /************************************************************************ * Perform a global barrier across all processors. * ************************************************************************/ void MPI_CLASS::barrier() const { #ifdef USE_MPI MPI_Barrier(communicator); #endif } /************************************************************************ * Send data array to another processor. * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // char template <> void MPI_CLASS::send(const char *buf, int length, int recv_proc_number, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); // Send the data PROFILE_START("send", profile_level); MPI_Send((void *)buf, length, MPI_CHAR, recv_proc_number, tag, communicator); PROFILE_STOP("send", profile_level); } // int template <> void MPI_CLASS::send(const int *buf, int length, int recv_proc_number, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); // Send the data PROFILE_START("send", profile_level); MPI_Send((void *)buf, length, MPI_INT, recv_proc_number, tag, communicator); PROFILE_STOP("send", profile_level); } // float template <> void MPI_CLASS::send(const float *buf, int length, int recv_proc_number, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); // Send the data PROFILE_START("send", profile_level); MPI_Send((void *)buf, length, MPI_FLOAT, recv_proc_number, tag, communicator); PROFILE_STOP("send", profile_level); } // double template <> void MPI_CLASS::send(const double *buf, int length, int recv_proc_number, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? 
tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
// Send the data
PROFILE_START("send", profile_level); MPI_Send((void *)buf, length, MPI_DOUBLE, recv_proc_number, tag, communicator); PROFILE_STOP("send", profile_level); }
#else
// We need a concrete instantiation of send for use without MPI
template <> void MPI_CLASS::send(const char *buf, int length, int, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); PROFILE_START("send", profile_level); auto id = getRequest(communicator, tag); auto it = global_isendrecv_list.find(id); MPI_INSIST(it != global_isendrecv_list.end(), "send must be paired with a previous call to irecv in serial"); MPI_ASSERT(it->second.status == 2); memcpy((char *)it->second.data, buf, length); global_isendrecv_list.erase(it); PROFILE_STOP("send", profile_level); }
#endif
/************************************************************************ * Non-blocking send data array to another processor. * * Note: these specializations are only called when using MPI. * ************************************************************************/
#ifdef USE_MPI
// char
template <> MPI_Request MPI_CLASS::Isend(const char *buf, int length, int recv_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; PROFILE_START("Isend", profile_level); MPI_Isend((void *)buf, length, MPI_CHAR, recv_proc, tag, communicator, &request); PROFILE_STOP("Isend", profile_level); return request; }
// int
template <> MPI_Request MPI_CLASS::Isend(const int *buf, int length, int recv_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; PROFILE_START("Isend", profile_level); MPI_Isend((void *)buf, length, MPI_INT, recv_proc, tag, communicator, &request); PROFILE_STOP("Isend", profile_level); return request; }
// float
template <> MPI_Request MPI_CLASS::Isend(const float *buf, int length, int recv_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; PROFILE_START("Isend", profile_level); MPI_Isend((void *)buf, length, MPI_FLOAT, recv_proc, tag, communicator, &request); PROFILE_STOP("Isend", profile_level); return request; }
// double
template <> MPI_Request MPI_CLASS::Isend(const double *buf, int length, int recv_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; PROFILE_START("Isend", profile_level); MPI_Isend((void *)buf, length, MPI_DOUBLE, recv_proc, tag, communicator, &request); PROFILE_STOP("Isend", profile_level); return request; }
#else
// We need a concrete instantiation of Isend for use without MPI
template <> MPI_Request MPI_CLASS::Isend(const char *buf, int length, int, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); PROFILE_START("Isend", profile_level); auto id = getRequest(communicator, tag); auto it = global_isendrecv_list.find(id); if (it == global_isendrecv_list.end()) {
// We are calling Isend first
Isendrecv_struct data; data.data = buf; data.status = 1; global_isendrecv_list.insert( std::pair<MPI_Request, Isendrecv_struct>(id, data)); } else {
// We called Irecv first
MPI_ASSERT(it->second.status == 2); memcpy((char *)it->second.data, buf, length); global_isendrecv_list.erase(it); } PROFILE_STOP("Isend",
profile_level); return id; } #endif /************************************************************************ * Send byte array to another processor. * ************************************************************************/ void MPI_CLASS::sendBytes(const void *buf, int number_bytes, int recv_proc_number, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); send((const char *)buf, number_bytes, recv_proc_number, tag); } /************************************************************************ * Non-blocking send byte array to another processor. * ************************************************************************/ MPI_Request MPI_CLASS::IsendBytes(const void *buf, int number_bytes, const int recv_proc, const int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); return Isend((const char *)buf, number_bytes, recv_proc, tag); } /************************************************************************ * Recieve data array to another processor. * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // char template <> void MPI_CLASS::recv(char *buf, int &length, int send_proc_number, const bool get_length, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); PROFILE_START("recv", profile_level); // Get the recieve length if necessary if (get_length) { int bytes = this->probe(send_proc_number, tag); int recv_length = bytes / sizeof(char); MPI_INSIST(length >= recv_length, "Recived length is larger than allocated array"); length = recv_length; } // Send the data MPI_Status status; MPI_Recv((void *)buf, length, MPI_CHAR, send_proc_number, tag, communicator, &status); PROFILE_STOP("recv", profile_level); } // int template <> void MPI_CLASS::recv(int *buf, int &length, int send_proc_number, const bool get_length, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); PROFILE_START("recv", profile_level); // Get the recieve length if necessary if (get_length) { int bytes = this->probe(send_proc_number, tag); int recv_length = bytes / sizeof(int); MPI_INSIST(length >= recv_length, "Recived length is larger than allocated array"); length = recv_length; } // Send the data MPI_Status status; MPI_Recv((void *)buf, length, MPI_INT, send_proc_number, tag, communicator, &status); PROFILE_STOP("recv", profile_level); } // float template <> void MPI_CLASS::recv(float *buf, int &length, int send_proc_number, const bool get_length, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); PROFILE_START("recv", profile_level); // Get the recieve length if necessary if (get_length) { int bytes = this->probe(send_proc_number, tag); int recv_length = bytes / sizeof(float); MPI_INSIST(length >= recv_length, "Recived length is larger than allocated array"); length = recv_length; } // Send the data MPI_Status status; MPI_Recv((void *)buf, length, MPI_FLOAT, send_proc_number, tag, communicator, &status); PROFILE_STOP("recv", profile_level); } // double template <> void MPI_CLASS::recv(double *buf, int &length, int send_proc_number, const bool get_length, int tag) const { // Set the tag to 0 if it is < 0 tag = (tag >= 0) ? 
tag : 0; MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); PROFILE_START("recv", profile_level); // Get the recieve length if necessary if (get_length) { int bytes = this->probe(send_proc_number, tag); int recv_length = bytes / sizeof(double); MPI_INSIST(length >= recv_length, "Recived length is larger than allocated array"); length = recv_length; } // Send the data MPI_Status status; MPI_Recv((void *)buf, length, MPI_DOUBLE, send_proc_number, tag, communicator, &status); PROFILE_STOP("recv", profile_level); } #else // We need a concrete instantiation of recv for use without mpi template <> void MPI_CLASS::recv(char *buf, int &length, int, const bool, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); PROFILE_START("recv", profile_level); auto id = getRequest(communicator, tag); auto it = global_isendrecv_list.find(id); MPI_INSIST(it != global_isendrecv_list.end(), "recv must be paired with a previous call to isend in serial"); MPI_ASSERT(it->second.status == 1); memcpy(buf, it->second.data, length); global_isendrecv_list.erase(it); PROFILE_STOP("recv", profile_level); } #endif /************************************************************************ * Non-blocking recieve data array to another processor. * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // char template <> MPI_Request MPI_CLASS::Irecv(char *buf, int length, int send_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; PROFILE_START("Irecv", profile_level); MPI_Irecv((void *)buf, length, MPI_CHAR, send_proc, tag, communicator, &request); PROFILE_STOP("Irecv", profile_level); return request; } // int template <> MPI_Request MPI_CLASS::Irecv(int *buf, int length, int send_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; PROFILE_START("Irecv", profile_level); MPI_Irecv((void *)buf, length, MPI_INT, send_proc, tag, communicator, &request); PROFILE_STOP("Irecv", profile_level); return request; } // float template <> MPI_Request MPI_CLASS::Irecv(float *buf, int length, int send_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; PROFILE_START("Irecv", profile_level); MPI_Irecv((void *)buf, length, MPI_FLOAT, send_proc, tag, communicator, &request); PROFILE_STOP("Irecv", profile_level); return request; } // double template <> MPI_Request MPI_CLASS::Irecv(double *buf, int length, int send_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Request request; PROFILE_START("Irecv", profile_level); MPI_Irecv((void *)buf, length, MPI_DOUBLE, send_proc, tag, communicator, &request); PROFILE_STOP("Irecv", profile_level); return request; } #else // We need a concrete instantiation of irecv for use without mpi template <> MPI_Request MPI_CLASS::Irecv(char *buf, int length, int, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); PROFILE_START("Irecv", profile_level); auto id = getRequest(communicator, tag); auto it = global_isendrecv_list.find(id); if (it == global_isendrecv_list.end()) { // We are calling Irecv first Isendrecv_struct data; 
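// No matching Isend has been posted yet: record the destination buffer and mark it
// as a pending receive so that a later send/Isend with the same communicator and tag
// can copy its data directly into this buffer.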
data.data = buf; data.status = 2; global_isendrecv_list.insert( std::pair(id, data)); } else { // We called Isend first MPI_ASSERT(it->second.status == 1); memcpy(buf, it->second.data, length); global_isendrecv_list.erase(it); } PROFILE_STOP("Irecv", profile_level); return id; } #endif /************************************************************************ * Recieve byte array to another processor. * ************************************************************************/ void MPI_CLASS::recvBytes(void *buf, int &number_bytes, int send_proc, int tag) const { recv((char *)buf, number_bytes, send_proc, false, tag); } /************************************************************************ * Recieve byte array to another processor. * ************************************************************************/ MPI_Request MPI_CLASS::IrecvBytes(void *buf, int number_bytes, int send_proc, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); return Irecv((char *)buf, number_bytes, send_proc, tag); } /************************************************************************ * sendrecv * ************************************************************************/ #if defined(USE_MPI) template <> void MPI_CLASS::sendrecv(const char *sendbuf, int sendcount, int dest, int sendtag, char *recvbuf, int recvcount, int source, int recvtag) const { PROFILE_START("sendrecv", profile_level); MPI_Sendrecv(sendbuf, sendcount, MPI_CHAR, dest, sendtag, recvbuf, recvcount, MPI_CHAR, source, recvtag, communicator, MPI_STATUS_IGNORE); PROFILE_STOP("sendrecv", profile_level); } template <> void MPI_CLASS::sendrecv(const int *sendbuf, int sendcount, int dest, int sendtag, int *recvbuf, int recvcount, int source, int recvtag) const { PROFILE_START("sendrecv", profile_level); MPI_Sendrecv(sendbuf, sendcount, MPI_INT, dest, sendtag, recvbuf, recvcount, MPI_INT, source, recvtag, communicator, MPI_STATUS_IGNORE); PROFILE_STOP("sendrecv", profile_level); } template <> void MPI_CLASS::sendrecv(const float *sendbuf, int sendcount, int dest, int sendtag, float *recvbuf, int recvcount, int source, int recvtag) const { PROFILE_START("sendrecv", profile_level); MPI_Sendrecv(sendbuf, sendcount, MPI_FLOAT, dest, sendtag, recvbuf, recvcount, MPI_FLOAT, source, recvtag, communicator, MPI_STATUS_IGNORE); PROFILE_STOP("sendrecv", profile_level); } template <> void MPI_CLASS::sendrecv(const double *sendbuf, int sendcount, int dest, int sendtag, double *recvbuf, int recvcount, int source, int recvtag) const { PROFILE_START("sendrecv", profile_level); MPI_Sendrecv(sendbuf, sendcount, MPI_DOUBLE, dest, sendtag, recvbuf, recvcount, MPI_DOUBLE, source, recvtag, communicator, MPI_STATUS_IGNORE); PROFILE_STOP("sendrecv", profile_level); } #endif /************************************************************************ * allGather * * Note: these specializations are only called when using MPI. 
* ************************************************************************/ #ifdef USE_MPI // unsigned char template <> void MPI_CLASS::call_allGather(const unsigned char &x_in, unsigned char *x_out) const { PROFILE_START("allGather", profile_level); MPI_Allgather((void *)&x_in, 1, MPI_UNSIGNED_CHAR, (void *)x_out, 1, MPI_UNSIGNED_CHAR, communicator); PROFILE_STOP("allGather", profile_level); } template <> void MPI_CLASS::call_allGather(const unsigned char *x_in, int size_in, unsigned char *x_out, int *size_out, int *disp_out) const { PROFILE_START("allGatherv", profile_level); MPI_Allgatherv((void *)x_in, size_in, MPI_CHAR, (void *)x_out, size_out, disp_out, MPI_CHAR, communicator); PROFILE_STOP("allGatherv", profile_level); } // char template <> void MPI_CLASS::call_allGather(const char &x_in, char *x_out) const { PROFILE_START("allGather", profile_level); MPI_Allgather((void *)&x_in, 1, MPI_CHAR, (void *)x_out, 1, MPI_CHAR, communicator); PROFILE_STOP("allGather", profile_level); } template <> void MPI_CLASS::call_allGather(const char *x_in, int size_in, char *x_out, int *size_out, int *disp_out) const { PROFILE_START("allGatherv", profile_level); MPI_Allgatherv((void *)x_in, size_in, MPI_CHAR, (void *)x_out, size_out, disp_out, MPI_CHAR, communicator); PROFILE_STOP("allGatherv", profile_level); } // unsigned int template <> void MPI_CLASS::call_allGather(const unsigned int &x_in, unsigned int *x_out) const { PROFILE_START("allGather", profile_level); MPI_Allgather((void *)&x_in, 1, MPI_UNSIGNED, (void *)x_out, 1, MPI_UNSIGNED, communicator); PROFILE_STOP("allGather", profile_level); } template <> void MPI_CLASS::call_allGather(const unsigned int *x_in, int size_in, unsigned int *x_out, int *size_out, int *disp_out) const { PROFILE_START("allGatherv", profile_level); MPI_Allgatherv((void *)x_in, size_in, MPI_UNSIGNED, (void *)x_out, size_out, disp_out, MPI_UNSIGNED, communicator); PROFILE_STOP("allGatherv", profile_level); } // int template <> void MPI_CLASS::call_allGather(const int &x_in, int *x_out) const { PROFILE_START("allGather", profile_level); MPI_Allgather((void *)&x_in, 1, MPI_INT, (void *)x_out, 1, MPI_INT, communicator); PROFILE_STOP("allGather", profile_level); } template <> void MPI_CLASS::call_allGather(const int *x_in, int size_in, int *x_out, int *size_out, int *disp_out) const { PROFILE_START("allGatherv", profile_level); MPI_Allgatherv((void *)x_in, size_in, MPI_INT, (void *)x_out, size_out, disp_out, MPI_INT, communicator); PROFILE_STOP("allGatherv", profile_level); } // unsigned long int template <> void MPI_CLASS::call_allGather( const unsigned long int &x_in, unsigned long int *x_out) const { PROFILE_START("allGather", profile_level); MPI_Allgather((void *)&x_in, 1, MPI_UNSIGNED_LONG, (void *)x_out, 1, MPI_UNSIGNED_LONG, communicator); PROFILE_STOP("allGather", profile_level); } template <> void MPI_CLASS::call_allGather(const unsigned long int *x_in, int size_in, unsigned long int *x_out, int *size_out, int *disp_out) const { PROFILE_START("allGatherv", profile_level); MPI_Allgatherv((void *)x_in, size_in, MPI_UNSIGNED_LONG, (void *)x_out, size_out, disp_out, MPI_UNSIGNED_LONG, communicator); PROFILE_STOP("allGatherv", profile_level); } // long int template <> void MPI_CLASS::call_allGather(const long int &x_in, long int *x_out) const { PROFILE_START("allGather", profile_level); MPI_Allgather((void *)&x_in, 1, MPI_LONG, (void *)x_out, 1, MPI_LONG, communicator); PROFILE_STOP("allGather", profile_level); } template <> void MPI_CLASS::call_allGather(const long int 
*x_in, int size_in, long int *x_out, int *size_out, int *disp_out) const { PROFILE_START("allGatherv", profile_level); MPI_Allgatherv((void *)x_in, size_in, MPI_LONG, (void *)x_out, size_out, disp_out, MPI_LONG, communicator); PROFILE_STOP("allGatherv", profile_level); } // float template <> void MPI_CLASS::call_allGather(const float &x_in, float *x_out) const { PROFILE_START("allGather", profile_level); MPI_Allgather((void *)&x_in, 1, MPI_FLOAT, (void *)x_out, 1, MPI_FLOAT, communicator); PROFILE_STOP("allGather", profile_level); } template <> void MPI_CLASS::call_allGather(const float *x_in, int size_in, float *x_out, int *size_out, int *disp_out) const { PROFILE_START("allGatherv", profile_level); MPI_Allgatherv((void *)x_in, size_in, MPI_FLOAT, (void *)x_out, size_out, disp_out, MPI_FLOAT, communicator); PROFILE_STOP("allGatherv", profile_level); } // double template <> void MPI_CLASS::call_allGather(const double &x_in, double *x_out) const { PROFILE_START("allGather", profile_level); MPI_Allgather((void *)&x_in, 1, MPI_DOUBLE, (void *)x_out, 1, MPI_DOUBLE, communicator); PROFILE_STOP("allGather", profile_level); } template <> void MPI_CLASS::call_allGather(const double *x_in, int size_in, double *x_out, int *size_out, int *disp_out) const { PROFILE_START("allGatherv", profile_level); MPI_Allgatherv((void *)x_in, size_in, MPI_DOUBLE, (void *)x_out, size_out, disp_out, MPI_DOUBLE, communicator); PROFILE_STOP("allGatherv", profile_level); } #else // We need a concrete instantiation of call_allGather(x_in,size_in,x_out,size_out) template <> void MPI_CLASS::call_allGather(const char *, int, char *, int *, int *) const { MPI_ERROR("Internal error in communicator (allGather) "); } #endif /************************************************************************ * allToAll * * Note: these specializations are only called when using MPI. 
* ************************************************************************/ #ifdef USE_MPI template <> void MPI_CLASS::allToAll(int n, const unsigned char *send, unsigned char *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_UNSIGNED_CHAR, (void *)recv, n, MPI_UNSIGNED_CHAR, communicator); PROFILE_STOP("allToAll", profile_level); } template <> void MPI_CLASS::allToAll(int n, const char *send, char *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_CHAR, (void *)recv, n, MPI_CHAR, communicator); PROFILE_STOP("allToAll", profile_level); } template <> void MPI_CLASS::allToAll(int n, const unsigned int *send, unsigned int *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_UNSIGNED, (void *)recv, n, MPI_UNSIGNED, communicator); PROFILE_STOP("allToAll", profile_level); } template <> void MPI_CLASS::allToAll(int n, const int *send, int *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_INT, (void *)recv, n, MPI_INT, communicator); PROFILE_STOP("allToAll", profile_level); } template <> void MPI_CLASS::allToAll(int n, const unsigned long int *send, unsigned long int *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_UNSIGNED_LONG, (void *)recv, n, MPI_UNSIGNED_LONG, communicator); PROFILE_STOP("allToAll", profile_level); } template <> void MPI_CLASS::allToAll(int n, const long int *send, long int *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_LONG, (void *)recv, n, MPI_LONG, communicator); PROFILE_STOP("allToAll", profile_level); } template <> void MPI_CLASS::allToAll(int n, const float *send, float *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_FLOAT, (void *)recv, n, MPI_FLOAT, communicator); PROFILE_STOP("allToAll", profile_level); } template <> void MPI_CLASS::allToAll(int n, const double *send, double *recv) const { PROFILE_START("allToAll", profile_level); MPI_Alltoall((void *)send, n, MPI_DOUBLE, (void *)recv, n, MPI_DOUBLE, communicator); PROFILE_STOP("allToAll", profile_level); } #endif /************************************************************************ * call_allToAll * * Note: these specializations are only called when using MPI. 
* ************************************************************************/ #ifdef USE_MPI // unsigned char template <> void MPI_CLASS::call_allToAll( const unsigned char *send_data, const int send_cnt[], const int send_disp[], unsigned char *recv_data, const int *recv_cnt, const int *recv_disp) const { PROFILE_START("allToAllv", profile_level); MPI_Alltoallv((void *)send_data, (int *)send_cnt, (int *)send_disp, MPI_UNSIGNED_CHAR, (void *)recv_data, (int *)recv_cnt, (int *)recv_disp, MPI_UNSIGNED_CHAR, communicator); PROFILE_STOP("allToAllv", profile_level); } // char template <> void MPI_CLASS::call_allToAll(const char *send_data, const int send_cnt[], const int send_disp[], char *recv_data, const int *recv_cnt, const int *recv_disp) const { PROFILE_START("allToAllv", profile_level); MPI_Alltoallv((void *)send_data, (int *)send_cnt, (int *)send_disp, MPI_CHAR, (void *)recv_data, (int *)recv_cnt, (int *)recv_disp, MPI_CHAR, communicator); PROFILE_STOP("allToAllv", profile_level); } // unsigned int template <> void MPI_CLASS::call_allToAll( const unsigned int *send_data, const int send_cnt[], const int send_disp[], unsigned int *recv_data, const int *recv_cnt, const int *recv_disp) const { PROFILE_START("allToAllv", profile_level); MPI_Alltoallv((void *)send_data, (int *)send_cnt, (int *)send_disp, MPI_UNSIGNED, (void *)recv_data, (int *)recv_cnt, (int *)recv_disp, MPI_UNSIGNED, communicator); PROFILE_STOP("allToAllv", profile_level); } // int template <> void MPI_CLASS::call_allToAll(const int *send_data, const int send_cnt[], const int send_disp[], int *recv_data, const int *recv_cnt, const int *recv_disp) const { PROFILE_START("allToAllv", profile_level); MPI_Alltoallv((void *)send_data, (int *)send_cnt, (int *)send_disp, MPI_INT, (void *)recv_data, (int *)recv_cnt, (int *)recv_disp, MPI_INT, communicator); PROFILE_STOP("allToAllv", profile_level); } // unsigned long int template <> void MPI_CLASS::call_allToAll( const unsigned long int *send_data, const int send_cnt[], const int send_disp[], unsigned long int *recv_data, const int *recv_cnt, const int *recv_disp) const { PROFILE_START("allToAllv", profile_level); MPI_Alltoallv((void *)send_data, (int *)send_cnt, (int *)send_disp, MPI_UNSIGNED_LONG, (void *)recv_data, (int *)recv_cnt, (int *)recv_disp, MPI_UNSIGNED_LONG, communicator); PROFILE_STOP("allToAllv", profile_level); } // long int template <> void MPI_CLASS::call_allToAll( const long int *send_data, const int send_cnt[], const int send_disp[], long int *recv_data, const int *recv_cnt, const int *recv_disp) const { PROFILE_START("allToAllv", profile_level); MPI_Alltoallv((void *)send_data, (int *)send_cnt, (int *)send_disp, MPI_LONG, (void *)recv_data, (int *)recv_cnt, (int *)recv_disp, MPI_LONG, communicator); PROFILE_STOP("allToAllv", profile_level); } // float template <> void MPI_CLASS::call_allToAll(const float *send_data, const int send_cnt[], const int send_disp[], float *recv_data, const int *recv_cnt, const int *recv_disp) const { PROFILE_START("allToAllv", profile_level); MPI_Alltoallv((void *)send_data, (int *)send_cnt, (int *)send_disp, MPI_FLOAT, (void *)recv_data, (int *)recv_cnt, (int *)recv_disp, MPI_FLOAT, communicator); PROFILE_STOP("allToAllv", profile_level); } // double template <> void MPI_CLASS::call_allToAll(const double *send_data, const int send_cnt[], const int send_disp[], double *recv_data, const int *recv_cnt, const int *recv_disp) const { PROFILE_START("allToAllv", profile_level); MPI_Alltoallv((void *)send_data, (int *)send_cnt, (int *)send_disp, 
MPI_DOUBLE, (void *)recv_data, (int *)recv_cnt, (int *)recv_disp, MPI_DOUBLE, communicator); PROFILE_STOP("allToAllv", profile_level); } #else // Default instatiation of unsigned char template <> void MPI_CLASS::call_allToAll(const char *, const int[], const int[], char *, const int *, const int *) const { MPI_ERROR("Should not reach this point"); } #endif /************************************************************************ * call_sumScan * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // unsigned char template <> void MPI_CLASS::call_sumScan(const unsigned char *send, unsigned char *recv, int n) const { PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } // char template <> void MPI_CLASS::call_sumScan(const char *send, char *recv, int n) const { PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } // unsigned int template <> void MPI_CLASS::call_sumScan(const unsigned int *send, unsigned int *recv, int n) const { PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } // int template <> void MPI_CLASS::call_sumScan(const int *send, int *recv, int n) const { PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_INT, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } // long int template <> void MPI_CLASS::call_sumScan(const long int *send, long int *recv, int n) const { PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_LONG, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } // unsigned long int template <> void MPI_CLASS::call_sumScan(const unsigned long *send, unsigned long *recv, int n) const { PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } // size_t #ifdef USE_WINDOWS template <> void MPI_CLASS::call_sumScan(const size_t *send, size_t *recv, int n) const { MPI_ASSERT(MPI_SIZE_T != 0); PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } #endif // float template <> void MPI_CLASS::call_sumScan(const float *send, float *recv, int n) const { PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_FLOAT, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } // double template <> void MPI_CLASS::call_sumScan(const double *send, double *recv, int n) const { PROFILE_START("sumScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_SUM, communicator); PROFILE_STOP("sumScan", profile_level); } // std::complex template <> void MPI_CLASS::call_sumScan>( const std::complex *x, std::complex *y, int n) const { auto send = new double[2 * n]; auto recv = new double[2 * n]; for (int i = 0; i < n; i++) { send[2 * i + 0] = real(x[i]); send[2 * i + 1] = imag(x[i]); } MPI_Scan((void *)send, (void *)recv, 2 * n, MPI_DOUBLE, MPI_SUM, communicator); for (int i = 0; i < n; i++) y[i] = std::complex(recv[2 * i + 0], recv[2 * i + 1]); delete[] send; delete[] recv; } #endif 
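/* Example (not part of the original source): a minimal usage sketch of the sum-scan
 * wrapper, assuming the public sumScan<type>(const type *, type *, int) method declared
 * in common/MPI.h forwards to call_sumScan and that the class is constructible from an
 * MPI_Comm.  MPI_Scan computes inclusive prefix sums, so the global starting offset of
 * this rank's data is the scanned value minus the local contribution:
 *
 *     Utilities::MPI comm( MPI_COMM_WORLD );
 *     int N_local  = 100;                      // locally owned items (hypothetical value)
 *     int N_prefix = 0;
 *     comm.sumScan( &N_local, &N_prefix, 1 );  // inclusive prefix sum across ranks
 *     int offset   = N_prefix - N_local;       // global index of this rank's first item
 */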
/************************************************************************ * call_minScan * * Note: these specializations are only called when using MPI. * ************************************************************************/ #ifdef USE_MPI // unsigned char template <> void MPI_CLASS::call_minScan(const unsigned char *send, unsigned char *recv, int n) const { PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } // char template <> void MPI_CLASS::call_minScan(const char *send, char *recv, int n) const { PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } // unsigned int template <> void MPI_CLASS::call_minScan(const unsigned int *send, unsigned int *recv, int n) const { PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } // int template <> void MPI_CLASS::call_minScan(const int *send, int *recv, int n) const { PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_INT, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } // unsigned long int template <> void MPI_CLASS::call_minScan(const unsigned long int *send, unsigned long int *recv, int n) const { PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } // long int template <> void MPI_CLASS::call_minScan(const long int *send, long int *recv, int n) const { PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_LONG, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } // size_t #ifdef USE_WINDOWS template <> void MPI_CLASS::call_minScan(const size_t *send, size_t *recv, int n) const { MPI_ASSERT(MPI_SIZE_T != 0); PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } #endif // float template <> void MPI_CLASS::call_minScan(const float *send, float *recv, int n) const { PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_FLOAT, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } // double template <> void MPI_CLASS::call_minScan(const double *send, double *recv, int n) const { PROFILE_START("minScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_MIN, communicator); PROFILE_STOP("minScan", profile_level); } #endif /************************************************************************ * call_maxScan * * Note: these specializations are only called when using MPI. 
* ************************************************************************/
#ifdef USE_MPI
// unsigned char
template <> void MPI_CLASS::call_maxScan(const unsigned char *send, unsigned char *recv, int n) const { PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
// char
template <> void MPI_CLASS::call_maxScan(const char *send, char *recv, int n) const { PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
// unsigned int
template <> void MPI_CLASS::call_maxScan(const unsigned int *send, unsigned int *recv, int n) const { PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
// int
template <> void MPI_CLASS::call_maxScan(const int *send, int *recv, int n) const { PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_INT, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
// long int
template <> void MPI_CLASS::call_maxScan(const long int *send, long int *recv, int n) const { PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_LONG, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
// unsigned long int
template <> void MPI_CLASS::call_maxScan(const unsigned long int *send, unsigned long int *recv, int n) const { PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
// size_t
#ifdef USE_WINDOWS
template <> void MPI_CLASS::call_maxScan(const size_t *send, size_t *recv, int n) const { MPI_ASSERT(MPI_SIZE_T != 0); PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
#endif
// float
template <> void MPI_CLASS::call_maxScan(const float *send, float *recv, int n) const { PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_FLOAT, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
// double
template <> void MPI_CLASS::call_maxScan(const double *send, double *recv, int n) const { PROFILE_START("maxScan", profile_level); MPI_Scan((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_MAX, communicator); PROFILE_STOP("maxScan", profile_level); }
#endif
/************************************************************************ * Communicate ranks for communication * ************************************************************************/
std::vector<int> MPI_CLASS::commRanks(const std::vector<int> &ranks) const {
#ifdef USE_MPI
// Get a byte array with the ranks to communicate
auto data1 = new char[comm_size]; auto data2 = new char[comm_size]; memset(data1, 0, comm_size); memset(data2, 0, comm_size); for (auto &rank : ranks) data1[rank] = 1; MPI_Alltoall(data1, 1, MPI_CHAR, data2, 1, MPI_CHAR, communicator); int N = 0; for (int i = 0; i < comm_size; i++) N += data2[i]; std::vector<int> ranks_out; ranks_out.reserve(N); for (int i = 0; i < comm_size; i++) { if (data2[i]) ranks_out.push_back(i); } delete[] data1; delete[] data2; return ranks_out;
#else
return ranks;
#endif
}
/************************************************************************ * Wait functions * ************************************************************************/
#ifdef
USE_MPI void MPI_CLASS::wait(MPI_Request request) { PROFILE_START("wait", profile_level); MPI_Status status; MPI_Wait(&request, &status); /*int flag = 0; int err = MPI_Test( &request, &flag, &status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { // Put the current thread to sleep to allow other threads to run sched_yield(); // Check if the request has finished MPI_Test( &request, &flag, &status ); }*/ PROFILE_STOP("wait", profile_level); } int MPI_CLASS::waitAny(int count, MPI_Request *request) { if (count == 0) return -1; PROFILE_START("waitAny", profile_level); int index = -1; auto status = new MPI_Status[count]; MPI_Waitany(count, request, &index, status); /*int flag = 0; int err = MPI_Testany( count, request, &index, &flag, status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { // Put the current thread to sleep to allow other threads to run sched_yield(); // Check if the request has finished MPI_Testany( count, request, &index, &flag, status ); } MPI_ASSERT( index >= 0 ); // Check that the index is valid*/ delete[] status; PROFILE_STOP("waitAny", profile_level); return index; } void MPI_CLASS::waitAll(int count, MPI_Request *request) { if (count == 0) return; PROFILE_START("waitAll", profile_level); auto status = new MPI_Status[count]; MPI_Waitall(count, request, status); /*int flag = 0; int err = MPI_Testall( count, request, &flag, status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { // Put the current thread to sleep to allow other threads to run sched_yield(); // Check if the request has finished MPI_Testall( count, request, &flag, status ); }*/ PROFILE_STOP("waitAll", profile_level); delete[] status; } std::vector MPI_CLASS::waitSome(int count, MPI_Request *request) { if (count == 0) return std::vector(); PROFILE_START("waitSome", profile_level); std::vector indicies(count, -1); auto *status = new MPI_Status[count]; int outcount = 0; MPI_Waitsome(count, request, &outcount, indicies.data(), status); /*int err = MPI_Testsome( count, request, &outcount, &indicies[0], status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid MPI_ASSERT( outcount != MPI_UNDEFINED ); // Check that the first call is valid while ( outcount == 0 ) { // Put the current thread to sleep to allow other threads to run sched_yield(); // Check if the request has finished MPI_Testsome( count, request, &outcount, &indicies[0], status ); }*/ indicies.resize(outcount); delete[] status; PROFILE_STOP("waitSome", profile_level); return indicies; } #else void MPI_CLASS::wait(MPI_Request request) { PROFILE_START("wait", profile_level); while (1) { // Check if the request is in our list if (global_isendrecv_list.find(request) == global_isendrecv_list.end()) break; // Put the current thread to sleep to allow other threads to run sched_yield(); } PROFILE_STOP("wait", profile_level); } int MPI_CLASS::waitAny(int count, MPI_Request *request) { if (count == 0) return -1; PROFILE_START("waitAny", profile_level); int index = 0; while (1) { // Check if the request is in our list bool found_any = false; for (int i = 0; i < count; i++) { if (global_isendrecv_list.find(request[i]) == global_isendrecv_list.end()) { found_any = true; index = i; } } if (found_any) break; // Put the current thread to sleep to allow other threads to run sched_yield(); } PROFILE_STOP("waitAny", profile_level); return index; } void MPI_CLASS::waitAll(int count, MPI_Request *request) { if (count == 
0) return; PROFILE_START("waitAll", profile_level); while (1) { // Check if the request is in our list bool found_all = true; for (int i = 0; i < count; i++) { if (global_isendrecv_list.find(request[i]) != global_isendrecv_list.end()) found_all = false; } if (found_all) break; // Put the current thread to sleep to allow other threads to run sched_yield(); } PROFILE_STOP("waitAll", profile_level); } std::vector MPI_CLASS::waitSome(int count, MPI_Request *request) { if (count == 0) return std::vector(); PROFILE_START("waitSome", profile_level); std::vector indicies; while (1) { // Check if the request is in our list for (int i = 0; i < count; i++) { if (global_isendrecv_list.find(request[i]) == global_isendrecv_list.end()) indicies.push_back(i); } if (!indicies.empty()) break; // Put the current thread to sleep to allow other threads to run sched_yield(); } PROFILE_STOP("waitSome", profile_level); return indicies; } #endif /************************************************************************ * Probe functions * ************************************************************************/ #ifdef USE_MPI int MPI_CLASS::Iprobe(int source, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Status status; int flag = 0; MPI_Iprobe(source, tag, communicator, &flag, &status); if (flag == 0) return -1; int count; MPI_Get_count(&status, MPI_BYTE, &count); MPI_ASSERT(count >= 0); return count; } int MPI_CLASS::probe(int source, int tag) const { MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded"); MPI_INSIST(tag >= 0, "tag must be >= 0"); MPI_Status status; MPI_Probe(source, tag, communicator, &status); int count; MPI_Get_count(&status, MPI_BYTE, &count); MPI_ASSERT(count >= 0); return count; } #else int MPI_CLASS::Iprobe(int, int) const { MPI_ERROR("Not implimented for serial codes (Iprobe)"); return 0; } int MPI_CLASS::probe(int, int) const { MPI_ERROR("Not implimented for serial codes (probe)"); return 0; } #endif /************************************************************************ * Timer functions * ************************************************************************/ #ifdef USE_MPI double MPI_CLASS::time() { return MPI_Wtime(); } double MPI_CLASS::tick() { return MPI_Wtick(); } #else double MPI_CLASS::time() { auto t = std::chrono::system_clock::now(); auto ns = std::chrono::duration_cast( t.time_since_epoch()); return 1e-9 * ns.count(); } double MPI_CLASS::tick() { auto period = std::chrono::system_clock::period(); return static_cast(period.num) / static_cast(period.den); } #endif /************************************************************************ * Serialize a block of code across MPI processes * ************************************************************************/ void MPI_CLASS::serializeStart() { #ifdef USE_MPI using namespace std::chrono_literals; if (comm_rank == 0) { // Start rank 0 immediately } else { // Wait for a message from the previous rank MPI_Request request; MPI_Status status; int flag = false, buf = 0; MPI_Irecv(&buf, 1, MPI_INT, comm_rank - 1, 5627, MPI_COMM_WORLD, &request); while (!flag) { MPI_Test(&request, &flag, &status); std::this_thread::sleep_for(50ms); } } #endif } void MPI_CLASS::serializeStop() { #ifdef USE_MPI using namespace std::chrono_literals; if (comm_rank < comm_size - 1) { // Send flag to next rank MPI_Send(&comm_rank, 1, MPI_INT, comm_rank + 1, 5627, MPI_COMM_WORLD); // Wait for final finished flag int flag = false, buf = 0; MPI_Request request; MPI_Status status; 
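// Post a receive for the completion message that the last rank (comm_size - 1) sends
// once every rank has finished its serialized block, then poll with MPI_Test while
// sleeping so other threads can make progress.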
MPI_Irecv(&buf, 1, MPI_INT, comm_size - 1, 5627, MPI_COMM_WORLD, &request); while (!flag) { MPI_Test(&request, &flag, &status); std::this_thread::sleep_for(50ms); } } else {
// Send final flag to all ranks
for (int i = 0; i < comm_size - 1; i++) MPI_Send(&comm_rank, 1, MPI_INT, i, 5627, MPI_COMM_WORLD); }
#endif
}
/**************************************************************************** * Function to start/stop MPI * ****************************************************************************/
#ifdef USE_MPI
static bool called_MPI_Init = false;
#endif
bool MPI_CLASS::MPI_Active() {
#ifdef USE_MPI
int MPI_initialized, MPI_finalized; MPI_Initialized(&MPI_initialized); MPI_Finalized(&MPI_finalized); return MPI_initialized != 0 && MPI_finalized == 0;
#else
return false;
#endif
}
void MPI_CLASS::start_MPI(int argc, char *argv[], int profile_level) { changeProfileLevel(profile_level); NULL_USE(argc); NULL_USE(argv);
#ifdef USE_MPI
if (MPI_Active()) { called_MPI_Init = false; } else { int provided; int result = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); if (result != MPI_SUCCESS) MPI_ERROR("Unable to initialize MPI"); if (provided < MPI_THREAD_MULTIPLE) std::cerr << "Warning: Failed to start MPI with MPI_THREAD_MULTIPLE\n"; called_MPI_Init = true; }
#endif
}
void MPI_CLASS::stop_MPI() {
#ifdef USE_MPI
int finalized; MPI_Finalized(&finalized); if (called_MPI_Init && !finalized) { MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); called_MPI_Init = false; }
#endif
}
/**************************************************************************** * Function to perform load balancing * ****************************************************************************/
MPI MPI::loadBalance(double local, std::vector<double> work) { MPI_ASSERT((int)work.size() == getSize()); auto perf = allGather(local); std::vector<int> I(work.size()); for (size_t i = 0; i < work.size(); i++) I[i] = i; auto J = I; quicksort(perf, I); quicksort(work, J); std::vector<int> key(work.size()); for (size_t i = 0; i < work.size(); i++) key[J[i]] = I[i]; return split(0, key[getRank()]); }
/**************************************************************************** * Persistent communication * ****************************************************************************/
template <> std::shared_ptr<MPI_Request> MPI::Isend_init(const double *buf, int N, int proc, int tag) const { std::shared_ptr<MPI_Request> obj( new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); delete req; } ); MPI_Send_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() ); return obj; }
template <> std::shared_ptr<MPI_Request> MPI::Irecv_init(double *buf, int N, int proc, int tag) const { std::shared_ptr<MPI_Request> obj( new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); delete req; } ); MPI_Recv_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() ); return obj; }
void MPI::Start( MPI_Request &request ) { MPI_Start( &request ); }
} // namespace Utilities
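/* Example (not part of the original source): a minimal usage sketch of the persistent
 * communication helpers above, assuming the getRank()/getSize() accessors and an
 * MPI_Comm constructor declared in common/MPI.h.  The shared_ptr deleter calls
 * MPI_Request_free, so each persistent request is released automatically when the last
 * copy of the handle goes out of scope:
 *
 *     Utilities::MPI comm( MPI_COMM_WORLD );
 *     int right = ( comm.getRank() + 1 ) % comm.getSize();
 *     int left  = ( comm.getRank() + comm.getSize() - 1 ) % comm.getSize();
 *     std::vector<double> send_buf( 128, 1.0 ), recv_buf( 128, 0.0 );
 *     auto send_req = comm.Isend_init( send_buf.data(), 128, right, 0 );
 *     auto recv_req = comm.Irecv_init( recv_buf.data(), 128, left, 0 );
 *     for ( int iter = 0; iter < 10; iter++ ) {
 *         comm.Start( *recv_req );  // re-activate the persistent receive
 *         comm.Start( *send_req );  // re-activate the persistent send
 *         comm.wait( *send_req );   // block until the send buffer can be reused
 *         comm.wait( *recv_req );   // block until the message has arrived
 *     }
 */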