LBPM/tests/test_MPI.cpp
2021-07-19 13:36:04 -04:00

1676 lines
60 KiB
C++

#include <algorithm>
#include <cmath>
#include <complex>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <vector>
#include "common/MPI.h"
#include "common/UnitTest.h"
#include "common/Utilities.h"
#include "common/Utilities.hpp"
#include "common/ScaLBL.h"
#include "ProfilerApp.h"
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
#include <windows.h>
#define sched_yield() Sleep( 0 )
#else
#include <sched.h>
#endif
#undef MPI_CLASS
#define MPI_CLASS Utilities::MPI
#define MPI_ASSERT ASSERT
// Timing helper used by the tests below: forwards to the MPI wrapper's timer.
// The value is in seconds; differences of two calls give an elapsed time.
static inline double time()
{
    return MPI_CLASS::time();
}
// Small user-defined payload (an int plus a double) used to exercise the
// communication routines with a type that is not a built-in arithmetic type.
struct mytype {
    int a;
    double b;
    // Default state: both fields hold the -1 sentinel
    mytype() : a( -1 ), b( -1.0 ) {}
    // Intentionally implicit: the templated tests assign plain integers to `type`
    mytype( int i ) : a( i ), b( -1.0 ) {}
    mytype( int i, double d ) : a( i ), b( d ) {}
    // Member-wise comparisons; const-qualified so they also work on const objects
    bool operator==( const mytype &other ) const { return a == other.a && b == other.b; }
    bool operator!=( const mytype &other ) const { return a != other.a || b != other.b; }
};
// Routines to test Reduce with known data types
// comm - communicator to test
// ut - unit-test object that records passes / failures / expected failures
// flag - 0: all tests should pass
// 1: basic reduce should pass, reduce with rank should fail with error message
// Returns the number of tests that were run
template<class type>
int testReduce( MPI_CLASS comm, UnitTest *ut, int flag );
// Specialization of testReduce for std::complex<double>.
// Only sumReduce is exercised (scalar form and array form); the flag argument is ignored.
// Returns the number of tests that were run (2).
template<>
int testReduce<std::complex<double>>( MPI_CLASS comm, UnitTest *ut, int )
{
    using cplx = std::complex<double>;
    PROFILE_START( "testReduce<complex double>" );
    char message[128];
    // Record a pass/failure under whatever label is currently stored in `message`
    auto report = [&]( bool ok ) {
        if ( ok )
            ut->passes( message );
        else
            ut->failure( message );
    };
    // Every rank contributes (rank+1), so the expected total is size*(size+1)/2
    cplx contribution = comm.getRank() + 1;
    cplx expected     = ( ( comm.getSize() * ( comm.getSize() + 1 ) ) / 2 );
    // Scalar form
    sprintf( message, "sumReduce (%s)", typeid( cplx ).name() );
    report( comm.sumReduce<cplx>( contribution ) == expected );
    // Array form (x,y) with a single element
    sprintf( message, "sumReduce (%s) (x,y)", typeid( cplx ).name() );
    cplx total;
    comm.sumReduce<cplx>( &contribution, &total, 1 );
    report( total == expected );
    PROFILE_STOP( "testReduce<complex double>" );
    return 2; // Return the number of tests
}
// Generic test of the reduce operations: sum, min, max, and min/max with rank.
//   comm - communicator to test
//   ut   - unit-test object receiving the results
//   flag - 0: all tests should pass
//          1: the reduce-with-rank calls are expected to throw when the
//             communicator has more than one rank
// Returns the number of tests that were run (0 if `type` cannot represent
// the size of the communicator).
template<class type>
int testReduce( MPI_CLASS comm, UnitTest *ut, int flag )
{
    PROFILE_START( "testReduce" );
    char message[128];
    const char *type_name = typeid( type ).name();
    // Record a pass/failure under whatever label is currently stored in `message`
    auto report = [&]( bool ok ) {
        if ( ok )
            ut->passes( message );
        else
            ut->failure( message );
    };
    auto rank = static_cast<type>( comm.getRank() );
    auto size = static_cast<type>( comm.getSize() );
    if ( static_cast<int>( size ) != comm.getSize() ) {
        snprintf( message,
                  sizeof( message ),
                  "Reduce (%s) cannot represent the number of processors",
                  type_name );
        ut->expected_failure( message );
        // Stop the timer started above; the name must match PROFILE_START
        PROFILE_STOP2( "testReduce" );
        return 0;
    }
    type x, y;
    // Every rank contributes (rank+1), so the expected sum is size*(size+1)/2
    const int N = ( ( comm.getSize() * ( comm.getSize() + 1 ) ) / 2 );
    // The sum checks are only meaningful if `type` can hold N exactly
    const bool N_fits = static_cast<int>( static_cast<type>( N ) ) == N;
    // Test sumReduce
    snprintf( message, sizeof( message ), "sumReduce (%s)", type_name );
    if ( !N_fits )
        ut->expected_failure( message ); // type cannot represent N
    else
        report( comm.sumReduce<type>( rank + 1 ) == static_cast<type>( N ) );
    snprintf( message, sizeof( message ), "sumReduce (%s) (x,y)", type_name );
    x = rank + 1;
    comm.sumReduce<type>( &x, &y, 1 );
    if ( !N_fits )
        ut->expected_failure( message );
    else
        report( y == static_cast<type>( N ) );
    // Test minReduce (the smallest contribution is 1, from rank 0)
    snprintf( message, sizeof( message ), "minReduce (%s)", type_name );
    report( comm.minReduce<type>( rank + 1 ) == 1 );
    snprintf( message, sizeof( message ), "minReduce (%s) (x,y)", type_name );
    comm.minReduce<type>( &x, &y, 1, nullptr );
    report( y == 1 );
    // Test maxReduce (the largest contribution is size, from the last rank)
    snprintf( message, sizeof( message ), "maxReduce (%s)", type_name );
    report( comm.maxReduce<type>( rank + 1 ) == size );
    snprintf( message, sizeof( message ), "maxReduce (%s) (x,y)", type_name );
    comm.maxReduce<type>( &x, &y, 1, nullptr );
    report( y == size );
    // The remaining tests also request the rank that owns the min/max value.
    // With flag == 1 and more than one rank these calls are expected to throw.
    const bool expect_error = ( flag == 1 && comm.getSize() > 1 );
    // Record the outcome when one of the reduce-with-rank calls throws
    auto report_throw = [&]() {
        if ( expect_error )
            ut->expected_failure( message );
        else
            ut->failure( message );
    };
    int rank_of_min = -1;
    int rank_of_max = -1;
    type rank_min   = rank + 1;
    type rank_max   = rank + 1;
    // Test minReduce with rank (in-place form)
    snprintf( message, sizeof( message ), "minReduce-rank (%s)", type_name );
    try {
        comm.minReduce<type>( &rank_min, 1, &rank_of_min );
        report( rank_min == 1 && rank_of_min == 0 );
        if ( expect_error )
            ut->failure( message ); // the call should have thrown
    } catch ( ... ) {
        report_throw();
    }
    // Test minReduce with rank (x,y form)
    snprintf( message, sizeof( message ), "minReduce-rank (%s) (x,y)", type_name );
    try {
        comm.minReduce<type>( &x, &rank_min, 1, &rank_of_min );
        report( rank_min == 1 && rank_of_min == 0 );
        if ( expect_error )
            ut->failure( message ); // the call should have thrown
    } catch ( ... ) {
        report_throw();
    }
    // Test maxReduce with rank (in-place form)
    snprintf( message, sizeof( message ), "maxReduce-rank (%s)", type_name );
    try {
        comm.maxReduce<type>( &rank_max, 1, &rank_of_max );
        report( rank_max == size && rank_of_max == comm.getSize() - 1 );
        if ( expect_error )
            ut->failure( message ); // the call should have thrown
    } catch ( ... ) {
        report_throw();
    }
    // Test maxReduce with rank (x,y form)
    snprintf( message, sizeof( message ), "maxReduce-rank (%s) (x,y)", type_name );
    try {
        comm.maxReduce<type>( &x, &rank_max, 1, &rank_of_max );
        report( rank_max == size && rank_of_max == comm.getSize() - 1 );
        if ( expect_error )
            ut->failure( message ); // the call should have thrown
    } catch ( ... ) {
        report_throw();
    }
    PROFILE_STOP( "testReduce" );
    return 10; // Return the number of tests
}
// Test the scan operations (sumScan, minScan, maxScan) for a known data type.
//   comm - communicator to test
//   ut   - unit-test object receiving the results
//   flag - 0: all tests should pass
//          1: only sumScan is valid (complex<double>); min/max scans are skipped
// Returns the number of tests that were run (1 or 3).
template<class type>
int testScan( MPI_CLASS comm, UnitTest *ut, int flag = 0 )
{
    PROFILE_START( "testScan" );
    char message[500];
    // Record a pass/failure under whatever label is currently stored in `message`
    auto report = [&]( bool ok ) {
        if ( ok )
            ut->passes( message );
        else
            ut->failure( message );
    };
    // Each rank contributes (rank+1)
    auto local = ( type )( comm.getRank() + 1 );
    type result;
    // sumScan: the inclusive prefix sum on rank r is (r+1)*(r+2)/2
    sprintf( message, "sumScan (%s)", typeid( type ).name() );
    comm.sumScan<type>( &local, &result, 1 );
    auto prefix_sum = ( type )( ( ( comm.getRank() + 1 ) * ( comm.getRank() + 2 ) ) / 2 );
    report( result == prefix_sum );
    if ( flag == 1 ) {
        PROFILE_STOP2( "testScan" );
        return 1;
    }
    // minScan: the running minimum is always rank 0's contribution (1)
    sprintf( message, "minScan (%s)", typeid( type ).name() );
    comm.minScan<type>( &local, &result, 1 );
    report( result == (type) 1 );
    // maxScan: the running maximum is this rank's own contribution
    sprintf( message, "maxScan (%s)", typeid( type ).name() );
    comm.maxScan<type>( &local, &result, 1 );
    report( result == local );
    PROFILE_STOP( "testScan" );
    return 3; // Return the number of tests
}
// Test bcast (scalar and 2-element array) using every rank in turn as the root.
//   comm        - communicator to test
//   ut          - unit-test object receiving the results
//   default_val - value held by the non-root ranks before the broadcast
//   new_val     - value held by the root; every rank must end up with it
// Returns the number of tests that were run (2 per rank).
template<class type>
int testBcast( MPI_CLASS comm, UnitTest *ut, type default_val, type new_val )
{
    PROFILE_START( "testBcast" );
    char message[128];
    // Record a pass/failure under whatever label is currently stored in `message`
    auto report = [&]( bool ok ) {
        if ( ok )
            ut->passes( message );
        else
            ut->failure( message );
    };
    for ( int root = 0; root < comm.getSize(); ++root ) {
        const bool is_root = ( comm.getRank() == root );
        // Scalar broadcast: only the root starts out with new_val
        type scalar = is_root ? new_val : default_val;
        sprintf( message, "bcast scalar (%s) from rank %i", typeid( type ).name(), root );
        report( comm.bcast( scalar, root ) == new_val );
        // Array broadcast of two elements
        type pair_buf[2];
        for ( auto &entry : pair_buf )
            entry = is_root ? new_val : default_val;
        sprintf( message, "bcast vector (%s) from rank %i", typeid( type ).name(), root );
        comm.bcast( pair_buf, 2, root );
        report( pair_buf[0] == new_val && pair_buf[1] == new_val );
    }
    PROFILE_STOP( "testBcast" );
    return 2 * comm.getSize(); // Return the number of tests
}
// Test allGather in four variants: scalar, vector with per-rank sizes, vector
// with known receive sizes and non-zero displacements, and a zero-length vector.
//   comm - communicator to test
//   ut   - unit-test object receiving the results
// Returns the number of tests that were run (4).
template<class type>
int testAllGather( MPI_CLASS comm, UnitTest *ut )
{
    PROFILE_START( "testAllGather" );
    char message[128];
    const int rank        = comm.getRank();
    const int nprocs      = comm.getSize();
    const char *type_name = typeid( type ).name();
    // Record a pass/failure under whatever label is currently stored in `message`
    auto report = [&]( bool ok ) {
        if ( ok )
            ut->passes( message );
        else
            ut->failure( message );
    };
    // Rank r sends r+1 values in the vector tests, so the total is 1 + 2 + ... + nprocs
    const int N = ( nprocs * ( nprocs + 1 ) ) / 2;
    auto x1     = static_cast<type>( rank );
    // Test scalar allGather: every rank contributes its own rank
    {
        std::vector<type> x2( nprocs );
        comm.allGather( x1, x2.data() );
        bool pass = true;
        for ( int i = 0; i < nprocs; i++ ) {
            type test = i;
            if ( x2[i] != test )
                pass = false;
        }
        snprintf( message, sizeof( message ), "allGather scalar (%s)", type_name );
        report( pass );
    }
    // Test vector allGather: rank r sends (r+1) copies of r
    {
        std::vector<type> x3( rank + 1 );
        std::vector<type> x4( N );
        std::vector<type> x5( N );
        std::vector<int> size( nprocs );
        for ( auto &v : x3 )
            v = static_cast<type>( rank );
        int tot1  = comm.allGather( x3.data(), rank + 1, x4.data() );
        int tot2  = comm.allGather( x3.data(), rank + 1, x5.data(), size.data() );
        bool pass = true;
        if ( tot1 != N || tot2 != N )
            pass = false;
        int k = 0;
        for ( int i = 0; i < nprocs; i++ ) {
            if ( size[i] != i + 1 )
                pass = false;
            if ( !pass )
                break;
            for ( int j = 0; j <= i; j++ ) {
                type test = i;
                if ( x4[k] != test || x5[k] != test )
                    pass = false;
                k++;
            }
        }
        snprintf( message, sizeof( message ), "allGather vector (%s)", type_name );
        report( pass );
    }
    // Test vector allGather with known receive sizes and non-zero displacements
    {
        std::vector<type> send( rank + 1 );
        std::vector<type> recv( nprocs * nprocs + 1 );
        std::vector<int> recv_size( nprocs );
        std::vector<int> recv_disp( nprocs );
        for ( int i = 0; i <= rank; i++ )
            send[i] = i;
        for ( int i = 0; i < nprocs; i++ )
            recv_size[i] = i + 1;
        // Rank i's data is right-aligned within row i of an nprocs x nprocs
        // layout that starts at index 1 (index 0 is never written)
        for ( int i = 0; i < nprocs; i++ )
            recv_disp[i] = 1 + i * nprocs + nprocs - i - 1;
        // Sentinel: slots that receive nothing must keep this value
        std::fill( recv.begin(), recv.end(), static_cast<type>( -1 ) );
        int tot = comm.allGather(
            send.data(), rank + 1, recv.data(), recv_size.data(), recv_disp.data(), true );
        bool pass = true;
        if ( tot != N )
            pass = false;
        auto test = static_cast<type>( -1 );
        if ( recv[0] != test )
            pass = false;
        for ( int i = 0; i < nprocs; i++ ) {
            for ( int j = 0; j < nprocs; j++ ) {
                // Offset of this slot relative to the start of rank i's data;
                // negative offsets are the untouched padding
                int l = j + i * nprocs + 1 - recv_disp[i];
                if ( l >= 0 )
                    test = l;
                else
                    test = static_cast<type>( -1 );
                if ( recv[j + i * nprocs + 1] != test )
                    pass = false;
            }
        }
        snprintf( message,
                  sizeof( message ),
                  "allGather vector with known recv and non-zero displacements (%s)",
                  type_name );
        report( pass );
    }
    // Test vector allGather with no elements (the call must simply not throw)
    {
        std::vector<int> size( nprocs );
        snprintf( message, sizeof( message ), "allGather vector with no elements (%s)", type_name );
        try {
            comm.allGather( &x1, 0, static_cast<type *>( nullptr ), size.data() );
            ut->passes( message );
        } catch ( ... ) {
            ut->failure( message );
        }
    }
    PROFILE_STOP( "testAllGather" );
    return 4; // Return the number of tests
}
// Test setGather: every rank inserts its own rank into a std::set, and after
// the gather the set must contain the values 0 .. size-1.
//   comm - communicator to test
//   ut   - unit-test object receiving the result
// Returns the number of tests that were run (1).
template<class type>
int testSetGather( MPI_CLASS comm, UnitTest *ut )
{
    PROFILE_START( "testSetGather" );
    char message[500];
    std::set<type> gathered;
    gathered.insert( (type) comm.getRank() );
    comm.setGather( gathered );
    // Every rank's contribution must be present
    bool all_present = true;
    for ( int r = 0; r < comm.getSize(); ++r ) {
        type key = r;
        if ( gathered.count( key ) == 0 )
            all_present = false;
    }
    sprintf( message, "setGather (%s)", typeid( type ).name() );
    if ( !all_present )
        ut->failure( message );
    else
        ut->passes( message );
    PROFILE_STOP( "testSetGather" );
    return 1; // Return the number of tests
}
// Test mapGather: every rank inserts the entry (rank -> rank) into a std::map,
// and after the gather the map must hold the entry (i -> i) for every rank i.
//   comm - communicator to test
//   ut   - unit-test object receiving the result
// Returns the number of tests that were run (1).
template<class type>
int testMapGather( MPI_CLASS comm, UnitTest *ut )
{
    PROFILE_START( "testMapGather" );
    char message[128];
    auto local = (type) comm.getRank();
    std::map<int, type> gathered;
    gathered.insert( std::pair<int, type>( comm.getRank(), local ) );
    comm.mapGather( gathered );
    // Each rank's key must exist and map to the matching value
    bool ok = true;
    for ( int r = 0; r < comm.getSize(); ++r ) {
        type expected = r;
        auto entry    = gathered.find( r );
        if ( entry == gathered.end() || entry->second != expected )
            ok = false;
    }
    sprintf( message, "mapGather (%s)", typeid( type ).name() );
    if ( !ok )
        ut->failure( message );
    else
        ut->passes( message );
    PROFILE_STOP( "testMapGather" );
    return 1; // Return the number of tests
}
// Routine to test allToAll
// Runs four scenarios (scalar per rank, vector form with one value per rank,
// variable counts with spaced displacements, and unknown receive lengths) and
// records five results in ut (the last scenario is checked for two call forms).
// comm - communicator to test
// ut - unit-test object receiving the results
// Returns the number of tests that were run (5)
template<class type>
int testAllToAll( MPI_CLASS comm, UnitTest *ut )
{
PROFILE_START( "testAllToAll" );
bool pass;
char message[128];
int size = 0;
type *send_data, *recv_data;
// Count / displacement arrays (one entry per rank), reused by every scenario below
auto *send_cnt = new int[comm.getSize()];
auto *recv_cnt = new int[comm.getSize()];
auto *send_disp = new int[comm.getSize()];
auto *recv_disp = new int[comm.getSize()];
// Test allToAll with a scalar value to each processor
// Every rank sends the value getSize() to every rank and expects the same back
send_data = new type[comm.getSize()];
recv_data = new type[comm.getSize()];
for ( int i = 0; i < comm.getSize(); i++ )
send_data[i] = comm.getSize();
comm.allToAll( 1, send_data, recv_data );
pass = true;
for ( int i = 0; i < comm.getSize(); i++ ) {
type test = comm.getSize();
if ( recv_data[i] != test )
pass = false;
}
delete[] send_data;
delete[] recv_data;
sprintf( message, "allToAll with scalar (%s)", typeid( type ).name() );
if ( pass )
ut->passes( message );
else
ut->failure( message );
// Test allToAll vector with a scalar value to each processor
// Same data as above, but using explicit counts (1) and displacements (i)
send_data = new type[comm.getSize()];
recv_data = new type[comm.getSize()];
for ( int i = 0; i < comm.getSize(); i++ ) {
send_cnt[i] = 1;
recv_cnt[i] = 1;
send_disp[i] = i;
recv_disp[i] = i;
send_data[i] = comm.getSize();
recv_data[i] = 0;
}
// NOTE(review): the trailing `true` presumably marks the receive counts/displacements
// as already known - confirm against the allToAll declaration in common/MPI.h
size = comm.allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt, recv_disp, true );
pass = true;
// The returned value is expected to equal the number of ranks here
if ( size != comm.getSize() )
pass = false;
for ( int i = 0; i < comm.getSize(); i++ ) {
type test = comm.getSize();
if ( recv_data[i] != test )
pass = false;
}
delete[] send_data;
delete[] recv_data;
sprintf( message, "allToAll vector with scalar (%s)", typeid( type ).name() );
if ( pass )
ut->passes( message );
else
ut->failure( message );
// Test allToAll with a variable number of values per processor and spacing
// This rank sends i copies of the value i to rank i, so it receives getRank()
// copies of getRank() from every rank. Each source gets a slot of width
// 2*getRank() in recv_data, so the second half of every slot must stay untouched.
send_data = new type[comm.getSize() * comm.getSize()];
recv_data = new type[2 * comm.getRank() * comm.getSize()];
for ( int i = 0; i < comm.getSize(); i++ ) {
send_cnt[i] = i;
recv_cnt[i] = comm.getRank();
send_disp[i] = i * comm.getSize();
recv_disp[i] = 2 * i * comm.getRank();
for ( int j = 0; j < comm.getSize(); j++ ) {
// Entries at or beyond the send count are padding (-1) and are not sent
if ( j < i )
send_data[j + send_disp[i]] = i;
else
send_data[j + send_disp[i]] = (type) -1;
}
}
// Pre-fill the receive buffer with a sentinel (-2) to detect stray writes
for ( int i = 0; i < 2 * comm.getRank() * comm.getSize(); i++ )
recv_data[i] = (type) -2;
size = comm.allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt, recv_disp, true );
pass = true;
if ( size != comm.getRank() * comm.getSize() )
pass = false;
for ( int i = 0; i < comm.getSize(); i++ ) {
for ( int j = 0; j < 2 * comm.getRank(); j++ ) {
if ( j < comm.getRank() ) {
// First half of the slot: the received data
type test = comm.getRank();
if ( recv_data[j + recv_disp[i]] != test )
pass = false;
} else {
// Second half of the slot: must still hold the sentinel
auto test = (type) -2;
if ( recv_data[j + recv_disp[i]] != test )
pass = false;
}
}
}
delete[] send_data;
delete[] recv_data;
sprintf( message,
"allToAll with vector of known size and displacements (%s)",
typeid( type ).name() );
if ( pass )
ut->passes( message );
else
ut->failure( message );
// Test allToAll with an unknown receive length
// Same send pattern as above; recv_cnt / recv_disp start at -1 and are expected
// to be filled in by the call (checked below)
send_data = new type[comm.getSize() * comm.getSize()];
auto *recv_data1 = new type[comm.getSize() * comm.getSize()];
auto *recv_data2 = new type[comm.getSize() * comm.getSize()];
for ( int i = 0; i < comm.getSize(); i++ ) {
send_cnt[i] = i;
recv_cnt[i] = -1;
send_disp[i] = i * comm.getSize();
recv_disp[i] = -1;
for ( int j = 0; j < comm.getSize(); j++ ) {
if ( j < i )
send_data[j + send_disp[i]] = i;
else
send_data[j + send_disp[i]] = (type) -1;
}
}
for ( int i = 0; i < comm.getSize() * comm.getSize(); i++ ) {
recv_data1[i] = (type) -2;
recv_data2[i] = (type) -2;
}
// Variant 1 passes the count/displacement arrays with the final flag set to false;
// variant 2 omits the receive count/displacement arguments entirely
int size1 =
comm.allToAll( send_data, send_cnt, send_disp, recv_data1, recv_cnt, recv_disp, false );
int size2 = comm.allToAll( send_data, send_cnt, send_disp, recv_data2 );
bool pass1 = true;
bool pass2 = true;
if ( size1 != comm.getRank() * comm.getSize() )
pass1 = false;
if ( size2 != comm.getRank() * comm.getSize() )
pass2 = false;
// Variant 1 must have filled in the counts and (contiguous) displacements
for ( int i = 0; i < comm.getSize(); i++ ) {
if ( recv_cnt[i] != comm.getRank() || recv_disp[i] != i * comm.getRank() )
pass1 = false;
}
// The received data is expected to be packed contiguously in both variants
for ( int i = 0; i < comm.getRank() * comm.getSize(); i++ ) {
type test = comm.getRank();
if ( recv_data1[i] != test )
pass1 = false;
if ( recv_data2[i] != test )
pass2 = false;
}
delete[] send_data;
delete[] recv_data1;
delete[] recv_data2;
sprintf( message, "allToAll with vector of unknown size (%s)", typeid( type ).name() );
if ( pass1 )
ut->passes( message );
else
ut->failure( message );
sprintf(
message, "allToAll with vector of unknown size with NULL recv(%s)", typeid( type ).name() );
if ( pass2 )
ut->passes( message );
else
ut->failure( message );
// Free temporary variables
delete[] send_cnt;
delete[] recv_cnt;
delete[] send_disp;
delete[] recv_disp;
PROFILE_STOP( "testAllToAll" );
return 5; // Return the number of tests
}
// Test blocking send/recv between every ordered pair of distinct ranks, in
// three passes: known length, unknown length, and empty (zero-length) message.
//   comm - communicator to test
//   ut   - unit-test object receiving the results
//   v1   - initial value of the transfer variable
//   v2   - value that is sent and must be received
// Returns 3 * size * size (the value the caller accumulates as the test count).
template<class type>
int testSendRecv( MPI_CLASS comm, UnitTest *ut, type v1, type v2 )
{
    PROFILE_START( "testSendRecv" );
    char message[128];
    const int me     = comm.getRank();
    const int nprocs = comm.getSize();
    // Record a pass/failure under whatever label is currently stored in `message`
    auto report = [&]( bool ok ) {
        if ( ok )
            ut->passes( message );
        else
            ut->failure( message );
    };
    // Test send-recv with a known length
    for ( int src = 0; src < nprocs; ++src ) {
        for ( int dst = 0; dst < nprocs; ++dst ) {
            // We are not allowed to send/receive from the same processor
            if ( src == dst )
                continue;
            type x  = v1;
            int tag = src + dst * nprocs;
            if ( me == src ) {
                // We are sending
                x = v2;
                comm.send( &x, 1, dst, tag );
            } else if ( me == dst ) {
                // We are receiving
                int size = 1;
                comm.recv( &x, size, src, false, tag );
                sprintf(
                    message, "send-recv %i-%i known length (%s)", src, dst, typeid( type ).name() );
                report( size == 1 && x == v2 );
            }
        }
    }
    // Test send-recv with an unknown length
    for ( int src = 0; src < nprocs; ++src ) {
        for ( int dst = 0; dst < nprocs; ++dst ) {
            // We are not allowed to send/receive from the same processor
            if ( src == dst )
                continue;
            type x  = v1;
            int tag = src + dst * nprocs;
            if ( me == src ) {
                // We are sending
                x = v2;
                comm.send( &x, 1, dst, tag );
            } else if ( me == dst ) {
                // We are receiving
                int size = 1;
                comm.recv( &x, size, src, true, tag );
                sprintf( message,
                         "send-recv %i-%i unknown length (%s)",
                         src,
                         dst,
                         typeid( type ).name() );
                report( size == 1 && x == v2 );
            }
        }
    }
    // Test send-recv with an empty length
    for ( int src = 0; src < nprocs; ++src ) {
        for ( int dst = 0; dst < nprocs; ++dst ) {
            // We are not allowed to send/receive from the same processor
            if ( src == dst )
                continue;
            type x  = v1;
            int tag = src + dst * nprocs;
            if ( me == src ) {
                // We are sending (zero elements)
                x = v2;
                comm.send( &x, 0, dst, tag );
            } else if ( me == dst ) {
                // We are receiving; the length comes from probing the pending message
                int size = comm.probe( src, tag );
                comm.recv( &x, size, src, false, tag );
                sprintf(
                    message, "send-recv %i-%i empty length (%s)", src, dst, typeid( type ).name() );
                report( size == 0 );
            }
        }
    }
    PROFILE_STOP( "testSendRecv" );
    return 3 * nprocs * nprocs; // Return the number of tests
}
// Test non-blocking Isend/Irecv: every rank sends v1 to every rank (itself
// included) and then checks that it received v1 from every rank.
//   comm - communicator to test
//   ut   - unit-test object receiving the result
//   v1   - value that is sent
//   v2   - initial fill value of the receive buffer
// Returns size * size (the value the caller accumulates as the test count).
template<class type>
int testIsendIrecv( MPI_CLASS comm, UnitTest *ut, type v1, type v2 )
{
    PROFILE_START( "testIsendIrecv" );
    char message[128];
    const int me     = comm.getRank();
    const int nprocs = comm.getSize();
    std::vector<MPI_Request> sendRequest;
    std::vector<MPI_Request> recvRequest;
    // Send all messages: this rank is the sender, so the tag is me + dst * nprocs
    for ( int dst = 0; dst < nprocs; ++dst ) {
        // Start a non-blocking send
        int tag             = me + dst * nprocs;
        MPI_Request request = comm.Isend( &v1, 1, dst, tag );
        sendRequest.insert( sendRequest.begin(), request );
    }
    // Recv all messages: this rank is the receiver, so the tag is src + me * nprocs
    auto *recv_buffer = new type[nprocs];
    for ( int k = 0; k < nprocs; ++k )
        recv_buffer[k] = v2;
    recv_buffer[me] = v1;
    for ( int src = 0; src < nprocs; ++src ) {
        // Start a non-blocking recv
        int tag             = src + me * nprocs;
        MPI_Request request = comm.Irecv( &recv_buffer[src], 1, src, tag );
        recvRequest.insert( recvRequest.begin(), request );
    }
    // Wait for all communications to finish
    // Sends: exercise wait on the first request, then waitAny on the rest
    MPI_CLASS::wait( sendRequest.front() );
    sendRequest.erase( sendRequest.begin() );
    while ( !sendRequest.empty() ) {
        int index = comm.waitAny( sendRequest.size(), sendRequest.data() );
        sendRequest.erase( sendRequest.begin() + index );
    }
    // Receives: exercise waitSome, drop the finished requests (back to front so
    // the remaining indices stay valid), then waitAll on whatever is left
    auto finished = MPI_CLASS::waitSome( recvRequest.size(), recvRequest.data() );
    if ( !recvRequest.empty() ) {
        MPI_ASSERT( !finished.empty() );
        for ( auto it = finished.rbegin(); it != finished.rend(); ++it )
            recvRequest.erase( recvRequest.begin() + ( *it ) );
    }
    if ( !recvRequest.empty() )
        MPI_CLASS::waitAll( recvRequest.size(), recvRequest.data() );
    Utilities::unique( finished );
    // Check the received values
    bool all_match = true;
    for ( int k = 0; k < nprocs; ++k ) {
        if ( recv_buffer[k] != v1 )
            all_match = false;
    }
    sprintf( message, "Isend-Irecv (%s)", typeid( type ).name() );
    if ( !all_match )
        ut->failure( message );
    else
        ut->passes( message );
    delete[] recv_buffer;
    PROFILE_STOP( "testIsendIrecv" );
    return nprocs * nprocs; // Return the number of tests
}
// Test commRanks and globalRanks.
// Every rank lists the even ranks as its neighbors; the test then expects
// commRanks to return all ranks on an even rank and nothing on an odd rank.
// globalRanks must return one unique, non-negative entry per rank.
//   comm - communicator to test
//   ut   - unit-test object receiving the result
// Returns the number of tests that were run (1).
int testCommRanks( MPI_CLASS comm, UnitTest *ut )
{
    const int nprocs = comm.getSize();
    // Build the list of even ranks
    std::vector<int> even_ranks;
    for ( int r = 0; r < nprocs; r += 2 )
        even_ranks.push_back( r );
    std::vector<int> reported = comm.commRanks( even_ranks );
    bool ok                   = true;
    if ( comm.getRank() % 2 != 0 ) {
        // Odd rank: nothing expected
        ok = reported.empty();
    } else {
        // Even rank: expect the ranks 0 .. nprocs-1 in order
        ok = static_cast<int>( reported.size() ) == nprocs;
        if ( ok ) {
            for ( int r = 0; r < nprocs; ++r )
                ok = ok && reported[r] == r;
        }
    }
    // Global ranks: one entry per rank, all non-negative, no duplicates
    auto global = comm.globalRanks();
    ok          = ok && (int) global.size() == nprocs;
    for ( int g : global )
        ok = ok && g >= 0;
    auto deduplicated = global;
    Utilities::unique( deduplicated );
    ok = ok && global.size() == deduplicated.size();
    comm.barrier();
    if ( !ok )
        ut->failure( "commRanks" );
    else
        ut->passes( "commRanks" );
    return 1; // Return the number of tests
}
// Structure to contain timer results
// For every group of communication tests it stores the number of tests run
// (N_*) and the total elapsed time in seconds (t_*). All fields start at zero.
struct testCommTimerResults {
    int N_reduce        = 0;
    int N_scan          = 0;
    int N_bcast         = 0;
    int N_allGather     = 0;
    int N_setGather     = 0;
    int N_mapGather     = 0;
    int N_allToAll      = 0;
    int N_sendRecv      = 0;
    int N_IsendIrecv    = 0;
    double t_reduce     = 0.0;
    double t_scan       = 0.0;
    double t_bcast      = 0.0;
    double t_allGather  = 0.0;
    double t_setGather  = 0.0;
    double t_mapGather  = 0.0;
    double t_allToAll   = 0.0;
    double t_sendRecv   = 0.0;
    double t_IsendIrecv = 0.0;
    // Print the results (one line per group: count, total time, average time in us)
    void print() const
    {
        print_row( "Reduce:", N_reduce, t_reduce );
        print_row( "Scan:", N_scan, t_scan );
        print_row( "Bcast:", N_bcast, t_bcast );
        print_row( "allGather:", N_allGather, t_allGather );
        print_row( "allToAll:", N_allToAll, t_allToAll );
        print_row( "send-recv:", N_sendRecv, t_sendRecv );
        print_row( "Isend-Irecv:", N_IsendIrecv, t_IsendIrecv );
        print_row( "setGather:", N_setGather, t_setGather );
        print_row( "mapGather:", N_mapGather, t_mapGather );
    }

private:
    // Print a single row; the average is reported as 0 when no tests were run
    // so that an empty group does not print nan/inf
    static void print_row( const char *label, int N, double t_tot )
    {
        const double t_avg_us = ( N > 0 ) ? 1e6 * t_tot / N : 0.0;
        printf( " %s N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", label, N, t_tot, t_avg_us );
    }
};
// This routine will test a single MPI communicator
// It runs the tag, all/any reduce, reduce, scan, bcast, gather, allToAll,
// send/recv, Isend/Irecv, and commRanks tests on the given communicator.
//   comm - communicator to test
//   ut   - unit-test object receiving the results
// Returns the number of tests and the elapsed time for each timed group
testCommTimerResults testComm( MPI_CLASS comm, UnitTest *ut )
{
    PROFILE_START( "testComm" );
    testCommTimerResults timer;
    double start_time;
    // Test the tag
    // Copies of the communicator (comm2, comm3) are expected to continue the
    // same tag sequence as the original
    int tag0 = comm.newTag();
    MPI_CLASS comm2 = comm;
    MPI_CLASS comm3( comm );
    bool pass = tag0 > 0 && tag0 < comm.maxTag();
    for ( int i = 1; i < 64; i++ ) {
        if ( comm.newTag() != tag0 + i )
            pass = false;
    }
    for ( int i = 1; i <= 64; i++ ) {
        if ( comm2.newTag() != tag0 + 63 + i )
            pass = false;
    }
    for ( int i = 1; i <= 128; i++ ) {
        if ( comm3.newTag() != tag0 + 127 + i )
            pass = false;
    }
    if ( pass )
        ut->passes( "newTag" );
    else
        ut->failure( "newTag" );
    // Test all and any reduce
    bool test1 = !comm.allReduce( comm.getRank() != 0 );
    bool test2 = comm.allReduce( true );
    if ( test1 && test2 )
        ut->passes( "allReduce" );
    else
        ut->failure( "allReduce" );
    test1 = comm.anyReduce( comm.getRank() == 0 );
    test2 = !comm.anyReduce( false );
    if ( test1 && test2 )
        ut->passes( "anyReduce" );
    else
        ut->failure( "anyReduce" );
    // Test min, max, and sum reduce
    start_time = time();
    timer.N_reduce += testReduce<unsigned char>( comm, ut, 0 );
    timer.N_reduce += testReduce<char>( comm, ut, 0 );
    timer.N_reduce += testReduce<unsigned int>( comm, ut, 0 );
    timer.N_reduce += testReduce<int>( comm, ut, 0 );
    timer.N_reduce += testReduce<unsigned long int>( comm, ut, 0 );
    timer.N_reduce += testReduce<long int>( comm, ut, 0 );
    timer.N_reduce += testReduce<size_t>( comm, ut, 0 );
    timer.N_reduce += testReduce<float>( comm, ut, 0 );
    timer.N_reduce += testReduce<double>( comm, ut, 0 );
    timer.N_reduce += testReduce<std::complex<double>>(
        comm, ut, 2 ); // only sumreduce is valid for complex numbers
    mytype tmp1( 1, -1.0 );
    mytype tmp2;
    if ( comm.getSize() > 1 ) {
        // We can't perform a reduce on an unknown data type (this should throw an error)
        try {
            // This should fail
            tmp2 = comm.sumReduce<mytype>( tmp1 );
            ut->failure( "sumReduce should give an error with an unknown type" );
        } catch ( ... ) {
            ut->passes( "sumReduce should give an error with an unknown type" );
        }
        try {
            // This should fail
            tmp2 = comm.minReduce<mytype>( tmp1 );
            ut->failure( "minReduce should give an error with an unknown type" );
        } catch ( ... ) {
            ut->passes( "minReduce should give an error with an unknown type" );
        }
        try {
            // This should fail
            tmp2 = comm.maxReduce<mytype>( tmp1 );
            ut->failure( "maxReduce should give an error with an unknown type" );
        } catch ( ... ) {
            ut->passes( "maxReduce should give an error with an unknown type" );
        }
        timer.N_reduce += 3;
    }
    timer.t_reduce = time() - start_time;
    // Test min, max, and sum scan
    start_time = time();
    timer.N_scan += testScan<unsigned char>( comm, ut );
    timer.N_scan += testScan<char>( comm, ut );
    timer.N_scan += testScan<unsigned int>( comm, ut );
    timer.N_scan += testScan<int>( comm, ut );
    timer.N_scan += testScan<unsigned long int>( comm, ut );
    timer.N_scan += testScan<long int>( comm, ut );
    timer.N_scan += testScan<size_t>( comm, ut );
    timer.N_scan += testScan<float>( comm, ut );
    timer.N_scan += testScan<double>( comm, ut );
    timer.N_scan +=
        testScan<std::complex<double>>( comm, ut, 1 ); // Only sumScan is valid with complex data
    if ( comm.getSize() > 1 ) {
        // We can't perform a scan on an unknown data type (this should throw an error)
        // The labels name the scan routines so these results are distinguishable
        // from the reduce error tests above
        try {
            // This should fail
            comm.sumScan<mytype>( &tmp1, &tmp2, 1 );
            ut->failure( "sumScan should give an error with an unknown type" );
        } catch ( ... ) {
            ut->passes( "sumScan should give an error with an unknown type" );
        }
        try {
            // This should fail
            comm.minScan<mytype>( &tmp1, &tmp2, 1 );
            ut->failure( "minScan should give an error with an unknown type" );
        } catch ( ... ) {
            ut->passes( "minScan should give an error with an unknown type" );
        }
        try {
            // This should fail
            comm.maxScan<mytype>( &tmp1, &tmp2, 1 );
            ut->failure( "maxScan should give an error with an unknown type" );
        } catch ( ... ) {
            ut->passes( "maxScan should give an error with an unknown type" );
        }
        timer.N_scan += 3;
    }
    timer.t_scan = time() - start_time;
    // Test bcast
    start_time = time();
    timer.N_bcast += testBcast<unsigned char>( comm, ut, 0, 1 );
    timer.N_bcast += testBcast<char>( comm, ut, -1, 1 );
    timer.N_bcast += testBcast<unsigned int>( comm, ut, 0, 1 );
    timer.N_bcast += testBcast<int>( comm, ut, -1, 1 );
    timer.N_bcast += testBcast<unsigned long int>( comm, ut, 0, 1 );
    timer.N_bcast += testBcast<long int>( comm, ut, -1, 1 );
    timer.N_bcast += testBcast<size_t>( comm, ut, 0, 1 );
    timer.N_bcast += testBcast<float>( comm, ut, -1.0, 1.0 );
    timer.N_bcast += testBcast<double>( comm, ut, -1.0, 1.0 );
    mytype tmp3( -1, -1.0 );
    mytype tmp4( 1, 1.0 );
    timer.N_bcast += testBcast<mytype>( comm, ut, tmp3, tmp4 );
    timer.t_bcast = time() - start_time;
    // Test barrier
    comm.barrier();
    // Test gather
    start_time = time();
    timer.N_allGather += testAllGather<unsigned char>( comm, ut );
    timer.N_allGather += testAllGather<char>( comm, ut );
    timer.N_allGather += testAllGather<unsigned int>( comm, ut );
    timer.N_allGather += testAllGather<int>( comm, ut );
    timer.N_allGather += testAllGather<unsigned long int>( comm, ut );
    timer.N_allGather += testAllGather<long int>( comm, ut );
    timer.N_allGather += testAllGather<size_t>( comm, ut );
    timer.N_allGather += testAllGather<float>( comm, ut );
    timer.N_allGather += testAllGather<double>( comm, ut );
    timer.N_allGather += testAllGather<std::complex<double>>( comm, ut );
    timer.N_allGather += testAllGather<mytype>( comm, ut );
    timer.t_allGather = time() - start_time;
    // Test std::set gather
    start_time = time();
    timer.N_setGather += testSetGather<unsigned char>( comm, ut );
    timer.N_setGather += testSetGather<char>( comm, ut );
    timer.N_setGather += testSetGather<unsigned int>( comm, ut );
    timer.N_setGather += testSetGather<int>( comm, ut );
    timer.N_setGather += testSetGather<unsigned long int>( comm, ut );
    timer.N_setGather += testSetGather<long int>( comm, ut );
    timer.N_setGather += testSetGather<size_t>( comm, ut );
    timer.N_setGather += testSetGather<float>( comm, ut );
    timer.N_setGather += testSetGather<double>( comm, ut );
    timer.t_setGather = time() - start_time;
    // Test std::map gather
    start_time = time();
    timer.N_mapGather += testMapGather<unsigned char>( comm, ut );
    timer.N_mapGather += testMapGather<char>( comm, ut );
    timer.N_mapGather += testMapGather<unsigned int>( comm, ut );
    timer.N_mapGather += testMapGather<int>( comm, ut );
    timer.N_mapGather += testMapGather<unsigned long int>( comm, ut );
    timer.N_mapGather += testMapGather<long int>( comm, ut );
    timer.N_mapGather += testMapGather<size_t>( comm, ut );
    timer.N_mapGather += testMapGather<float>( comm, ut );
    timer.N_mapGather += testMapGather<double>( comm, ut );
    timer.t_mapGather = time() - start_time;
    // Test allToAll
    start_time = time();
    timer.N_allToAll += testAllToAll<unsigned char>( comm, ut );
    timer.N_allToAll += testAllToAll<char>( comm, ut );
    timer.N_allToAll += testAllToAll<unsigned int>( comm, ut );
    timer.N_allToAll += testAllToAll<int>( comm, ut );
    timer.N_allToAll += testAllToAll<unsigned long int>( comm, ut );
    timer.N_allToAll += testAllToAll<long int>( comm, ut );
    timer.N_allToAll += testAllToAll<size_t>( comm, ut );
    timer.N_allToAll += testAllToAll<float>( comm, ut );
    timer.N_allToAll += testAllToAll<double>( comm, ut );
    timer.N_allToAll += testAllToAll<std::complex<double>>( comm, ut );
    timer.N_allToAll += testAllToAll<mytype>( comm, ut );
    timer.t_allToAll = time() - start_time;
    // Test send/recv
    start_time = time();
    timer.N_sendRecv += testSendRecv<unsigned char>( comm, ut, 0, 1 );
    timer.N_sendRecv += testSendRecv<char>( comm, ut, -1, 1 );
    timer.N_sendRecv += testSendRecv<unsigned int>( comm, ut, 0, 1 );
    timer.N_sendRecv += testSendRecv<int>( comm, ut, -1, 1 );
    timer.N_sendRecv += testSendRecv<unsigned long int>( comm, ut, 0, 1 );
    timer.N_sendRecv += testSendRecv<long int>( comm, ut, -1, 1 );
    timer.N_sendRecv += testSendRecv<size_t>( comm, ut, 0, 1 );
    timer.N_sendRecv += testSendRecv<float>( comm, ut, -1.0, 1.0 );
    timer.N_sendRecv += testSendRecv<double>( comm, ut, -1.0, 1.0 );
    timer.N_sendRecv += testSendRecv<mytype>( comm, ut, tmp3, tmp4 );
    timer.t_sendRecv = time() - start_time;
    // Test Isend/Irecv
    start_time = time();
    timer.N_IsendIrecv += testIsendIrecv<unsigned char>( comm, ut, 0, 1 );
    timer.N_IsendIrecv += testIsendIrecv<char>( comm, ut, -1, 1 );
    timer.N_IsendIrecv += testIsendIrecv<unsigned int>( comm, ut, 0, 1 );
    timer.N_IsendIrecv += testIsendIrecv<int>( comm, ut, -1, 1 );
    timer.N_IsendIrecv += testIsendIrecv<unsigned long int>( comm, ut, 0, 1 );
    timer.N_IsendIrecv += testIsendIrecv<long int>( comm, ut, -1, 1 );
    timer.N_IsendIrecv += testIsendIrecv<size_t>( comm, ut, 0, 1 );
    timer.N_IsendIrecv += testIsendIrecv<float>( comm, ut, -1.0, 1.0 );
    timer.N_IsendIrecv += testIsendIrecv<double>( comm, ut, -1.0, 1.0 );
    timer.N_IsendIrecv += testIsendIrecv<mytype>( comm, ut, tmp3, tmp4 );
    timer.t_IsendIrecv = time() - start_time;
    // Test commRanks (its result is recorded in ut but not timed or counted)
    testCommRanks( comm, ut );
    PROFILE_STOP( "testComm" );
    return timer;
}
// Test MPI_CLASS::dup() and the number of communicators that can be created:
//   1. A duplicate of MPI_COMM_WORLD must use a different communicator while reporting the
//      same size and rank (the remaining steps are skipped if this fails).
//   2. Up to 2000 duplicates are created and all kept alive at the same time.
//   3. Pairs of duplicates are created and destroyed in a loop and the average time per
//      dup() is printed.
// Every new communicator performs a sumReduce so that it is actually used to communicate.
void testCommDup( UnitTest *ut )
{
#if defined( USING_CLANG ) && defined( __APPLE__ )
    // The MPI error handler crashes so this test fails
    // This seems to be a MAC? + Clang + MPICH? issue only
    ut->expected_failure( "testCommDup skipped for this architecture/compiler" );
#else
    MPI_CLASS globalComm( MPI_COMM_WORLD );
    MPI_CLASS worldCopy = globalComm.dup();
    const bool dupIsValid = globalComm.getCommunicator() != worldCopy.getCommunicator() &&
                            worldCopy.getSize() == globalComm.getSize() &&
                            worldCopy.getRank() == globalComm.getRank();
    if ( !dupIsValid ) {
        ut->failure( "dup comm" );
        return;
    }
    ut->passes( "dup comm" );
#if defined( USE_PETSC ) && !defined( USE_MPI )
    ut->expected_failure( "Skipping dup tests, PETSc (no-mpi) has a limit of 128 unique comms" );
    return;
#endif
    // Phase 1: hold as many duplicates as possible (up to maxComms) at the same time
    const int maxComms = 2000;
    std::vector<MPI_CLASS> comms;
    comms.reserve( maxComms );
    try {
        for ( int i = 0; i < maxComms; ++i ) {
            MPI_CLASS fresh = globalComm.dup();
            comms.push_back( fresh );
            MPI_ASSERT( globalComm.getCommunicator() != comms[i].getCommunicator() );
            MPI_ASSERT( comms.back().sumReduce<int>( 1 ) ==
                        globalComm.getSize() ); // We need to communicate as part of the test
        }
        ut->passes( Utilities::stringf( "Created %i comms", maxComms ) );
    } catch ( ... ) {
        // Running out of communicators is tolerated as long as at least 252 were created
        if ( comms.size() >= 252 ) {
            const int held = comms.size();
            ut->expected_failure( Utilities::stringf(
                "Failed to create an unlimited number of comms (%i)", held ) );
        } else {
            ut->failure( "Could not create 252 different communicators" );
        }
        std::cout << "Maximum number of concurrent communicators: " << comms.size() << std::endl;
    }
    comms.clear();
    // Phase 2: create two duplicates per iteration and let them go out of scope again
    size_t dupCount = 0;
    globalComm.barrier();
    try {
        const double tStart = MPI_CLASS::time();
        for ( int pass = 0; pass != maxComms; ++pass ) {
            MPI_CLASS tmp_comm1 = globalComm.dup();
            MPI_CLASS tmp_comm2 = globalComm.dup();
            MPI_ASSERT( globalComm.getCommunicator() != tmp_comm1.getCommunicator() );
            MPI_ASSERT( globalComm.getCommunicator() != tmp_comm2.getCommunicator() );
            MPI_ASSERT( tmp_comm1.getCommunicator() != tmp_comm2.getCommunicator() );
            MPI_ASSERT( tmp_comm1.sumReduce<int>( 1 ) ==
                        globalComm.getSize() ); // We need to communicate as part of the test
            MPI_ASSERT( tmp_comm2.sumReduce<int>( 1 ) ==
                        globalComm.getSize() ); // We need to communicate as part of the test
            dupCount += 2;
        }
        const double tStop = MPI_CLASS::time();
        ut->passes( "Created/Destroyed an unlimited number of comms" );
        // Report the average wall time per duplicate in microseconds
        char line[128];
        sprintf( line,
                 "Time to create/destroy comm using MPI_CLASS::dup() is: %0.1f us",
                 1e6 * ( tStop - tStart ) / dupCount );
        std::cout << line << std::endl;
    } catch ( ... ) {
        ut->failure( "Failed to create/destroy an unlimited number of comms" );
        std::cout << "Maximum number of communicators created with destruction: " << dupCount
                  << std::endl;
    }
#endif
}
// Small helper used by test_GPU_aware to exchange one message of doubles with the
// neighboring ranks through buffers obtained from ScaLBL_AllocateZeroCopy.
//   - The constructor duplicates the communicator, allocates sendbuf/recvbuf and computes
//     the neighbor ranks rank_x = rank-1 and rank_X = rank+1 (both wrap around).
//   - Send() copies the caller's data into sendbuf and posts a non-blocking send to rank_x
//     and a non-blocking receive from rank_X.
//   - Recv() waits for both requests and copies recvbuf back to the caller.
// The object owns sendbuf/recvbuf and frees them in its destructor; copying is disabled
// because a copy would free the same buffers a second time.
class gpuWrapper{
public:
    gpuWrapper(MPI_CLASS MPI_COMM, int MSG_SIZE);
    ~gpuWrapper();
    gpuWrapper(const gpuWrapper &) = delete;
    gpuWrapper &operator=(const gpuWrapper &) = delete;
    void Send(double *values);
    void Recv(double *values);
    double *sendbuf = nullptr; // buffer handed to Isend (MSG_SIZE doubles)
    double *recvbuf = nullptr; // buffer handed to Irecv (MSG_SIZE doubles)
private:
    MPI_Request req1[1]; // request of the pending send
    MPI_Request req2[1]; // request of the pending receive
    MPI_CLASS comm;      // duplicate of the communicator passed to the constructor
    int sendCount = 0;   // number of doubles sent per message
    int recvCount = 0;   // number of doubles received per message
    int rank = 0;        // rank of this process in comm
    int rank_x = 0;      // destination of Send(): rank-1 with wrap-around
    int rank_X = 0;      // source of the receive: rank+1 with wrap-around
    int nprocs = 0;      // size of comm
    int sendtag = 0;     // tag used for the send (set in Send())
    int recvtag = 0;     // tag used for the receive (set in Send())
};
// Set up the wrapper.
//   MPI_COMM  communicator to duplicate; all traffic of this object uses the duplicate
//   MSG_SIZE  number of doubles per message (used for both the send and receive buffer)
// Allocates sendbuf and recvbuf through ScaLBL_AllocateZeroCopy and determines the two
// neighbors of this rank on a periodic ring.
gpuWrapper::gpuWrapper(MPI_CLASS MPI_COMM, int MSG_SIZE){
    comm = MPI_COMM.dup();
    rank = comm.getRank();
    nprocs = comm.getSize();
    sendCount = MSG_SIZE;
    recvCount = MSG_SIZE;
    // Size each buffer from its own count so the allocation always matches the
    // count later passed to Isend/Irecv and to the host copies
    ScaLBL_AllocateZeroCopy(reinterpret_cast<void **>(&sendbuf), sendCount*sizeof(double));
    ScaLBL_AllocateZeroCopy(reinterpret_cast<void **>(&recvbuf), recvCount*sizeof(double));
    // Neighbors on the ring: rank_x is the previous rank, rank_X the next rank
    rank_X = rank+1;
    rank_x = rank-1;
    if (rank_x < 0) rank_x = nprocs-1;    // rank 0 wraps to the last rank
    if (!(rank_X < nprocs)) rank_X = 0;   // the last rank wraps to rank 0
}
// Release the send buffer and then the receive buffer owned by the wrapper
gpuWrapper::~gpuWrapper(){
    double *owned[] = { sendbuf, recvbuf };
    for ( double *buffer : owned )
        ScaLBL_FreeDeviceMemory(buffer);
}
// Start one exchange: copy sendCount doubles from `values` into sendbuf, then post a
// non-blocking send of sendbuf to rank_x and a non-blocking receive into recvbuf from
// rank_X. The exchange is completed by Recv().
void gpuWrapper::Send(double *values){
    // The send and the receive use the same fixed tag
    recvtag = 130;
    sendtag = recvtag;
    const size_t sendBytes = sendCount*sizeof(double);
    ScaLBL_CopyToDevice(sendbuf,values,sendBytes);
    req1[0] = comm.Isend(sendbuf,sendCount,rank_x,sendtag);
    req2[0] = comm.Irecv(recvbuf,recvCount,rank_X,recvtag);
}
// Finish the exchange started by Send(): wait for the send request and then the receive
// request, call the device barrier, and copy recvCount doubles from recvbuf into `values`.
void gpuWrapper::Recv(double *values){
    comm.waitAll(1,&req1[0]);
    comm.waitAll(1,&req2[0]);
    ScaLBL_DeviceBarrier();
    const size_t recvBytes = recvCount*sizeof(double);
    ScaLBL_CopyToHost(values,recvbuf,recvBytes);
}
// Test GPU aware MPI
// Buffers obtained from ScaLBL_AllocateDeviceMemory are passed directly to Isend/Irecv:
//   1. Each rank sends N_msg messages of N doubles to rank+1 and receives from rank-1
//      (periodic); the received values are compared with what rank-1 generated.
//   2. The same host data is then exchanged through gpuWrapper, which sends to rank-1 and
//      receives from rank+1, so that check compares against the data of rank+1.
// Any mismatch or exception is reported as a failure of the "GPU aware MPI" test.
void test_GPU_aware( UnitTest *ut )
{
    constexpr size_t N = 1024*1024;   // number of doubles per message
    constexpr size_t N_msg = 64;      // number of messages in flight
    bool test = true;
    // Get the comm to use
    MPI_CLASS comm( MPI_COMM_WORLD );
    int rank = comm.getRank();
    int size = comm.getSize();
    try {
        // Initialize the device
        int device = ScaLBL_SetDevice(rank);
        NULL_USE( device );
        // create wrapper for communications
        gpuWrapper gpuComm(comm, N);
        // Allocate and initialize the buffers
        // (host buffers are vectors so they are released even if an exception is thrown)
        size_t bytes = N*sizeof(double);
        double *device_send[N_msg] = { nullptr };
        double *device_recv[N_msg] = { nullptr };
        std::vector<std::vector<double>> host_send( N_msg );
        std::vector<std::vector<double>> host_recv( N_msg );
        for ( size_t k=0; k<N_msg; k++ ) {
            ScaLBL_AllocateDeviceMemory((void**)&device_send[k],bytes);
            ScaLBL_AllocateDeviceMemory((void**)&device_recv[k],bytes);
            host_send[k].resize( N );
            host_recv[k].assign( N, 0.0 );
            // Initialize the data: the value encodes message index, sending rank and offset
            for ( size_t i=0; i<N; i++ )
                host_send[k][i] = 1000 * k * rank + i;
            ScaLBL_CopyToDevice(device_send[k],host_send[k].data(),bytes);
            ScaLBL_CopyToDevice(device_recv[k],host_recv[k].data(),bytes);
        }
        ScaLBL_DeviceBarrier();
        // Returns true if host_recv holds exactly the data generated by rank `source`
        auto received_from = [&]( int source ) {
            for ( size_t k=0; k<N_msg; k++ ) {
                for ( size_t i=0; i<N; i++ ) {
                    if ( host_recv[k][i] != 1000 * k * source + i )
                        return false;
                }
            }
            return true;
        };
        // Send/receive the data
        int rank_send = ( rank + 1 ) % size;
        int rank_recv = ( rank - 1 + size ) % size;
        MPI_Request req1[N_msg];
        MPI_Request req2[N_msg];
        for ( size_t k=0; k<N_msg; k++ ) {
            req1[k] = comm.Isend( device_send[k], N, rank_send, k );
            req2[k] = comm.Irecv( device_recv[k], N, rank_recv, k );
        }
        comm.waitAll(N_msg,req1);
        comm.waitAll(N_msg,req2);
        // Copy
        for ( size_t k=0; k<N_msg; k++ ) {
            ScaLBL_CopyToHost(host_send[k].data(),device_send[k],bytes);
            ScaLBL_CopyToHost(host_recv[k].data(),device_recv[k],bytes);
        }
        ScaLBL_DeviceBarrier();
        // Check the data
        test = test && received_from( rank_recv );
        // Check the gpu wrapper communications
        // Clear the receive buffers first so data left over from the exchange above
        // cannot satisfy the check
        for ( size_t k=0; k<N_msg; k++ )
            std::fill( host_recv[k].begin(), host_recv[k].end(), 0.0 );
        for ( size_t k=0; k<N_msg; k++ ) {
            gpuComm.Send(host_send[k].data());
            gpuComm.Recv(host_recv[k].data());
        }
        // Check the data
        // gpuWrapper sends to rank-1 and receives from rank+1 (opposite direction to the
        // exchange above), so the data must match what rank+1 generated
        int rank_wrapper_recv = ( rank + 1 ) % size;
        test = test && received_from( rank_wrapper_recv );
        // Free buffers
        for ( size_t k=0; k<N_msg; k++ ) {
            ScaLBL_FreeDeviceMemory(device_send[k]);
            ScaLBL_FreeDeviceMemory(device_recv[k]);
        }
    } catch ( ... ) {
        test = false;
    }
    comm.barrier();
    if ( test ) {
        std::cout << "MPI is GPU aware" << std::endl;
        ut->passes("GPU aware MPI" );
    } else {
        std::cout << "MPI is NOT GPU aware" << std::endl;
        ut->failure("GPU aware MPI" );
    }
}
// This test will test the MPI class
// Starts MPI, runs the MPI_CLASS checks on the world, self, null, duplicated and split
// communicators (plus intersection, splitByNode, balanceProcesses, timers and the GPU
// aware test), reports the results and shuts MPI down.
// Returns the value of ut.NumFailGlobal(), i.e. zero if no test failed.
int main( int argc, char *argv[] )
{
// Start MPI
Utilities::MPI::start_MPI( argc, argv );
// Create the unit test
UnitTest ut;
PROFILE_ENABLE( 0 );
PROFILE_START( "Main" );
// Limit the scope so objects are destroyed
{
// Get the start time for the tests
double start_time = time();
// Print the global size (if we are using MPI)
// Without USE_MPI the expected size of the global communicator is 1
int global_size = 0;
#ifdef USE_MPI
MPI_Comm_size( MPI_COMM_WORLD, &global_size );
#else
global_size = 1;
#endif
// Test the global communicator (MPI_COMM_WORLD)
MPI_CLASS globalComm = MPI_CLASS( MPI_COMM_WORLD );
if ( !globalComm.isNull() )
ut.passes( "Global communicator created" );
else
ut.failure( "Global communicator created" );
if ( globalComm.getSize() == global_size )
ut.passes( "Global communicator size" );
else
ut.failure( "Global communicator size" );
if ( globalComm.getRank() == 0 ) {
std::cout << "MPI_COMM_WORLD = " << global_size << " processors" << std::endl;
std::cout << " Largest tag value = " << globalComm.maxTag() << std::endl << std::endl;
}
#ifdef USE_MPI
if ( globalComm.getCommunicator() == MPI_COMM_WORLD )
ut.passes( "Communicator == MPI_COMM_WORLD" );
else
ut.failure( "Communicator == MPI_COMM_WORLD" );
#endif
// Run the communication tests on the global communicator and print the timing
// results on rank 0
testCommTimerResults commTimer = testComm( globalComm, &ut );
if ( globalComm.getRank() == 0 ) {
std::cout << "Results for global timer (rank 0)" << std::endl;
commTimer.print();
std::cout << std::endl;
}
// Test bcast with std::string
// Only rank 0 sets the string; every rank must hold it after the broadcast
std::string rank_string;
if ( globalComm.getRank() == 0 )
rank_string = "Rank 0";
rank_string = globalComm.bcast( rank_string, 0 );
if ( rank_string == "Rank 0" )
ut.passes( "Bcast std::string" );
else
ut.failure( "Bcast std::string" );
// Test MPI_COMM_SELF
MPI_CLASS selfComm = MPI_CLASS( MPI_COMM_SELF );
if ( !selfComm.isNull() )
ut.passes( "Self communicator created" );
else
ut.failure( "Self communicator created" );
#ifdef USE_MPI
if ( selfComm.getCommunicator() == MPI_COMM_SELF )
ut.passes( "Communicator == MPI_COMM_SELF" );
else
ut.failure( "Communicator == MPI_COMM_SELF" );
#endif
// Run the communication tests on the self communicator (timing results are not used)
testComm( selfComm, &ut );
// Test == and !=
if ( globalComm == globalComm && !( selfComm == globalComm ) )
ut.passes( "==" );
else
ut.failure( "==" );
if ( selfComm != globalComm && !( globalComm != globalComm ) )
ut.passes( "!=" );
else
ut.failure( "!=" );
// Test MPI_COMM_NULL
MPI_CLASS nullComm = MPI_CLASS( MPI_COMM_NULL );
if ( nullComm.isNull() )
ut.passes( "Null communicator created" );
else
ut.failure( "Null communicator created" );
if ( nullComm.getSize() == 0 )
ut.passes( "Null communicator has zero size" );
else
ut.failure( "Null communicator has zero size" );
#ifdef USE_MPI
if ( nullComm.getCommunicator() == MPI_COMM_NULL )
ut.passes( "Communicator == MPI_COMM_NULL" );
else
ut.failure( "Communicator == MPI_COMM_NULL" );
#endif
// Test dup
// dupComm is also used by the compare test below
MPI_CLASS dupComm = globalComm.dup();
if ( nullComm.dup().isNull() )
ut.passes( "Null communicator duplicates a Null communicator" );
else
ut.failure( "Null communicator duplicates a Null communicator" );
testCommDup( &ut );
// Test compare
// The checks expect compare() to return 1 for the same communicator, 3 for a duplicate
// (and for global vs. self when there is only one process), and 0 for global vs. self
// when there is more than one process
if ( globalComm.compare( globalComm ) == 1 )
ut.passes( "compare comm global==global" );
else
ut.failure( "compare comm global==global" );
if ( globalComm.compare( dupComm ) == 3 )
ut.passes( "compare comm global~=dup" );
else
ut.failure( "compare comm global~=dup" );
if ( global_size == 1 ) {
if ( globalComm.compare( selfComm ) == 3 )
ut.passes( "compare comm global~=self (global size=1)" );
else
ut.failure( "compare comm global~=self (global size=1)" );
} else {
if ( globalComm.compare( selfComm ) == 0 )
ut.passes( "compare comm global!=self" );
else
ut.failure( "compare comm global!=self" );
}
// Split the global comm and test
PROFILE_START( "Split" );
// Color assignment: rank 0 -> 0, ranks 1-2 -> 1, remaining ranks -> 2 + (rank-2)/4
int color;
if ( globalComm.getRank() == 0 )
color = 0;
else if ( globalComm.getRank() < 3 )
color = 1;
else
color = 2 + ( globalComm.getRank() - 2 ) / 4;
// splitComms[0]: split by color only
// splitComms[1]: split by color, passing the global rank as second argument
// splitComms[2]: split with color -1 (expected to be a null communicator)
// splitComms[3]: target of the copy/assignment checks
std::vector<MPI_CLASS> splitComms( 4 );
splitComms[0] = globalComm.split( color );
splitComms[1] = globalComm.split( color, globalComm.getRank() );
if ( splitComms[0].getCommunicator() != globalComm.getCommunicator() &&
splitComms[1].getCommunicator() != globalComm.getCommunicator() &&
splitComms[0].getCommunicator() != splitComms[1].getCommunicator() )
ut.passes( "split comm has different communicator" );
else
ut.failure( "split comm has different communicator" );
if ( globalComm.getSize() > 1 ) {
if ( splitComms[0].getSize() < globalComm.getSize() )
ut.passes( "split comm is smaller" );
else
ut.failure( "split comm is smaller" );
}
// Both splits are expected to give this process the same rank
if ( splitComms[0].getRank() == splitComms[1].getRank() )
ut.passes( "split sort by rank" );
else
ut.failure( "split sort by rank" );
testComm( splitComms[0], &ut );
splitComms[2] = globalComm.split( -1 );
if ( splitComms[2].isNull() )
ut.passes( "split with color=-1 returns NULL communicator" );
else
ut.failure( "split with color=-1 returns NULL communicator" );
splitComms[3] = splitComms[0]; // Make a copy to ensure there are no memory leaks
splitComms[3] = splitComms[2]; // Perform assignment to check memory leaks
MPI_ASSERT( splitComms[3] == splitComms[2] );
PROFILE_STOP( "Split" );
// Test < <= > >=
// The split communicators are expected to order strictly below the global communicator
if ( globalComm.getSize() > 1 ) {
if ( splitComms[0] < globalComm && splitComms[1] < globalComm &&
!( globalComm < globalComm ) && !( globalComm < splitComms[0] ) )
ut.passes( " < comm" );
else
ut.failure( " < comm" );
if ( splitComms[0] <= globalComm && splitComms[1] <= globalComm &&
globalComm <= globalComm && !( globalComm <= splitComms[0] ) )
ut.passes( " <= comm" );
else
ut.failure( " <= comm" );
if ( globalComm > splitComms[0] && globalComm > splitComms[1] &&
!( globalComm > globalComm ) && !( splitComms[0] > globalComm ) )
ut.passes( " > comm" );
else
ut.failure( " > comm" );
if ( globalComm >= splitComms[0] && globalComm >= splitComms[1] &&
globalComm >= globalComm && !( splitComms[0] >= globalComm ) )
ut.passes( " >= comm" );
else
ut.failure( " >= comm" );
}
// Test intersection
// Test globalComm with selfComm
// Expected: intersecting with selfComm gives a comm for which compare() is zero against
// globalComm and non-zero against selfComm; intersecting globalComm with itself gives
// the opposite
if ( globalComm.getSize() > 1 ) {
MPI_CLASS comm1 = MPI_CLASS::intersect( globalComm, selfComm );
MPI_CLASS comm2 = MPI_CLASS::intersect( selfComm, globalComm );
MPI_CLASS comm3 = MPI_CLASS::intersect( globalComm, globalComm );
if ( comm1.compare( globalComm ) == 0 && comm1.compare( selfComm ) != 0 &&
comm2.compare( globalComm ) == 0 && comm2.compare( selfComm ) != 0 &&
comm3.compare( globalComm ) != 0 && comm3.compare( selfComm ) == 0 )
ut.passes( "intersection of globalComm and selfComm" );
else
ut.failure( "intersection of globalComm and selfComm" );
}
// Test case where we have disjoint sets (this can only happen if one of the comms is null)
{
MPI_CLASS intersection = MPI_CLASS::intersect( globalComm, nullComm );
if ( intersection.isNull() )
ut.passes( "intersection of non-overlapping comms" );
else
ut.failure( "intersection of non-overlapping comms" );
}
// Test case where the comms partially overlap
if ( globalComm.getSize() > 2 ) {
// n is the last rank of the global communicator
int n = globalComm.getSize() - 1;
// Intersect 2 comms (all other ranks will be null)
// split1 uses color -1 on rank 0 and split2 uses color -1 on rank n
MPI_CLASS split1 = globalComm.split( globalComm.getRank() == 0 ? -1 : 0 );
MPI_CLASS split2 = globalComm.split( globalComm.getRank() == n ? -1 : 0 );
MPI_CLASS intersection = MPI_CLASS::intersect( split1, split2 );
bool pass = true;
if ( globalComm.getRank() == 0 || globalComm.getRank() == n ) {
// Ranks 0 and n must get a null intersection
if ( !intersection.isNull() )
pass = false;
} else {
// All other ranks: compare() must return 0 against both splits and the
// intersection must contain all ranks except 0 and n
if ( intersection.compare( split1 ) != 0 || intersection.compare( split2 ) != 0 ||
intersection.getSize() != globalComm.getSize() - 2 )
pass = false;
}
// Intersect 2 sets for ranks (3 groups should result)
// split1 = globalComm.split(globalComm.getRank()==0?1:2);
// split2 = globalComm.split(globalComm.getRank()==n?1:2);
// intersection = MPI_CLASS::intersect( split1, split2 );
// bool pass = true;
// if ( globalComm.getRank()==0 || globalComm.getRank()==n ) {
// if ( intersection.compare(selfComm)==0 )
// pass = false;
//} else {
// if ( intersection.compare(split1)!=0 || intersection.compare(split2)!=0 ||
// intersection.getSize()!=globalComm.getSize()-2 )
// pass = false;
//}
if ( pass )
ut.passes( "intersection of partially overlapping comms" );
else
ut.failure( "intersection of partially overlapping comms" );
}
// Test splitByNode
// Gather the node name of every rank on both communicators; the test passes if every
// rank in nodeComm reports the local node name and the number of ranks with that name
// is the same in nodeComm and globalComm
MPI_CLASS nodeComm = globalComm.splitByNode();
std::string localName = MPI_CLASS::getNodeName();
std::vector<std::string> globalStrings( globalComm.getSize() );
std::vector<std::string> nodeStrings( nodeComm.getSize() );
globalComm.allGather<std::string>( localName, &globalStrings[0] );
nodeComm.allGather<std::string>( localName, &nodeStrings[0] );
int N_local = 0;
for ( auto &nodeString : nodeStrings ) {
if ( nodeString == localName )
N_local++;
}
int N_global = 0;
for ( auto &globalString : globalStrings ) {
if ( globalString == localName )
N_global++;
}
if ( !nodeComm.isNull() && N_local == nodeComm.getSize() && N_local == N_global )
ut.passes( "splitByNode" );
else
ut.failure( "splitByNode" );
// Test the call to load balance the processes
// After balanceProcesses(globalComm, 1) the affinity list must be non-empty and have the
// same size as the maximum over the node; after balanceProcesses(globalComm, 2) its size
// must be between 1 and maxProcNode / nodeComm.getSize()
MPI_CLASS::balanceProcesses( globalComm, 1 );
std::vector<int> cpus = MPI_CLASS::getProcessAffinity();
size_t maxProcNode = nodeComm.maxReduce( cpus.size() );
bool pass_balance = cpus.size() == maxProcNode && !cpus.empty();
MPI_CLASS::balanceProcesses( globalComm, 2 );
cpus = MPI_CLASS::getProcessAffinity();
if ( cpus.size() < 1 || cpus.size() > maxProcNode / nodeComm.getSize() )
pass_balance = false;
if ( pass_balance ) {
ut.passes( "balanceProcesses" );
} else {
// A failure is only reported as an expected failure when __APPLE__ is defined
#ifdef __APPLE__
ut.expected_failure( "balanceProcesses" );
#else
ut.failure( "balanceProcesses" );
#endif
}
// Test the performance of sched_yield (used internally by MPI wait routines)
// The printed value is the average over 10000 calls
globalComm.barrier();
double start_yield = time();
for ( int i = 0; i < 10000; i++ )
sched_yield();
double time_yield = ( time() - start_yield ) / 10000;
if ( globalComm.getRank() == 0 )
std::cout << "Time to yield: " << time_yield * 1e6 << " us" << std::endl;
// Test time and tick
// Checked on rank 0 only: tick() must lie in (0,1) and the elapsed test time must be
// at least one tick
double end_time = MPI_CLASS::time();
double time_res = MPI_CLASS::tick();
if ( globalComm.getRank() == 0 ) {
std::cout << "Time to run tests: " << end_time - start_time << std::endl;
std::cout << "Timer resolution: " << time_res << std::endl;
if ( time_res > 0 && time_res < 1 && ( end_time - start_time ) >= time_res )
ut.passes( "time and tick" );
else
ut.failure( "time and tick" );
std::cout << std::endl;
}
// Test GPU aware MPI
test_GPU_aware( &ut );
} // Limit the scope so objects are destroyed
// Finished testing, report the results
PROFILE_START( "Report" );
// New timer variables: the ones declared in the test scope above no longer exist here
double start_time = time();
ut.report();
int num_failed = ut.NumFailGlobal();
double end_time = time();
if ( MPI_CLASS( MPI_COMM_WORLD ).getRank() == 0 )
std::cout << "Time to report: " << end_time - start_time << std::endl << std::endl;
PROFILE_STOP( "Report" );
PROFILE_STOP( "Main" );
// Shutdown
// The profile is saved and the unit test reset before MPI is stopped
PROFILE_SAVE( "test_MPI" );
ut.reset();
Utilities::MPI::stop_MPI();
return num_failed;
}