Testing persistent communication

Mark Berrill 2022-02-10 16:29:22 -05:00
parent f329e424a4
commit 1f671edbc1
6 changed files with 1127 additions and 1121 deletions
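For context, persistent communication means setting up a matched send/receive pair once (MPI_Send_init / MPI_Recv_init) and then restarting the same requests every iteration with MPI_Start / MPI_Startall instead of re-posting Isend/Irecv each time, which is what this commit wires into the MPI wrapper and the ScaLBL communicator. A minimal standalone sketch of the raw MPI pattern (the neighbor choice, message size, and tag below are illustrative, not taken from this commit):

#include <mpi.h>
#include <vector>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank = 0, size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int right = (rank + 1) % size;
    int left  = (rank + size - 1) % size;
    std::vector<double> sendbuf(100, rank), recvbuf(100, 0.0);
    // Create the persistent requests once ...
    MPI_Request req[2];
    MPI_Send_init(sendbuf.data(), 100, MPI_DOUBLE, right, 0, MPI_COMM_WORLD, &req[0]);
    MPI_Recv_init(recvbuf.data(), 100, MPI_DOUBLE, left, 0, MPI_COMM_WORLD, &req[1]);
    // ... then restart and complete them every timestep
    for (int step = 0; step < 10; step++) {
        MPI_Startall(2, req);
        MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
    }
    MPI_Request_free(&req[0]);
    MPI_Request_free(&req[1]);
    MPI_Finalize();
    return 0;
}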


@ -1263,7 +1263,7 @@ static int backtrace_thread(
if ( tid == pthread_self() ) {
count = ::backtrace( buffer, size );
} else {
// Note: this will get the backtrace, but terminates the thread in the process!!!
// Send a signal to the desired thread to get the call stack
StackTrace_mutex.lock();
struct sigaction sa;
sigfillset( &sa.sa_mask );
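For reference, the signal-based approach the updated comment describes generally looks like the sketch below: install a handler, deliver a signal to the target thread with pthread_kill, and let the handler record that thread's backtrace. This only illustrates the general technique (the names and the busy-wait are mine, not the project's code), and note that backtrace() inside a signal handler is not formally async-signal-safe.

#include <atomic>
#include <csignal>
#include <cstring>
#include <execinfo.h>
#include <pthread.h>

static void *g_buffer[64];
static std::atomic<int> g_count{ -1 };

// Runs on the target thread: record its call stack, then return (the thread keeps running)
static void stack_signal_handler(int) { g_count = ::backtrace(g_buffer, 64); }

int backtrace_other_thread(pthread_t tid) {
    struct sigaction sa;
    std::memset(&sa, 0, sizeof(sa));
    sa.sa_handler = stack_signal_handler;
    sigfillset(&sa.sa_mask);
    sigaction(SIGUSR2, &sa, nullptr);
    g_count = -1;
    pthread_kill(tid, SIGUSR2);   // deliver the signal to that one thread
    while (g_count < 0) {}        // crude wait for the handler to finish (sketch only)
    return g_count;               // number of frames captured in g_buffer
}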

File diff suppressed because it is too large.


@ -1115,15 +1115,14 @@ bool MPI_CLASS::anyReduce(const bool value) const {
template <>
void MPI_CLASS::call_sumReduce<unsigned char>(const unsigned char *send,
unsigned char *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<unsigned char>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x, int n) const {
PROFILE_START("sumReduce2<unsigned char>", profile_level);
auto send = x;
auto recv = new unsigned char[n];
@ -1136,13 +1135,13 @@ void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x,
// char
template <>
void MPI_CLASS::call_sumReduce<char>(const char *send, char *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<char>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<char>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<char>(char *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<char>(char *x, int n) const {
PROFILE_START("sumReduce2<char>", profile_level);
auto send = x;
auto recv = new char[n];
@ -1155,16 +1154,14 @@ template <> void MPI_CLASS::call_sumReduce<char>(char *x, const int n) const {
// unsigned int
template <>
void MPI_CLASS::call_sumReduce<unsigned int>(const unsigned int *send,
unsigned int *recv,
const int n) const {
unsigned int *recv, int n) const {
PROFILE_START("sumReduce1<unsigned int>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x, int n) const {
PROFILE_START("sumReduce2<unsigned int>", profile_level);
auto send = x;
auto recv = new unsigned int[n];
@ -1176,14 +1173,13 @@ void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x,
}
// int
template <>
void MPI_CLASS::call_sumReduce<int>(const int *send, int *recv,
const int n) const {
void MPI_CLASS::call_sumReduce<int>(const int *send, int *recv, int n) const {
PROFILE_START("sumReduce1<int>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_INT, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<int>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<int>(int *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<int>(int *x, int n) const {
PROFILE_START("sumReduce2<int>", profile_level);
auto send = x;
auto recv = new int[n];
@ -1196,14 +1192,13 @@ template <> void MPI_CLASS::call_sumReduce<int>(int *x, const int n) const {
// long int
template <>
void MPI_CLASS::call_sumReduce<long int>(const long int *send, long int *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<long int>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_LONG, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<long int>(long int *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<long int>(long int *x, int n) const {
PROFILE_START("sumReduce2<long int>", profile_level);
auto send = x;
auto recv = new long int[n];
@ -1217,15 +1212,14 @@ void MPI_CLASS::call_sumReduce<long int>(long int *x, const int n) const {
template <>
void MPI_CLASS::call_sumReduce<unsigned long>(const unsigned long *send,
unsigned long *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<unsigned long>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<unsigned long>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x, int n) const {
PROFILE_START("sumReduce2<unsigned long>", profile_level);
auto send = x;
auto recv = new unsigned long int[n];
@ -1239,15 +1233,14 @@ void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x,
#ifdef USE_WINDOWS
template <>
void MPI_CLASS::call_sumReduce<size_t>(const size_t *send, size_t *recv,
const int n) const {
int n) const {
MPI_ASSERT(MPI_SIZE_T != 0);
PROFILE_START("sumReduce1<size_t>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<size_t>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<size_t>(size_t *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<size_t>(size_t *x, int n) const {
MPI_ASSERT(MPI_SIZE_T != 0);
PROFILE_START("sumReduce2<size_t>", profile_level);
auto send = x;
@ -1263,13 +1256,13 @@ void MPI_CLASS::call_sumReduce<size_t>(size_t *x, const int n) const {
// float
template <>
void MPI_CLASS::call_sumReduce<float>(const float *send, float *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<float>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_FLOAT, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<float>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<float>(float *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<float>(float *x, int n) const {
PROFILE_START("sumReduce2<float>", profile_level);
auto send = x;
auto recv = new float[n];
@ -1282,14 +1275,13 @@ template <> void MPI_CLASS::call_sumReduce<float>(float *x, const int n) const {
// double
template <>
void MPI_CLASS::call_sumReduce<double>(const double *send, double *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<double>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<double>(double *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<double>(double *x, int n) const {
PROFILE_START("sumReduce2<double>", profile_level);
auto send = x;
auto recv = new double[n];
@ -1302,7 +1294,7 @@ void MPI_CLASS::call_sumReduce<double>(double *x, const int n) const {
// std::complex<double>
template <>
void MPI_CLASS::call_sumReduce<std::complex<double>>(
const std::complex<double> *x, std::complex<double> *y, const int n) const {
const std::complex<double> *x, std::complex<double> *y, int n) const {
PROFILE_START("sumReduce1<complex double>", profile_level);
auto send = new double[2 * n];
auto recv = new double[2 * n];
@ -1320,7 +1312,7 @@ void MPI_CLASS::call_sumReduce<std::complex<double>>(
}
template <>
void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
const int n) const {
int n) const {
PROFILE_START("sumReduce2<complex double>", profile_level);
auto send = new double[2 * n];
auto recv = new double[2 * n];
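The std::complex<double> specializations above interleave real and imaginary parts into a plain double array of length 2n, reduce that with MPI_SUM, and unpack the result. A self-contained sketch of the same pack/reduce/unpack pattern (free function; the name is illustrative):

#include <mpi.h>
#include <complex>
#include <vector>

void sumReduceComplex(std::complex<double> *x, int n, MPI_Comm comm) {
    std::vector<double> send(2 * n), recv(2 * n);
    for (int i = 0; i < n; i++) {
        send[2 * i + 0] = x[i].real();   // interleave real/imaginary parts
        send[2 * i + 1] = x[i].imag();
    }
    MPI_Allreduce(send.data(), recv.data(), 2 * n, MPI_DOUBLE, MPI_SUM, comm);
    for (int i = 0; i < n; i++)
        x[i] = std::complex<double>(recv[2 * i + 0], recv[2 * i + 1]);
}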
@ -1345,7 +1337,7 @@ void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
// unsigned char
template <>
void MPI_CLASS::call_minReduce<unsigned char>(const unsigned char *send,
unsigned char *recv, const int n,
unsigned char *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned char>", profile_level);
@ -1363,7 +1355,7 @@ void MPI_CLASS::call_minReduce<unsigned char>(const unsigned char *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, const int n,
void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned char>", profile_level);
@ -1386,7 +1378,7 @@ void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, const int n,
}
// char
template <>
void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, const int n,
void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<char>", profile_level);
@ -1404,7 +1396,7 @@ void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, const int n,
}
}
template <>
void MPI_CLASS::call_minReduce<char>(char *x, const int n,
void MPI_CLASS::call_minReduce<char>(char *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<char>", profile_level);
@ -1428,7 +1420,7 @@ void MPI_CLASS::call_minReduce<char>(char *x, const int n,
// unsigned int
template <>
void MPI_CLASS::call_minReduce<unsigned int>(const unsigned int *send,
unsigned int *recv, const int n,
unsigned int *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned int>", profile_level);
@ -1446,7 +1438,7 @@ void MPI_CLASS::call_minReduce<unsigned int>(const unsigned int *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, const int n,
void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned int>", profile_level);
@ -1469,7 +1461,7 @@ void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, const int n,
}
// int
template <>
void MPI_CLASS::call_minReduce<int>(const int *x, int *y, const int n,
void MPI_CLASS::call_minReduce<int>(const int *x, int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1492,7 +1484,7 @@ void MPI_CLASS::call_minReduce<int>(const int *x, int *y, const int n,
PROFILE_STOP("minReduce1<int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<int>(int *x, const int n,
void MPI_CLASS::call_minReduce<int>(int *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1523,7 +1515,7 @@ void MPI_CLASS::call_minReduce<int>(int *x, const int n,
template <>
void MPI_CLASS::call_minReduce<unsigned long int>(const unsigned long int *send,
unsigned long int *recv,
const int n,
int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned long>", profile_level);
@ -1541,8 +1533,7 @@ void MPI_CLASS::call_minReduce<unsigned long int>(const unsigned long int *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x,
const int n,
void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned long>", profile_level);
@ -1565,8 +1556,7 @@ void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x,
}
// long int
template <>
void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y,
const int n,
void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1589,7 +1579,7 @@ void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y,
PROFILE_STOP("minReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<long int>(long int *x, const int n,
void MPI_CLASS::call_minReduce<long int>(long int *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1619,8 +1609,8 @@ void MPI_CLASS::call_minReduce<long int>(long int *x, const int n,
// unsigned long long int
template <>
void MPI_CLASS::call_minReduce<unsigned long long int>(
const unsigned long long int *send, unsigned long long int *recv,
const int n, int *comm_rank_of_min) const {
const unsigned long long int *send, unsigned long long int *recv, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
auto x = new long long int[n];
@ -1647,7 +1637,7 @@ void MPI_CLASS::call_minReduce<unsigned long long int>(
}
template <>
void MPI_CLASS::call_minReduce<unsigned long long int>(
unsigned long long int *x, const int n, int *comm_rank_of_min) const {
unsigned long long int *x, int n, int *comm_rank_of_min) const {
auto recv = new unsigned long long int[n];
call_minReduce<unsigned long long int>(x, recv, n, comm_rank_of_min);
for (int i = 0; i < n; i++)
@ -1657,7 +1647,7 @@ void MPI_CLASS::call_minReduce<unsigned long long int>(
// long long int
template <>
void MPI_CLASS::call_minReduce<long long int>(const long long int *x,
long long int *y, const int n,
long long int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1676,7 +1666,7 @@ void MPI_CLASS::call_minReduce<long long int>(const long long int *x,
PROFILE_STOP("minReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<long long int>(long long int *x, const int n,
void MPI_CLASS::call_minReduce<long long int>(long long int *x, int n,
int *comm_rank_of_min) const {
auto recv = new long long int[n];
call_minReduce<long long int>(x, recv, n, comm_rank_of_min);
@ -1686,7 +1676,7 @@ void MPI_CLASS::call_minReduce<long long int>(long long int *x, const int n,
}
// float
template <>
void MPI_CLASS::call_minReduce<float>(const float *x, float *y, const int n,
void MPI_CLASS::call_minReduce<float>(const float *x, float *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<float>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1709,7 +1699,7 @@ void MPI_CLASS::call_minReduce<float>(const float *x, float *y, const int n,
PROFILE_STOP("minReduce1<float>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<float>(float *x, const int n,
void MPI_CLASS::call_minReduce<float>(float *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<float>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1738,7 +1728,7 @@ void MPI_CLASS::call_minReduce<float>(float *x, const int n,
}
// double
template <>
void MPI_CLASS::call_minReduce<double>(const double *x, double *y, const int n,
void MPI_CLASS::call_minReduce<double>(const double *x, double *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<double>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1762,7 +1752,7 @@ void MPI_CLASS::call_minReduce<double>(const double *x, double *y, const int n,
PROFILE_STOP("minReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<double>(double *x, const int n,
void MPI_CLASS::call_minReduce<double>(double *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<double>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1799,7 +1789,7 @@ void MPI_CLASS::call_minReduce<double>(double *x, const int n,
// unsigned char
template <>
void MPI_CLASS::call_maxReduce<unsigned char>(const unsigned char *send,
unsigned char *recv, const int n,
unsigned char *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned char>", profile_level);
@ -1817,7 +1807,7 @@ void MPI_CLASS::call_maxReduce<unsigned char>(const unsigned char *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, const int n,
void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned char>", profile_level);
@ -1840,7 +1830,7 @@ void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, const int n,
}
// char
template <>
void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, const int n,
void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<char>", profile_level);
@ -1858,7 +1848,7 @@ void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, const int n,
}
}
template <>
void MPI_CLASS::call_maxReduce<char>(char *x, const int n,
void MPI_CLASS::call_maxReduce<char>(char *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<char>", profile_level);
@ -1882,7 +1872,7 @@ void MPI_CLASS::call_maxReduce<char>(char *x, const int n,
// unsigned int
template <>
void MPI_CLASS::call_maxReduce<unsigned int>(const unsigned int *send,
unsigned int *recv, const int n,
unsigned int *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned int>", profile_level);
@ -1900,7 +1890,7 @@ void MPI_CLASS::call_maxReduce<unsigned int>(const unsigned int *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, const int n,
void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned int>", profile_level);
@ -1923,7 +1913,7 @@ void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, const int n,
}
// int
template <>
void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, const int n,
void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1946,7 +1936,7 @@ void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, const int n,
PROFILE_STOP("maxReduce1<int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<int>(int *x, const int n,
void MPI_CLASS::call_maxReduce<int>(int *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1975,8 +1965,7 @@ void MPI_CLASS::call_maxReduce<int>(int *x, const int n,
}
// long int
template <>
void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y,
const int n,
void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<lond int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1999,7 +1988,7 @@ void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y,
PROFILE_STOP("maxReduce1<lond int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<long int>(long int *x, const int n,
void MPI_CLASS::call_maxReduce<long int>(long int *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<lond int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2030,7 +2019,7 @@ void MPI_CLASS::call_maxReduce<long int>(long int *x, const int n,
template <>
void MPI_CLASS::call_maxReduce<unsigned long int>(const unsigned long int *send,
unsigned long int *recv,
const int n,
int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned long>", profile_level);
@ -2048,8 +2037,7 @@ void MPI_CLASS::call_maxReduce<unsigned long int>(const unsigned long int *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x,
const int n,
void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned long>", profile_level);
@ -2073,8 +2061,8 @@ void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x,
// unsigned long long int
template <>
void MPI_CLASS::call_maxReduce<unsigned long long int>(
const unsigned long long int *send, unsigned long long int *recv,
const int n, int *comm_rank_of_max) const {
const unsigned long long int *send, unsigned long long int *recv, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<long int>", profile_level);
if (comm_rank_of_max == nullptr) {
auto x = new long long int[n];
@ -2101,7 +2089,7 @@ void MPI_CLASS::call_maxReduce<unsigned long long int>(
}
template <>
void MPI_CLASS::call_maxReduce<unsigned long long int>(
unsigned long long int *x, const int n, int *comm_rank_of_max) const {
unsigned long long int *x, int n, int *comm_rank_of_max) const {
auto recv = new unsigned long long int[n];
call_maxReduce<unsigned long long int>(x, recv, n, comm_rank_of_max);
for (int i = 0; i < n; i++)
@ -2111,7 +2099,7 @@ void MPI_CLASS::call_maxReduce<unsigned long long int>(
// long long int
template <>
void MPI_CLASS::call_maxReduce<long long int>(const long long int *x,
long long int *y, const int n,
long long int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<long int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2130,7 +2118,7 @@ void MPI_CLASS::call_maxReduce<long long int>(const long long int *x,
PROFILE_STOP("maxReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<long long int>(long long int *x, const int n,
void MPI_CLASS::call_maxReduce<long long int>(long long int *x, int n,
int *comm_rank_of_max) const {
auto recv = new long long int[n];
call_maxReduce<long long int>(x, recv, n, comm_rank_of_max);
@ -2140,7 +2128,7 @@ void MPI_CLASS::call_maxReduce<long long int>(long long int *x, const int n,
}
// float
template <>
void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, const int n,
void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<float>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2164,7 +2152,7 @@ void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, const int n,
PROFILE_STOP("maxReduce1<float>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<float>(float *x, const int n,
void MPI_CLASS::call_maxReduce<float>(float *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<float>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2193,7 +2181,7 @@ void MPI_CLASS::call_maxReduce<float>(float *x, const int n,
}
// double
template <>
void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, const int n,
void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<double>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2217,7 +2205,7 @@ void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, const int n,
PROFILE_STOP("maxReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<double>(double *x, const int n,
void MPI_CLASS::call_maxReduce<double>(double *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<double>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2253,51 +2241,46 @@ void MPI_CLASS::call_maxReduce<double>(double *x, const int n,
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::call_bcast<unsigned char>(unsigned char *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<unsigned char>(unsigned char *x, int n,
int root) const {
PROFILE_START("bcast<unsigned char>", profile_level);
MPI_Bcast(x, n, MPI_UNSIGNED_CHAR, root, communicator);
PROFILE_STOP("bcast<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::call_bcast<char>(char *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<char>(char *x, int n, int root) const {
PROFILE_START("bcast<char>", profile_level);
MPI_Bcast(x, n, MPI_CHAR, root, communicator);
PROFILE_STOP("bcast<char>", profile_level);
}
// int
template <>
void MPI_CLASS::call_bcast<unsigned int>(unsigned int *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<unsigned int>(unsigned int *x, int n,
int root) const {
PROFILE_START("bcast<unsigned int>", profile_level);
MPI_Bcast(x, n, MPI_UNSIGNED, root, communicator);
PROFILE_STOP("bcast<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::call_bcast<int>(int *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<int>(int *x, int n, int root) const {
PROFILE_START("bcast<int>", profile_level);
MPI_Bcast(x, n, MPI_INT, root, communicator);
PROFILE_STOP("bcast<int>", profile_level);
}
// float
template <>
void MPI_CLASS::call_bcast<float>(float *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<float>(float *x, int n, int root) const {
PROFILE_START("bcast<float>", profile_level);
MPI_Bcast(x, n, MPI_FLOAT, root, communicator);
PROFILE_STOP("bcast<float>", profile_level);
}
// double
template <>
void MPI_CLASS::call_bcast<double>(double *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<double>(double *x, int n, int root) const {
PROFILE_START("bcast<double>", profile_level);
MPI_Bcast(x, n, MPI_DOUBLE, root, communicator);
PROFILE_STOP("bcast<double>", profile_level);
}
#else
// We need a concrete instantiation of bcast<char>(x,n,root);
template <>
void MPI_CLASS::call_bcast<char>(char *, const int, const int) const {}
template <> void MPI_CLASS::call_bcast<char>(char *, int, int) const {}
#endif
/************************************************************************
@ -2316,8 +2299,8 @@ void MPI_CLASS::barrier() const {
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::send<char>(const char *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<char>(const char *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2329,8 +2312,8 @@ void MPI_CLASS::send<char>(const char *buf, const int length,
}
// int
template <>
void MPI_CLASS::send<int>(const int *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<int>(const int *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2341,8 +2324,8 @@ void MPI_CLASS::send<int>(const int *buf, const int length,
}
// float
template <>
void MPI_CLASS::send<float>(const float *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<float>(const float *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2354,8 +2337,8 @@ void MPI_CLASS::send<float>(const float *buf, const int length,
}
// double
template <>
void MPI_CLASS::send<double>(const double *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<double>(const double *buf, int length,
int recv_proc_number, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2368,8 +2351,7 @@ void MPI_CLASS::send<double>(const double *buf, const int length,
#else
// We need a concrete instantiation of send for use without MPI
template <>
void MPI_CLASS::send<char>(const char *buf, const int length, const int,
int tag) const {
void MPI_CLASS::send<char>(const char *buf, int length, int, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("send<char>", profile_level);
@ -2391,8 +2373,8 @@ void MPI_CLASS::send<char>(const char *buf, const int length, const int,
#ifdef USE_MPI
// char
template <>
MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<char>(const char *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2404,8 +2386,8 @@ MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length,
}
// int
template <>
MPI_Request MPI_CLASS::Isend<int>(const int *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<int>(const int *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2417,8 +2399,8 @@ MPI_Request MPI_CLASS::Isend<int>(const int *buf, const int length,
}
// float
template <>
MPI_Request MPI_CLASS::Isend<float>(const float *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<float>(const float *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2430,8 +2412,8 @@ MPI_Request MPI_CLASS::Isend<float>(const float *buf, const int length,
}
// double
template <>
MPI_Request MPI_CLASS::Isend<double>(const double *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<double>(const double *buf, int length,
int recv_proc, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2444,8 +2426,8 @@ MPI_Request MPI_CLASS::Isend<double>(const double *buf, const int length,
#else
// We need a concrete instantiation of send for use without mpi
template <>
MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length, const int,
const int tag) const {
MPI_Request MPI_CLASS::Isend<char>(const char *buf, int length, int,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("Isend<char>", profile_level);
@ -2472,8 +2454,8 @@ MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length, const int,
/************************************************************************
* Send byte array to another processor. *
************************************************************************/
void MPI_CLASS::sendBytes(const void *buf, const int number_bytes,
const int recv_proc_number, int tag) const {
void MPI_CLASS::sendBytes(const void *buf, int number_bytes,
int recv_proc_number, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
send<char>((const char *)buf, number_bytes, recv_proc_number, tag);
@ -2482,7 +2464,7 @@ void MPI_CLASS::sendBytes(const void *buf, const int number_bytes,
/************************************************************************
* Non-blocking send byte array to another processor. *
************************************************************************/
MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes,
MPI_Request MPI_CLASS::IsendBytes(const void *buf, int number_bytes,
const int recv_proc, const int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
@ -2496,7 +2478,7 @@ MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes,
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::recv<char>(char *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<char>(char *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
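When get_length is true the receive cannot assume the caller's length. The bodies are truncated here, so the following is only a generic sketch of the usual probe-and-count pattern rather than the project's exact implementation:

#include <mpi.h>

int recvWithProbedLength(double *buf, int max_length, int source, int tag, MPI_Comm comm) {
    MPI_Status status;
    MPI_Probe(source, tag, comm, &status);       // block until a matching message is pending
    int count = 0;
    MPI_Get_count(&status, MPI_DOUBLE, &count);  // number of doubles in that message
    if (count > max_length)
        MPI_Abort(comm, 1);                      // caller's buffer is too small (sketch-level check)
    MPI_Recv(buf, count, MPI_DOUBLE, source, tag, comm, MPI_STATUS_IGNORE);
    return count;                                // actual number of values received
}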
@ -2518,7 +2500,7 @@ void MPI_CLASS::recv<char>(char *buf, int &length, const int send_proc_number,
}
// int
template <>
void MPI_CLASS::recv<int>(int *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<int>(int *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
@ -2540,7 +2522,7 @@ void MPI_CLASS::recv<int>(int *buf, int &length, const int send_proc_number,
}
// float
template <>
void MPI_CLASS::recv<float>(float *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<float>(float *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
@ -2562,9 +2544,8 @@ void MPI_CLASS::recv<float>(float *buf, int &length, const int send_proc_number,
}
// double
template <>
void MPI_CLASS::recv<double>(double *buf, int &length,
const int send_proc_number, const bool get_length,
int tag) const {
void MPI_CLASS::recv<double>(double *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2586,7 +2567,7 @@ void MPI_CLASS::recv<double>(double *buf, int &length,
#else
// We need a concrete instantiation of recv for use without mpi
template <>
void MPI_CLASS::recv<char>(char *buf, int &length, const int, const bool,
void MPI_CLASS::recv<char>(char *buf, int &length, int, const bool,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
@ -2609,8 +2590,8 @@ void MPI_CLASS::recv<char>(char *buf, int &length, const int, const bool,
#ifdef USE_MPI
// char
template <>
MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<char>(char *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2622,8 +2603,8 @@ MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length,
}
// int
template <>
MPI_Request MPI_CLASS::Irecv<int>(int *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<int>(int *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2635,8 +2616,8 @@ MPI_Request MPI_CLASS::Irecv<int>(int *buf, const int length,
}
// float
template <>
MPI_Request MPI_CLASS::Irecv<float>(float *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<float>(float *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2648,8 +2629,8 @@ MPI_Request MPI_CLASS::Irecv<float>(float *buf, const int length,
}
// double
template <>
MPI_Request MPI_CLASS::Irecv<double>(double *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<double>(double *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2662,8 +2643,7 @@ MPI_Request MPI_CLASS::Irecv<double>(double *buf, const int length,
#else
// We need a concrete instantiation of irecv for use without mpi
template <>
MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length, const int,
const int tag) const {
MPI_Request MPI_CLASS::Irecv<char>(char *buf, int length, int, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("Irecv<char>", profile_level);
@ -2690,7 +2670,7 @@ MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length, const int,
/************************************************************************
* Recieve byte array to another processor. *
************************************************************************/
void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc,
void MPI_CLASS::recvBytes(void *buf, int &number_bytes, int send_proc,
int tag) const {
recv<char>((char *)buf, number_bytes, send_proc, false, tag);
}
@ -2698,8 +2678,8 @@ void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc,
/************************************************************************
* Recieve byte array to another processor. *
************************************************************************/
MPI_Request MPI_CLASS::IrecvBytes(void *buf, const int number_bytes,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::IrecvBytes(void *buf, int number_bytes, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
return Irecv<char>((char *)buf, number_bytes, send_proc, tag);
@ -2913,7 +2893,7 @@ void MPI_CLASS::call_allGather<char>(const char *, int, char *, int *,
************************************************************************/
#ifdef USE_MPI
template <>
void MPI_CLASS::allToAll<unsigned char>(const int n, const unsigned char *send,
void MPI_CLASS::allToAll<unsigned char>(int n, const unsigned char *send,
unsigned char *recv) const {
PROFILE_START("allToAll<unsigned char>", profile_level);
MPI_Alltoall((void *)send, n, MPI_UNSIGNED_CHAR, (void *)recv, n,
@ -2921,15 +2901,14 @@ void MPI_CLASS::allToAll<unsigned char>(const int n, const unsigned char *send,
PROFILE_STOP("allToAll<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::allToAll<char>(const int n, const char *send,
char *recv) const {
void MPI_CLASS::allToAll<char>(int n, const char *send, char *recv) const {
PROFILE_START("allToAll<char>", profile_level);
MPI_Alltoall((void *)send, n, MPI_CHAR, (void *)recv, n, MPI_CHAR,
communicator);
PROFILE_STOP("allToAll<char>", profile_level);
}
template <>
void MPI_CLASS::allToAll<unsigned int>(const int n, const unsigned int *send,
void MPI_CLASS::allToAll<unsigned int>(int n, const unsigned int *send,
unsigned int *recv) const {
PROFILE_START("allToAll<unsigned int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_UNSIGNED, (void *)recv, n, MPI_UNSIGNED,
@ -2937,14 +2916,14 @@ void MPI_CLASS::allToAll<unsigned int>(const int n, const unsigned int *send,
PROFILE_STOP("allToAll<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::allToAll<int>(const int n, const int *send, int *recv) const {
void MPI_CLASS::allToAll<int>(int n, const int *send, int *recv) const {
PROFILE_START("allToAll<int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_INT, (void *)recv, n, MPI_INT,
communicator);
PROFILE_STOP("allToAll<int>", profile_level);
}
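As a reminder of the semantics these thin wrappers expose: with n values per destination, MPI_Alltoall sends the p-th block of the send buffer to rank p and stores the block received from rank p at position p of the receive buffer. A tiny standalone illustration with n = 1 (not project code):

#include <mpi.h>
#include <vector>

void exampleAllToAll(MPI_Comm comm) {
    int rank = 0, size = 1;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    std::vector<int> send(size), recv(size);
    for (int p = 0; p < size; p++)
        send[p] = 100 * rank + p;   // value destined for rank p
    MPI_Alltoall(send.data(), 1, MPI_INT, recv.data(), 1, MPI_INT, comm);
    // recv[p] now holds 100 * p + rank, i.e. the value rank p addressed to this rank
}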
template <>
void MPI_CLASS::allToAll<unsigned long int>(const int n,
void MPI_CLASS::allToAll<unsigned long int>(int n,
const unsigned long int *send,
unsigned long int *recv) const {
PROFILE_START("allToAll<unsigned long>", profile_level);
@ -2953,7 +2932,7 @@ void MPI_CLASS::allToAll<unsigned long int>(const int n,
PROFILE_STOP("allToAll<unsigned long>", profile_level);
}
template <>
void MPI_CLASS::allToAll<long int>(const int n, const long int *send,
void MPI_CLASS::allToAll<long int>(int n, const long int *send,
long int *recv) const {
PROFILE_START("allToAll<long int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_LONG, (void *)recv, n, MPI_LONG,
@ -2961,15 +2940,14 @@ void MPI_CLASS::allToAll<long int>(const int n, const long int *send,
PROFILE_STOP("allToAll<long int>", profile_level);
}
template <>
void MPI_CLASS::allToAll<float>(const int n, const float *send,
float *recv) const {
void MPI_CLASS::allToAll<float>(int n, const float *send, float *recv) const {
PROFILE_START("allToAll<float>", profile_level);
MPI_Alltoall((void *)send, n, MPI_FLOAT, (void *)recv, n, MPI_FLOAT,
communicator);
PROFILE_STOP("allToAll<float>", profile_level);
}
template <>
void MPI_CLASS::allToAll<double>(const int n, const double *send,
void MPI_CLASS::allToAll<double>(int n, const double *send,
double *recv) const {
PROFILE_START("allToAll<double>", profile_level);
MPI_Alltoall((void *)send, n, MPI_DOUBLE, (void *)recv, n, MPI_DOUBLE,
@ -3713,4 +3691,28 @@ MPI MPI::loadBalance(double local, std::vector<double> work) {
return split(0, key[getRank()]);
}
/****************************************************************************
* Function Persistent Communication *
****************************************************************************/
template <>
std::shared_ptr<MPI_Request> MPI::Isend_init<double>(const double *buf, int N, int proc, int tag) const
{
std::shared_ptr<MPI_Request> obj(new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); } );
MPI_Send_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() );
return obj;
}
template<>
std::shared_ptr<MPI_Request> MPI::Irecv_init<double>(double *buf, int N, int proc, int tag) const
{
std::shared_ptr<MPI_Request> obj(new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); } );
MPI_Recv_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() );
return obj;
}
void MPI::Start( MPI_Request &request )
{
MPI_Start( &request );
}
} // namespace Utilities
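A sketch of how these new wrappers are intended to be driven, modeled on the ScaLBL changes later in this commit: build the persistent requests once, then Start and waitAll them each iteration; the shared_ptr deleters call MPI_Request_free automatically. Only the double instantiations exist in this commit, and the include of the wrapper header is assumed:

#include <memory>
#include <vector>
// #include "common/MPI.h"   // assumed: header declaring Utilities::MPI

void exampleTimestepLoop(Utilities::MPI &comm, const double *send, double *recv,
                         int N, int neighbor, int steps) {
    // Set up the persistent send/recv pair once
    std::vector<std::shared_ptr<MPI_Request>> reqs;
    reqs.push_back(comm.Isend_init(send, N, neighbor, 130));
    reqs.push_back(comm.Irecv_init(recv, N, neighbor, 130));
    for (int t = 0; t < steps; t++) {
        for (auto &req : reqs)
            comm.Start(*req);              // restart the same requests each step
        std::vector<MPI_Request> raw;
        for (auto &req : reqs)
            raw.push_back(*req);
        comm.waitAll((int)raw.size(), raw.data());
    }
    // No explicit MPI_Request_free: the shared_ptr deleters handle cleanup
}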


@ -26,6 +26,7 @@ redistribution is prohibited.
#include <atomic>
#include <complex>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
@ -173,10 +174,9 @@ public: // Member functions
*
*/
static void
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD),
const int method = 1,
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD), int method = 1,
const std::vector<int> &procs = std::vector<int>(),
const int N_min = 1, const int N_max = -1);
int N_min = 1, int N_max = -1);
//! Query the level of thread support
static ThreadSupport queryThreadSupport();
@ -420,7 +420,7 @@ public: // Member functions
* \param x The input/output array for the reduce
* \param n The number of values in the array (must match on all nodes)
*/
template <class type> void sumReduce(type *x, const int n = 1) const;
template <class type> void sumReduce(type *x, int n = 1) const;
/**
* \brief Sum Reduce
@ -432,7 +432,7 @@ public: // Member functions
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void sumReduce(const type *x, type *y, const int n = 1) const;
void sumReduce(const type *x, type *y, int n = 1) const;
/**
* \brief Min Reduce
@ -457,7 +457,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void minReduce(type *x, const int n = 1, int *rank_of_min = nullptr) const;
void minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const;
/**
* \brief Sum Reduce
@ -475,7 +475,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void minReduce(const type *x, type *y, const int n = 1,
void minReduce(const type *x, type *y, int n = 1,
int *rank_of_min = nullptr) const;
/**
@ -501,7 +501,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void maxReduce(type *x, const int n = 1, int *rank_of_max = nullptr) const;
void maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const;
/**
* \brief Sum Reduce
@ -519,7 +519,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void maxReduce(const type *x, type *y, const int n = 1,
void maxReduce(const type *x, type *y, int n = 1,
int *rank_of_max = nullptr) const;
/**
@ -530,8 +530,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void sumScan(const type *x, type *y, const int n = 1) const;
template <class type> void sumScan(const type *x, type *y, int n = 1) const;
/**
* \brief Scan Min Reduce
@ -541,8 +540,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void minScan(const type *x, type *y, const int n = 1) const;
template <class type> void minScan(const type *x, type *y, int n = 1) const;
/**
* \brief Scan Max Reduce
@ -552,8 +550,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void maxScan(const type *x, type *y, const int n = 1) const;
template <class type> void maxScan(const type *x, type *y, int n = 1) const;
/**
* \brief Broadcast
@ -561,7 +558,7 @@ public: // Member functions
* \param value The input value for the broadcast.
* \param root The processor performing the broadcast
*/
template <class type> type bcast(const type &value, const int root) const;
template <class type> type bcast(const type &value, int root) const;
/**
* \brief Broadcast
@ -570,8 +567,7 @@ public: // Member functions
* \param n The number of values in the array (must match on all nodes)
* \param root The processor performing the broadcast
*/
template <class type>
void bcast(type *value, const int n, const int root) const;
template <class type> void bcast(type *value, int n, int root) const;
/**
* Perform a global barrier across all processors.
@ -595,8 +591,7 @@ public: // Member functions
* The matching recv must share this tag.
*/
template <class type>
void send(const type *buf, const int length, const int recv,
int tag = 0) const;
void send(const type *buf, int length, int recv, int tag = 0) const;
/*!
* @brief This function sends an MPI message with an array of bytes
@ -611,8 +606,7 @@ public: // Member functions
* to be sent with this message. Default tag is 0.
* The matching recv must share this tag.
*/
void sendBytes(const void *buf, const int N_bytes, const int recv,
int tag = 0) const;
void sendBytes(const void *buf, int N_bytes, int recv, int tag = 0) const;
/*!
* @brief This function sends an MPI message with an array
@ -627,8 +621,8 @@ public: // Member functions
* to be sent with this message.
*/
template <class type>
MPI_Request Isend(const type *buf, const int length, const int recv_proc,
const int tag) const;
MPI_Request Isend(const type *buf, int length, int recv_proc,
int tag) const;
/*!
* @brief This function sends an MPI message with an array of bytes
@ -642,8 +636,8 @@ public: // Member functions
* @param tag Integer argument specifying an integer tag
* to be sent with this message.
*/
MPI_Request IsendBytes(const void *buf, const int N_bytes,
const int recv_proc, const int tag) const;
MPI_Request IsendBytes(const void *buf, int N_bytes, int recv_proc,
int tag) const;
/*!
* @brief This function receives an MPI message with a data
@ -662,7 +656,7 @@ public: // Member functions
* by the tag of the incoming message. Default tag is 0.
*/
template <class type>
inline void recv(type *buf, int length, const int send, int tag) const {
inline void recv(type *buf, int length, int send, int tag) const {
int length2 = length;
recv(buf, length2, send, false, tag);
}
@ -687,7 +681,7 @@ public: // Member functions
* by the tag of the incoming message. Default tag is 0.
*/
template <class type>
void recv(type *buf, int &length, const int send, const bool get_length,
void recv(type *buf, int &length, int send, const bool get_length,
int tag) const;
/*!
@ -703,7 +697,7 @@ public: // Member functions
* must be matched by the tag of the incoming message. Default
* tag is 0.
*/
void recvBytes(void *buf, int &N_bytes, const int send, int tag = 0) const;
void recvBytes(void *buf, int &N_bytes, int send, int tag = 0) const;
/*!
* @brief This function receives an MPI message with a data
@ -716,8 +710,7 @@ public: // Member functions
* be matched by the tag of the incoming message.
*/
template <class type>
MPI_Request Irecv(type *buf, const int length, const int send_proc,
const int tag) const;
MPI_Request Irecv(type *buf, int length, int send_proc, int tag) const;
/*!
* @brief This function receives an MPI message with an array of
@ -731,8 +724,8 @@ public: // Member functions
* @param tag Integer argument specifying a tag which must
* be matched by the tag of the incoming message.
*/
MPI_Request IrecvBytes(void *buf, const int N_bytes, const int send_proc,
const int tag) const;
MPI_Request IrecvBytes(void *buf, int N_bytes, int send_proc,
int tag) const;
/*!
* @brief This function sends and recieves data using a blocking call
@ -741,6 +734,39 @@ public: // Member functions
void sendrecv(const type *sendbuf, int sendcount, int dest, int sendtag,
type *recvbuf, int recvcount, int source, int recvtag) const;
/*!
* @brief This function sets up an Isend call (see MPI_Send_init)
* @param buf Pointer to array buffer with length integers.
* @param length Number of integers in buf that we want to send.
* @param recv_proc Receiving processor number.
* @param tag Tag to send
* @return Returns an MPI_Request.
* Note this returns a shared pointer so the user does not
* need to manually free the request
*/
template <class type>
std::shared_ptr<MPI_Request> Isend_init(const type *buf, int length, int recv_proc,
int tag) const;
/*!
* @brief This function sets up an Irecv call (see MPI_Recv_init)
* @param buf Pointer to integer array buffer with capacity of length integers.
* @param length Maximum number of values that can be stored in buf.
* @param send_proc Processor number of sender.
* @param tag Tag to match
* @return Returns an MPI_Request.
* Note this returns a shared pointer so the user does not
* need to manually free the request
*/
template <class type>
std::shared_ptr<MPI_Request> Irecv_init(type *buf, int length, int send_proc, int tag) const;
/*!
* @brief Start the MPI communication
* @param request Request to start
*/
void Start( MPI_Request &request );
/*!
* Each processor sends every other processor a single value.
* @param[in] x Input value for allGather
@ -792,7 +818,7 @@ public: // Member functions
* and the sizes and displacements will be returned (if desired).
*/
template <class type>
int allGather(const type *send_data, const int send_cnt, type *recv_data,
int allGather(const type *send_data, int send_cnt, type *recv_data,
int *recv_cnt = nullptr, int *recv_disp = nullptr,
bool known_recv = false) const;
@ -822,7 +848,7 @@ public: // Member functions
* @param recv_data Output array of received values (nxN)
*/
template <class type>
void allToAll(const int n, const type *send_data, type *recv_data) const;
void allToAll(int n, const type *send_data, type *recv_data) const;
/*!
* Each processor sends an array of data to the different processors.
@ -995,23 +1021,20 @@ public: // Member functions
MPI loadBalance(double localPerformance, std::vector<double> work);
private: // Private helper functions for templated MPI operations;
template <class type> void call_sumReduce(type *x, const int n = 1) const;
template <class type> void call_sumReduce(type *x, int n = 1) const;
template <class type>
void call_sumReduce(const type *x, type *y, const int n = 1) const;
void call_sumReduce(const type *x, type *y, int n = 1) const;
template <class type>
void call_minReduce(type *x, const int n = 1,
void call_minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const;
template <class type>
void call_minReduce(const type *x, type *y, int n = 1,
int *rank_of_min = nullptr) const;
template <class type>
void call_minReduce(const type *x, type *y, const int n = 1,
int *rank_of_min = nullptr) const;
void call_maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const;
template <class type>
void call_maxReduce(type *x, const int n = 1,
void call_maxReduce(const type *x, type *y, int n = 1,
int *rank_of_max = nullptr) const;
template <class type>
void call_maxReduce(const type *x, type *y, const int n = 1,
int *rank_of_max = nullptr) const;
template <class type>
void call_bcast(type *x, const int n, const int root) const;
template <class type> void call_bcast(type *x, int n, int root) const;
template <class type>
void call_allGather(const type &x_in, type *x_out) const;
template <class type>


@ -322,6 +322,48 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
CommunicationCount = SendCount+RecvCount;
//......................................................................................
//...................................................................................
// Set up the persistent communication for D3Q19AA (use tags 130-145)
//...................................................................................
req_D3Q19AA.clear();
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_x, 5*sendCount_x, rank_x, 130 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_X, 5*recvCount_X, rank_X, 130 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_X, 5*sendCount_X, rank_X, 131 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_x, 5*recvCount_x, rank_x, 131 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_y, 5*sendCount_y, rank_y, 132 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Y, 5*recvCount_Y, rank_Y, 132 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Y, 5*sendCount_Y, rank_Y, 133 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_y, 5*recvCount_y, rank_y, 133 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_z, 5*sendCount_z, rank_z, 134 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Z, 5*recvCount_Z, rank_Z, 134 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Z, 5*sendCount_Z, rank_Z, 135 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_z, 5*recvCount_z, rank_z, 135 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xy, sendCount_xy, rank_xy, 136 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XY, recvCount_XY, rank_XY, 136 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XY, sendCount_XY, rank_XY, 137 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xy, recvCount_xy, rank_xy, 137 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xy, sendCount_Xy, rank_Xy, 138 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xY, recvCount_xY, rank_xY, 138 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xY, sendCount_xY, rank_xY, 139 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xy, recvCount_Xy, rank_Xy, 139 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xz, sendCount_xz, rank_xz, 140 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XZ, recvCount_XZ, rank_XZ, 140 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xZ, sendCount_xZ, rank_xZ, 143 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xz, recvCount_Xz, rank_Xz, 143 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xz, sendCount_Xz, rank_Xz, 142 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xZ, recvCount_xZ, rank_xZ, 142 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XZ, sendCount_XZ, rank_XZ, 141 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xz, recvCount_xz, rank_xz, 141 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yz, sendCount_yz, rank_yz, 144 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_YZ, recvCount_YZ, rank_YZ, 144 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yZ, sendCount_yZ, rank_yZ, 147 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Yz, recvCount_Yz, rank_Yz, 147 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Yz, sendCount_Yz, rank_Yz, 146 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yZ, recvCount_yZ, rank_yZ, 146 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_YZ, sendCount_YZ, rank_YZ, 145 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yz, recvCount_yz, rank_yz, 145 ) );
}
@ -419,6 +461,22 @@ ScaLBL_Communicator::~ScaLBL_Communicator()
ScaLBL_FreeDeviceMemory( dvcRecvDist_Yz );
ScaLBL_FreeDeviceMemory( dvcRecvDist_YZ );
}
void ScaLBL_Communicator::start( std::vector<std::shared_ptr<MPI_Request>>& requests )
{
for ( auto& req : requests )
MPI_COMM_SCALBL.Start( *req );
}
void ScaLBL_Communicator::wait( std::vector<std::shared_ptr<MPI_Request>>& requests )
{
std::vector<MPI_Request> request2;
for ( auto& req : requests )
request2.push_back( *req );
MPI_COMM_SCALBL.waitAll( request2.size(), request2.data() );
}
double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np){
/* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/
/* use MRT kernels to check performance without communication / synchronization */
@ -1397,8 +1455,6 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
else{
Lock=true;
}
// assign tag of 130 to D3Q19 communication
sendtag = recvtag = 130;
ScaLBL_DeviceBarrier();
// Pack the distributions
//...Packing for x face(2,8,10,12,14)................................
@ -1473,42 +1529,7 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
//...................................................................................
ScaLBL_DeviceBarrier();
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag+0);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag+1);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag+2);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag+3);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag+4);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag+5);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag+6);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag+6);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag+7);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag+7);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag+8);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag+8);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag+9);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag+9);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag+10);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag+10);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag+13);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag+13);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag+12);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag+12);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag+11);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag+11);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag+14);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag+14);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag+17);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag+17);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag+16);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag+16);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag+15);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag+15);
start( req_D3Q19AA );
}
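With the requests pre-posted in the constructor, the per-timestep calling pattern is unchanged; a hypothetical driver loop (ScaLBL_Comm, dist and timestepMax are placeholder names, not taken from this commit) would look like:

for ( int t = 0; t < timestepMax; t++ ) {
    ScaLBL_Comm->SendD3Q19AA( dist );   // pack halos, then Start() the persistent requests
    // ... interior lattice sites can be updated here to overlap communication ...
    ScaLBL_Comm->RecvD3Q19AA( dist );   // wait() on the same requests, then unpack halos
}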
@ -1517,8 +1538,7 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){
// NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2
//...................................................................................
// Wait for completion of D3Q19 communication
MPI_COMM_SCALBL.waitAll(18,req1);
MPI_COMM_SCALBL.waitAll(18,req2);
wait( req_D3Q19AA );
ScaLBL_DeviceBarrier();
//...................................................................................
@ -1695,36 +1715,36 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N);
ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag+0);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x, rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X, rank_X,recvtag+0);
//...Packing for X face(1,7,9,11,13)................................
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N);
ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag+1);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X, rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x, rank_x,recvtag+1);
//...Packing for y face(4,8,9,16,18).................................
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N);
ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag+2);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y, rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y, rank_Y,recvtag+2);
//...Packing for Y face(3,7,10,15,17).................................
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N);
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag+3);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y, rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y, rank_y,recvtag+3);
//...Packing for z face(6,12,13,16,17)................................
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N);
ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag+4);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z, rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z, rank_Z,recvtag+4);
//...Packing for Z face(5,11,14,15,18)................................
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N);
@ -1732,8 +1752,8 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){
//...................................................................................
// Send all the distributions
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag+5);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z, rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z, rank_z,recvtag+5);
}
@ -1810,33 +1830,33 @@ void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){
// Pack the distributions
//...Packing for x face(2,8,10,12,14)................................
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*7*N],N);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag+0);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0);
//...Packing for X face(1,7,9,11,13)................................
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*7*N],N);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag+1);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1);
//...Packing for y face(4,8,9,16,18).................................
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*7*N],N);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag+2);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2);
//...Packing for Y face(3,7,10,15,17).................................
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*7*N],N);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag+3);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3);
//...Packing for z face(6,12,13,16,17)................................
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*7*N],N);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag+4);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4);
//...Packing for Z face(5,11,14,15,18)................................
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*7*N],N);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag+5);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5);
}
@ -1929,18 +1949,18 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){
//...................................................................................
// Send all the distributions
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag+0);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag+1);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag+2);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag+3);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag+4);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag+5);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x, rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X, rank_X,recvtag+0);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X, rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x, rank_x,recvtag+1);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y, rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y, rank_Y,recvtag+2);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y, rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y, rank_y,recvtag+3);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z, rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z, rank_Z,recvtag+4);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z, rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z, rank_z,recvtag+5);
}
@ -2045,42 +2065,42 @@ void ScaLBL_Communicator::SendHalo(double *data){
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag+0);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag+1);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag+2);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag+3);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag+4);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag+5);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag+6);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag+6);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag+7);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag+7);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag+8);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag+8);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag+9);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag+9);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag+10);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag+10);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag+11);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag+11);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag+12);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag+12);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag+13);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag+13);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag+14);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag+14);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag+15);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag+15);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag+16);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag+16);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag+17);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag+17);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy, rank_xy,sendtag+6);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY, rank_XY,recvtag+6);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY, rank_XY,sendtag+7);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy, rank_xy,recvtag+7);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy, rank_Xy,sendtag+8);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY, rank_xY,recvtag+8);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY, rank_xY,sendtag+9);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy, rank_Xy,recvtag+9);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz, rank_xz,sendtag+10);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ, rank_XZ,recvtag+10);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ, rank_XZ,sendtag+11);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz, rank_xz,recvtag+11);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz, rank_Xz,sendtag+12);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ, rank_xZ,recvtag+12);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ, rank_xZ,sendtag+13);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz, rank_Xz,recvtag+13);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz, rank_yz,sendtag+14);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ, rank_YZ,recvtag+14);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ, rank_YZ,sendtag+15);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz, rank_yz,recvtag+15);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz, rank_Yz,sendtag+16);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ, rank_yZ,recvtag+16);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ, rank_yZ,sendtag+17);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz, rank_Yz,recvtag+17);
//...................................................................................
}
void ScaLBL_Communicator::RecvHalo(double *data){

@ -799,6 +799,12 @@ private:
int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z;
int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
// MPI requests for persistent communications
std::vector<std::shared_ptr<MPI_Request>> req_D3Q19AA;
std::vector<std::shared_ptr<MPI_Request>> req_BiD3Q19AA;
std::vector<std::shared_ptr<MPI_Request>> req_TriD3Q19AA;
void start( std::vector<std::shared_ptr<MPI_Request>>& requests );
void wait( std::vector<std::shared_ptr<MPI_Request>>& requests );
//......................................................................................
int *bb_dist;
int *bb_interactions;