Merge branch 'Spock' of github.com:JamesEMcClure/LBPM-WIA into Spock

2021-07-01 14:28:47 -04:00
parent 5fabda67c5 1d07c1f860
commit 6838cabbaf
1 changed files with 43 additions and 26 deletions
--- a/tests/test_MPI.cpp
+++ b/tests/test_MPI.cpp
@@ -1199,7 +1199,8 @@ void testCommDup( UnitTest *ut )
 // Test GPU aware MPI
 void test_GPU_aware( UnitTest *ut )
 {
-    size_t N = 1024*1024;
+    constexpr size_t N = 1024*1024;
+    constexpr size_t N_msg = 64;
    bool test = true;
    // Get the comm to use
    MPI_CLASS comm( MPI_COMM_WORLD );
@@ -1208,39 +1209,55 @@ void test_GPU_aware( UnitTest *ut )
    try {
        // Initialize the device
        int device = ScaLBL_SetDevice(rank); 
-        // Allocate buffers
+        // Allocate and initialize the buffers
        size_t bytes = N*sizeof(double);
-        double *device_send = nullptr, *device_recv = nullptr;
-        ScaLBL_AllocateDeviceMemory((void**)&device_send,bytes);
-        ScaLBL_AllocateDeviceMemory((void**)&device_recv,bytes);
-        auto host_send = new double[N];
-        auto host_recv = new double[N];
-        // Initialize the data
-        for ( size_t i=0; i<N; i++ ) {
-            host_send[i] = 10000 * rank + i;
-            host_recv[i] = 0;
+        double *device_send[N_msg] = { nullptr };
+        double *device_recv[N_msg] = { nullptr };
+        double *host_send[N_msg] = { nullptr };
+        double *host_recv[N_msg] = { nullptr };
+        for ( size_t k=0; k<N_msg; k++ ) {
+            ScaLBL_AllocateDeviceMemory((void**)&device_send[k],bytes);
+            ScaLBL_AllocateDeviceMemory((void**)&device_recv[k],bytes);
+            host_send[k] = new double[N];
+            host_recv[k] = new double[N];
+            // Initialize the data
+            for ( size_t i=0; i<N; i++ ) {
+                host_send[k][i] = 1000 * k * rank + i;
+                host_recv[k][i] = 0;
+            }
+            ScaLBL_CopyToDevice(device_send[k],host_send[k],bytes);
+            ScaLBL_CopyToDevice(device_recv[k],host_recv[k],bytes);
        }
-        ScaLBL_CopyToDevice(device_send,host_send,bytes);
-        ScaLBL_CopyToDevice(device_recv,host_recv,bytes);
        ScaLBL_DeviceBarrier();
        // Send/recieve the data
        int rank_send = ( rank + 1 ) % size;
        int rank_recv = ( rank - 1 + size ) % size;
-        auto req1 = comm.Isend( device_send, N, rank_send, 1 );
-        auto req2 = comm.Irecv( device_recv, N, rank_recv, 1 );
-        comm.wait(req1);
-        comm.wait(req2);
-        // Check the data
-        ScaLBL_CopyToHost(host_send,device_send,bytes);
-        ScaLBL_CopyToHost(host_recv,device_recv,bytes);
+        MPI_Request req1[N_msg];
+        MPI_Request req2[N_msg];
+        for ( size_t k=0; k<N_msg; k++ ) {
+            req1[k] = comm.Isend( device_send[k], N, rank_send, k );
+            req2[k] = comm.Irecv( device_recv[k], N, rank_recv, k );
+        }
+        comm.waitAll(N_msg,req1);
+        comm.waitAll(N_msg,req2);
+        // Copy
+        for ( size_t k=0; k<N_msg; k++ ) {
+            ScaLBL_CopyToHost(host_send[k],device_send[k],bytes);
+            ScaLBL_CopyToHost(host_recv[k],device_recv[k],bytes);
+        }
        ScaLBL_DeviceBarrier();
-        for ( size_t i=0; i<N; i++ )
-            test = test && host_recv[i] == 10000 * rank_recv + i;
+        // Check the data
+        for ( size_t k=0; k<N_msg; k++ ) {
+            for ( size_t i=0; i<N; i++ )
+                test = test && host_recv[k][i] == 1000 * k * rank_recv + i;
+        }
        // Free buffers
-        ScaLBL_FreeDeviceMemory(device_send);
-        ScaLBL_FreeDeviceMemory(device_recv);
-        delete [] host_send;
-        delete [] host_recv;
+        for ( size_t k=0; k<N_msg; k++ ) {
+            ScaLBL_FreeDeviceMemory(device_send[k]);
+            ScaLBL_FreeDeviceMemory(device_recv[k]);
+            delete [] host_send[k];
+            delete [] host_recv[k];
+        }
    } catch ( ... ) {
        test = false;
    }