fix failed merge

JamesEMcclure 2020-03-17 21:44:45 -04:00
parent 9f5b44dfe4
commit 05cafcb525
125 changed files with 2544 additions and 8538 deletions


@ -1,174 +1,170 @@
# Set some CMake properties
CMAKE_MINIMUM_REQUIRED( VERSION 3.9 )
MESSAGE("====================")
MESSAGE("Configuring LBPM-WIA")
MESSAGE("====================")
# Set the project name
SET( PROJ LBPM ) # Set the project name for CMake
SET( LBPM_LIB lbpm-wia ) # Set the final library name
SET( LBPM_INC ) # Set an optional subfolder for includes (e.g. include/name/...)
SET( TEST_MAX_PROCS 16 )
# Initialize the project
PROJECT( ${PROJ} LANGUAGES CXX )
# Prevent users from building in place
IF ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}" )
MESSAGE( FATAL_ERROR "Building code in place is a bad idea" )
ENDIF()
# Set the default C++ standard
SET( CMAKE_CXX_EXTENSIONS OFF )
IF ( NOT CMAKE_CXX_STANDARD )
IF ( CXX_STD )
MESSAGE( FATAL_ERROR "CXX_STD is obsolete, please set CMAKE_CXX_STANDARD" )
ENDIF()
SET( CMAKE_CXX_STANDARD 14 )
ENDIF()
IF ( ( "${CMAKE_CXX_STANDARD}" GREATER "90" ) OR ( "${CMAKE_CXX_STANDARD}" LESS "14" ) )
MESSAGE( FATAL_ERROR "C++14 or newer required" )
ENDIF()
# Set source/install paths
SET( ${PROJ}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
SET( ${PROJ}_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}" )
IF( ${PROJ}_INSTALL_DIR )
SET( ${PROJ}_INSTALL_DIR "${${PROJ}_INSTALL_DIR}" )
ELSEIF( PREFIX )
SET( ${PROJ}_INSTALL_DIR "${PREFIX}" )
ELSEIF( NOT ${PROJ}_INSTALL_DIR )
SET( ${PROJ}_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}" )
ENDIF()
INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" )
SET( CMAKE_MODULE_PATH ${${PROJ}_SOURCE_DIR} ${${PROJ}_SOURCE_DIR}/cmake )
# Include macros
INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/macros.cmake" )
INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/libraries.cmake" )
INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/LBPM-macros.cmake" )
# Check if we are only compiling docs
CHECK_ENABLE_FLAG( ONLY_BUILD_DOCS 0 )
# Set testing parameters
SET( DROP_METHOD "http" )
SET( DROP_SITE "" )
SET( DROP_LOCATION "/CDash/submit.php?project=LBPM-WIA" )
SET( TRIGGER_SITE "" )
SET( DROP_SITE_CDASH TRUE )
ENABLE_TESTING()
INCLUDE( CTest )
# Check the compile mode and compile flags
IF ( NOT ONLY_BUILD_DOCS )
CONFIGURE_SYSTEM()
ENDIF()
# Add some directories to include
INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" )
# Create the target for documentation
ADD_CUSTOM_TARGET( doc )
ADD_CUSTOM_TARGET( latex_docs )
CHECK_ENABLE_FLAG( USE_DOXYGEN 1 )
CHECK_ENABLE_FLAG( USE_LATEX 1 )
FILE( MAKE_DIRECTORY "${${PROJ}_INSTALL_DIR}/doc" )
IF ( USE_DOXYGEN )
SET( DOXYFILE_LATEX YES )
SET( DOXYFILE_IN "${${PROJ}_SOURCE_DIR}/doxygen/Doxyfile.in" )
SET( DOXY_HEADER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/header.html" )
SET( DOXY_FOOTER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/footer.html" )
SET( DOXYFILE_OUTPUT_DIR "${${PROJ}_INSTALL_DIR}/doc" )
SET( DOXYFILE_SRC_HTML_DIR "${${PROJ}_SOURCE_DIR}/doxygen/html" )
SET( DOXYFILE_SOURCE_DIR "${${PROJ}_SOURCE_DIR}" )
SET( REL_PACKAGE_HTML "" )
SET( DOXYGEN_MACROS "" )
MESSAGE("DOXYGEN_MACROS = ${DOXYGEN_MACROS}")
INCLUDE( "${${PROJ}_SOURCE_DIR}/cmake/UseDoxygen.cmake" )
IF ( DOXYGEN_FOUND )
ADD_DEPENDENCIES( doxygen latex_docs )
ADD_DEPENDENCIES( doc latex_docs doxygen )
ELSE()
SET( USE_DOXYGEN 0 )
ENDIF()
ENDIF()
# Create custom targets for build-test, check, and distclean
ADD_CUSTOM_TARGET( build-test )
ADD_CUSTOM_TARGET( build-examples )
ADD_CUSTOM_TARGET( check COMMAND make test )
ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace )
# Check for CUDA
CHECK_ENABLE_FLAG( USE_CUDA 0 )
CHECK_ENABLE_FLAG( USE_HIP 0 )
NULL_USE( CMAKE_CUDA_FLAGS )
IF ( USE_CUDA )
ADD_DEFINITIONS( -DUSE_CUDA )
ENABLE_LANGUAGE( CUDA )
ELSEIF ( USE_HIP )
FIND_PACKAGE( HIP )
MESSAGE( FATAL_ERROR "STOP" )
ENDIF()
# Configure external packages
IF ( NOT ONLY_BUILD_DOCS )
CONFIGURE_MPI() # MPI must be before other libraries
CONFIGURE_MIC()
CONFIGURE_NETCDF()
CONFIGURE_SILO()
CONFIGURE_LBPM()
CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" )
CONFIGURE_LINE_COVERAGE()
# Set the external library link list
SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} )
ENDIF()
# Macro to create 1,2,4 processor tests
MACRO( ADD_LBPM_TEST_1_2_4 EXENAME ${ARGN} )
ADD_LBPM_TEST( ${EXENAME} ${ARGN} )
ADD_LBPM_TEST_PARALLEL( ${EXENAME} 2 ${ARGN} )
ADD_LBPM_TEST_PARALLEL( ${EXENAME} 4 ${ARGN} )
ENDMACRO()
# Add the src directories
IF ( NOT ONLY_BUILD_DOCS )
BEGIN_PACKAGE_CONFIG( lbpm-wia-library )
ADD_PACKAGE_SUBDIRECTORY( common )
ADD_PACKAGE_SUBDIRECTORY( analysis )
ADD_PACKAGE_SUBDIRECTORY( IO )
ADD_PACKAGE_SUBDIRECTORY( threadpool )
ADD_PACKAGE_SUBDIRECTORY( StackTrace )
ADD_PACKAGE_SUBDIRECTORY( models )
IF ( USE_CUDA )
ADD_PACKAGE_SUBDIRECTORY( gpu )
ELSE()
ADD_PACKAGE_SUBDIRECTORY( cpu )
ENDIF()
INSTALL_LBPM_TARGET( lbpm-wia-library )
ADD_SUBDIRECTORY( tests )
ADD_SUBDIRECTORY( example )
#ADD_SUBDIRECTORY( workflows )
INSTALL_PROJ_LIB()
ENDIF()
# Set some CMake properties
CMAKE_MINIMUM_REQUIRED( VERSION 3.9 )
MESSAGE("====================")
MESSAGE("Configuring LBPM-WIA")
MESSAGE("====================")
# Set the project name
SET( PROJ LBPM ) # Set the project name for CMake
SET( LBPM_LIB lbpm-wia ) # Set the final library name
SET( LBPM_INC ) # Set an optional subfolder for includes (e.g. include/name/...)
SET( TEST_MAX_PROCS 16 )
# Initialize the project
PROJECT( ${PROJ} LANGUAGES CXX )
# Prevent users from building in place
IF ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}" )
MESSAGE( FATAL_ERROR "Building code in place is a bad idea" )
ENDIF()
# Set the default C++ standard
SET( CMAKE_CXX_EXTENSIONS OFF )
IF ( NOT CMAKE_CXX_STANDARD )
IF ( CXX_STD )
MESSAGE( FATAL_ERROR "CXX_STD is obsolete, please set CMAKE_CXX_STANDARD" )
ENDIF()
SET( CMAKE_CXX_STANDARD 14 )
ENDIF()
IF ( ( "${CMAKE_CXX_STANDARD}" GREATER "90" ) OR ( "${CMAKE_CXX_STANDARD}" LESS "14" ) )
MESSAGE( FATAL_ERROR "C++14 or newer required" )
ENDIF()
# Set source/install paths
SET( ${PROJ}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
SET( ${PROJ}_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}" )
IF( ${PROJ}_INSTALL_DIR )
SET( ${PROJ}_INSTALL_DIR "${${PROJ}_INSTALL_DIR}" )
ELSEIF( PREFIX )
SET( ${PROJ}_INSTALL_DIR "${PREFIX}" )
ELSEIF( NOT ${PROJ}_INSTALL_DIR )
SET( ${PROJ}_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}" )
ENDIF()
INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" )
SET( CMAKE_MODULE_PATH ${${PROJ}_SOURCE_DIR} ${${PROJ}_SOURCE_DIR}/cmake )
# Include macros
INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/macros.cmake" )
INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/libraries.cmake" )
INCLUDE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/LBPM-macros.cmake" )
# Check if we are only compiling docs
CHECK_ENABLE_FLAG( ONLY_BUILD_DOCS 0 )
# Set testing parameters
SET( DROP_METHOD "http" )
SET( DROP_SITE "" )
SET( DROP_LOCATION "/CDash/submit.php?project=LBPM-WIA" )
SET( TRIGGER_SITE "" )
SET( DROP_SITE_CDASH TRUE )
ENABLE_TESTING()
INCLUDE( CTest )
# Check the compile mode and compile flags
IF ( NOT ONLY_BUILD_DOCS )
CONFIGURE_SYSTEM()
ENDIF()
# Add some directories to include
INCLUDE_DIRECTORIES( "${${PROJ}_INSTALL_DIR}/include" )
# Create the target for documentation
ADD_CUSTOM_TARGET( doc )
ADD_CUSTOM_TARGET( latex_docs )
CHECK_ENABLE_FLAG( USE_DOXYGEN 1 )
CHECK_ENABLE_FLAG( USE_LATEX 1 )
FILE( MAKE_DIRECTORY "${${PROJ}_INSTALL_DIR}/doc" )
IF ( USE_DOXYGEN )
SET( DOXYFILE_LATEX YES )
SET( DOXYFILE_IN "${${PROJ}_SOURCE_DIR}/doxygen/Doxyfile.in" )
SET( DOXY_HEADER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/header.html" )
SET( DOXY_FOOTER_FILE "${${PROJ}_SOURCE_DIR}/doxygen/html/footer.html" )
SET( DOXYFILE_OUTPUT_DIR "${${PROJ}_INSTALL_DIR}/doc" )
SET( DOXYFILE_SRC_HTML_DIR "${${PROJ}_SOURCE_DIR}/doxygen/html" )
SET( DOXYFILE_SOURCE_DIR "${${PROJ}_SOURCE_DIR}" )
SET( REL_PACKAGE_HTML "" )
SET( DOXYGEN_MACROS "" )
MESSAGE("DOXYGEN_MACROS = ${DOXYGEN_MACROS}")
INCLUDE( "${${PROJ}_SOURCE_DIR}/cmake/UseDoxygen.cmake" )
IF ( DOXYGEN_FOUND )
ADD_DEPENDENCIES( doxygen latex_docs )
ADD_DEPENDENCIES( doc latex_docs doxygen )
ELSE()
SET( USE_DOXYGEN 0 )
ENDIF()
ENDIF()
# Create custom targets for build-test, check, and distclean
ADD_CUSTOM_TARGET( build-test )
ADD_CUSTOM_TARGET( build-examples )
ADD_CUSTOM_TARGET( check COMMAND make test )
ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace )
# Check for CUDA
CHECK_ENABLE_FLAG( USE_CUDA 0 )
NULL_USE( CMAKE_CUDA_FLAGS )
IF ( USE_CUDA )
ADD_DEFINITIONS( -DUSE_CUDA )
ENABLE_LANGUAGE( CUDA )
ENDIF()
# Configure external packages
IF ( NOT ONLY_BUILD_DOCS )
CONFIGURE_MPI() # MPI must be before other libraries
CONFIGURE_MIC()
CONFIGURE_NETCDF()
CONFIGURE_SILO()
CONFIGURE_LBPM()
CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" )
CONFIGURE_LINE_COVERAGE()
# Set the external library link list
SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} )
ENDIF()
# Macro to create 1,2,4 processor tests
MACRO( ADD_LBPM_TEST_1_2_4 EXENAME ${ARGN} )
ADD_LBPM_TEST( ${EXENAME} ${ARGN} )
ADD_LBPM_TEST_PARALLEL( ${EXENAME} 2 ${ARGN} )
ADD_LBPM_TEST_PARALLEL( ${EXENAME} 4 ${ARGN} )
ENDMACRO()
# Add the src directories
IF ( NOT ONLY_BUILD_DOCS )
BEGIN_PACKAGE_CONFIG( lbpm-wia-library )
ADD_PACKAGE_SUBDIRECTORY( common )
ADD_PACKAGE_SUBDIRECTORY( analysis )
ADD_PACKAGE_SUBDIRECTORY( IO )
ADD_PACKAGE_SUBDIRECTORY( threadpool )
ADD_PACKAGE_SUBDIRECTORY( StackTrace )
ADD_PACKAGE_SUBDIRECTORY( models )
IF ( USE_CUDA )
ADD_PACKAGE_SUBDIRECTORY( gpu )
ELSE()
ADD_PACKAGE_SUBDIRECTORY( cpu )
ENDIF()
INSTALL_LBPM_TARGET( lbpm-wia-library )
ADD_SUBDIRECTORY( tests )
ADD_SUBDIRECTORY( example )
#ADD_SUBDIRECTORY( workflows )
INSTALL_PROJ_LIB()
ENDIF()


@ -1,8 +1,7 @@
#include "IO/MeshDatabase.h"
#include "IO/Mesh.h"
#include "IO/PackData.h"
#include "IO/IOHelpers.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include <vector>
@ -14,6 +13,8 @@
/****************************************************
****************************************************/
// MeshType
template<>
size_t packsize<IO::MeshType>( const IO::MeshType& rhs )
@ -246,76 +247,80 @@ void DatabaseEntry::read( const std::string& line )
// Gather the mesh databases from all processors
inline int tod( int N ) { return (N+7)/sizeof(double); }
std::vector<MeshDatabase> gatherAll( const std::vector<MeshDatabase>& meshes, const Utilities::MPI& comm )
std::vector<MeshDatabase> gatherAll( const std::vector<MeshDatabase>& meshes, MPI_Comm comm )
{
if ( comm.getSize() == 1 )
return meshes;
PROFILE_START("gatherAll");
PROFILE_START("gatherAll-pack",2);
int size = comm.getSize();
// First pack the mesh data to local buffers
int localsize = 0;
for (size_t i=0; i<meshes.size(); i++)
localsize += tod(packsize(meshes[i]));
auto localbuf = new double[localsize];
int pos = 0;
for (size_t i=0; i<meshes.size(); i++) {
pack( meshes[i], (char*) &localbuf[pos] );
pos += tod(packsize(meshes[i]));
}
PROFILE_STOP("gatherAll-pack",2);
// Get the number of bytes each processor will be sending/receiving
PROFILE_START("gatherAll-send1",2);
auto recvsize = comm.allGather( localsize );
int globalsize = recvsize[0];
auto disp = new int[size];
disp[0] = 0;
for (int i=1; i<size; i++) {
disp[i] = disp[i-1] + recvsize[i];
globalsize += recvsize[i];
}
PROFILE_STOP("gatherAll-send1",2);
// Send/recv the global data
PROFILE_START("gatherAll-send2",2);
auto globalbuf = new double[globalsize];
comm.allGather(localbuf,localsize,globalbuf,recvsize.data(),disp,true);
PROFILE_STOP("gatherAll-send2",2);
// Unpack the data
PROFILE_START("gatherAll-unpack",2);
std::map<std::string,MeshDatabase> data;
pos = 0;
while ( pos < globalsize ) {
MeshDatabase tmp;
unpack(tmp,(char*)&globalbuf[pos]);
pos += tod(packsize(tmp));
std::map<std::string,MeshDatabase>::iterator it = data.find(tmp.name);
if ( it==data.end() ) {
data[tmp.name] = tmp;
} else {
for (size_t i=0; i<tmp.domains.size(); i++)
it->second.domains.push_back(tmp.domains[i]);
for (size_t i=0; i<tmp.variables.size(); i++)
it->second.variables.push_back(tmp.variables[i]);
it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end());
#ifdef USE_MPI
PROFILE_START("gatherAll");
PROFILE_START("gatherAll-pack",2);
int size = MPI_WORLD_SIZE();
// First pack the mesh data to local buffers
int localsize = 0;
for (size_t i=0; i<meshes.size(); i++)
localsize += tod(packsize(meshes[i]));
auto localbuf = new double[localsize];
int pos = 0;
for (size_t i=0; i<meshes.size(); i++) {
pack( meshes[i], (char*) &localbuf[pos] );
pos += tod(packsize(meshes[i]));
}
}
for (auto it=data.begin(); it!=data.end(); ++it) {
// Get the unique variables
std::set<VariableDatabase> data2(it->second.variables.begin(),it->second.variables.end());
it->second.variables = std::vector<VariableDatabase>(data2.begin(),data2.end());
}
// Free temporary memory
delete [] localbuf;
delete [] disp;
delete [] globalbuf;
// Return the results
std::vector<MeshDatabase> data2(data.size());
size_t i=0;
for (std::map<std::string,MeshDatabase>::iterator it=data.begin(); it!=data.end(); ++it, ++i)
data2[i] = it->second;
PROFILE_STOP("gatherAll-unpack",2);
PROFILE_STOP("gatherAll");
return data2;
PROFILE_STOP("gatherAll-pack",2);
// Get the number of bytes each processor will be sending/receiving
PROFILE_START("gatherAll-send1",2);
auto recvsize = new int[size];
MPI_Allgather(&localsize,1,MPI_INT,recvsize,1,MPI_INT,comm);
int globalsize = recvsize[0];
auto disp = new int[size];
disp[0] = 0;
for (int i=1; i<size; i++) {
disp[i] = disp[i-1] + recvsize[i];
globalsize += recvsize[i];
}
PROFILE_STOP("gatherAll-send1",2);
// Send/recv the global data
PROFILE_START("gatherAll-send2",2);
auto globalbuf = new double[globalsize];
MPI_Allgatherv(localbuf,localsize,MPI_DOUBLE,globalbuf,recvsize,disp,MPI_DOUBLE,comm);
PROFILE_STOP("gatherAll-send2",2);
// Unpack the data
PROFILE_START("gatherAll-unpack",2);
std::map<std::string,MeshDatabase> data;
pos = 0;
while ( pos < globalsize ) {
MeshDatabase tmp;
unpack(tmp,(char*)&globalbuf[pos]);
pos += tod(packsize(tmp));
std::map<std::string,MeshDatabase>::iterator it = data.find(tmp.name);
if ( it==data.end() ) {
data[tmp.name] = tmp;
} else {
for (size_t i=0; i<tmp.domains.size(); i++)
it->second.domains.push_back(tmp.domains[i]);
for (size_t i=0; i<tmp.variables.size(); i++)
it->second.variables.push_back(tmp.variables[i]);
it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end());
}
}
for (std::map<std::string,MeshDatabase>::iterator it=data.begin(); it!=data.end(); ++it) {
// Get the unique variables
std::set<VariableDatabase> data2(it->second.variables.begin(),it->second.variables.end());
it->second.variables = std::vector<VariableDatabase>(data2.begin(),data2.end());
}
// Free temporary memory
delete [] localbuf;
delete [] recvsize;
delete [] disp;
delete [] globalbuf;
// Return the results
std::vector<MeshDatabase> data2(data.size());
size_t i=0;
for (std::map<std::string,MeshDatabase>::iterator it=data.begin(); it!=data.end(); ++it, ++i)
data2[i] = it->second;
PROFILE_STOP("gatherAll-unpack",2);
PROFILE_STOP("gatherAll");
return data2;
#else
return meshes;
#endif
}
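The restored gatherAll body above follows the standard two-step variable-length gather: exchange per-rank counts with MPI_Allgather, build displacements as the running sum of the preceding counts, then exchange the payloads with MPI_Allgatherv. A minimal, self-contained sketch of that pattern (illustrative only, not part of this commit; gatherVariable is a hypothetical name):

#include <mpi.h>
#include <vector>

// Gather variable-length double buffers from every rank onto every rank.
std::vector<double> gatherVariable( const std::vector<double>& local, MPI_Comm comm )
{
    int nprocs = 0;
    MPI_Comm_size( comm, &nprocs );
    int localsize = static_cast<int>( local.size() );
    std::vector<int> recvsize( nprocs, 0 ), disp( nprocs, 0 );
    // Every rank learns how many entries each rank will contribute
    MPI_Allgather( &localsize, 1, MPI_INT, recvsize.data(), 1, MPI_INT, comm );
    // Displacement of rank i is the sum of the counts of ranks 0..i-1
    for ( int i = 1; i < nprocs; i++ )
        disp[i] = disp[i-1] + recvsize[i-1];
    std::vector<double> global( disp[nprocs-1] + recvsize[nprocs-1] );
    // Exchange the payloads in a single collective
    MPI_Allgatherv( local.data(), localsize, MPI_DOUBLE,
                    global.data(), recvsize.data(), disp.data(), MPI_DOUBLE, comm );
    return global;
}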


@ -2,7 +2,7 @@
#define MeshDatabase_INC
#include "IO/Mesh.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include <iostream>
#include <memory>
@ -70,7 +70,7 @@ public:
//! Gather the mesh databases from all processors
std::vector<MeshDatabase> gatherAll( const std::vector<MeshDatabase>& meshes, const Utilities::MPI& comm );
std::vector<MeshDatabase> gatherAll( const std::vector<MeshDatabase>& meshes, MPI_Comm comm );
//! Write the mesh databases to a file


@ -1,6 +1,6 @@
#include "IO/PIO.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include <fstream>
#include <string>
@ -36,7 +36,10 @@ static void shutdownFilestream( )
}
void Utilities::logOnlyNodeZero( const std::string &filename )
{
int rank = ::Utilities::MPI( MPI_COMM_WORLD ).getRank();
int rank = 0;
#ifdef USE_MPI
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
#endif
if ( rank == 0 )
logAllNodes(filename,true);
}
@ -51,7 +54,10 @@ void Utilities::logAllNodes( const std::string &filename, bool singleStream )
// Open the log stream and redirect output
std::string full_filename = filename;
if ( !singleStream ) {
int rank = ::Utilities::MPI( MPI_COMM_WORLD ).getRank();
int rank = 0;
#ifdef USE_MPI
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
#endif
char tmp[100];
sprintf(tmp,".%04i",rank);
full_filename += std::string(tmp);
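Both hunks above repeat the same rank guard so that the logging code also works in serial builds. A sketch of the pattern as a small helper (hypothetical; assumes USE_MPI is only defined when the MPI headers are available):

#ifdef USE_MPI
#include <mpi.h>
#endif

// Return the MPI rank of this process, or 0 when built without MPI.
inline int worldRank()
{
    int rank = 0;
#ifdef USE_MPI
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
#endif
    return rank;
}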


@ -1,105 +0,0 @@
#include "IO/PackData.h"
#include <string.h>
/********************************************************
* Concrete implementations for packing/unpacking *
********************************************************/
// unsigned char
template<>
size_t packsize<unsigned char>( const unsigned char& rhs )
{
return sizeof(unsigned char);
}
template<>
void pack<unsigned char>( const unsigned char& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(unsigned char));
}
template<>
void unpack<unsigned char>( unsigned char& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(unsigned char));
}
// char
template<>
size_t packsize<char>( const char& rhs )
{
return sizeof(char);
}
template<>
void pack<char>( const char& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(char));
}
template<>
void unpack<char>( char& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(char));
}
// int
template<>
size_t packsize<int>( const int& rhs )
{
return sizeof(int);
}
template<>
void pack<int>( const int& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(int));
}
template<>
void unpack<int>( int& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(int));
}
// unsigned int
template<>
size_t packsize<unsigned int>( const unsigned int& rhs )
{
return sizeof(unsigned int);
}
template<>
void pack<unsigned int>( const unsigned int& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(int));
}
template<>
void unpack<unsigned int>( unsigned int& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(int));
}
// size_t
template<>
size_t packsize<size_t>( const size_t& rhs )
{
return sizeof(size_t);
}
template<>
void pack<size_t>( const size_t& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(size_t));
}
template<>
void unpack<size_t>( size_t& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(size_t));
}
// std::string
template<>
size_t packsize<std::string>( const std::string& rhs )
{
return rhs.size()+1;
}
template<>
void pack<std::string>( const std::string& rhs, char *buffer )
{
memcpy(buffer,rhs.c_str(),rhs.size()+1);
}
template<>
void unpack<std::string>( std::string& data, const char *buffer )
{
data = std::string(buffer);
}
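For reference, the removed specializations above support simple round-trips such as the following (illustrative sketch, not part of the original file):

#include <string>
#include <vector>
#include "IO/PackData.h"

// Pack a std::string into a byte buffer and unpack it again.
void packRoundTrip()
{
    std::string in = "LBPM", out;
    std::vector<char> buffer( packsize( in ) );  // string length plus null terminator
    pack( in, buffer.data() );
    unpack( out, buffer.data() );
    // out now equals "LBPM"
}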


@ -1,78 +0,0 @@
// This file contains functions to pack/unpack data structures
#ifndef included_PackData
#define included_PackData
#include <vector>
#include <set>
#include <map>
//! Template function to return the buffer size required to pack a class
template<class TYPE>
size_t packsize( const TYPE& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const TYPE& rhs, char *buffer );
//! Template function to unpack a class from a buffer
template<class TYPE>
void unpack( TYPE& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::vector
template<class TYPE>
size_t packsize( const std::vector<TYPE>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const std::vector<TYPE>& rhs, char *buffer );
//! Template function to unpack a class from a buffer
template<class TYPE>
void unpack( std::vector<TYPE>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::pair
template<class TYPE1, class TYPE2>
size_t packsize( const std::pair<TYPE1,TYPE2>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void pack( const std::pair<TYPE1,TYPE2>& rhs, char *buffer );
//! Template function to unpack a class from a buffer
template<class TYPE1, class TYPE2>
void unpack( std::pair<TYPE1,TYPE2>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::map
template<class TYPE1, class TYPE2>
size_t packsize( const std::map<TYPE1,TYPE2>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void pack( const std::map<TYPE1,TYPE2>& rhs, char *buffer );
//! Template function to unpack a class from a buffer
template<class TYPE1, class TYPE2>
void unpack( std::map<TYPE1,TYPE2>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::set
template<class TYPE>
size_t packsize( const std::set<TYPE>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const std::set<TYPE>& rhs, char *buffer );
//! Template function to unpack a class from a buffer
template<class TYPE>
void unpack( std::set<TYPE>& data, const char *buffer );
#include "IO/PackData.hpp"
#endif
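The container overloads declared above are implemented in IO/PackData.hpp, which this diff does not show. A plausible sketch of the std::vector case, assuming the wire format is an element count followed by the packed elements (the real header may differ):

// Hypothetical implementation sketch only.
template<class TYPE>
size_t packsize( const std::vector<TYPE>& rhs )
{
    size_t bytes = sizeof(size_t);       // leading element count
    for ( size_t i = 0; i < rhs.size(); i++ )
        bytes += packsize( rhs[i] );     // recurse into the element type
    return bytes;
}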


@ -2,7 +2,7 @@
#include "IO/MeshDatabase.h"
#include "IO/IOHelpers.h"
#include "IO/silo.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include <sys/stat.h>
@ -36,7 +36,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap
global_IO_format = Format::SILO;
else
ERROR("Unknown format");
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = comm_rank(MPI_COMM_WORLD);
if ( !append && rank==0 ) {
mkdir(path.c_str(),S_IRWXU|S_IRGRP);
std::string filename;
@ -55,7 +55,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap
// Write the mesh data in the original format
static std::vector<IO::MeshDatabase> writeMeshesOrigFormat( const std::vector<IO::MeshDataStruct>& meshData, const std::string& path )
{
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
std::vector<IO::MeshDatabase> meshes_written;
for (size_t i=0; i<meshData.size(); i++) {
char domainname[100], filename[100], fullpath[200];
@ -120,7 +120,7 @@ static std::vector<IO::MeshDatabase> writeMeshesOrigFormat( const std::vector<IO
// Create the database entry for the mesh data
static IO::MeshDatabase getDatabase( const std::string& filename, const IO::MeshDataStruct& mesh, int format )
{
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
char domainname[100];
sprintf(domainname,"%s_%05i",mesh.meshName.c_str(),rank);
// Create the MeshDatabase
@ -161,7 +161,7 @@ static IO::MeshDatabase write_domain( FILE *fid, const std::string& filename,
const IO::MeshDataStruct& mesh, int format )
{
const int level = 0;
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
// Create the MeshDatabase
IO::MeshDatabase database = getDatabase( filename, mesh, format );
// Write the mesh
@ -399,7 +399,7 @@ void writeSiloSummary( const std::vector<IO::MeshDatabase>& meshes_written, cons
static std::vector<IO::MeshDatabase> writeMeshesNewFormat(
const std::vector<IO::MeshDataStruct>& meshData, const std::string& path, int format )
{
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
std::vector<IO::MeshDatabase> meshes_written;
char filename[100], fullpath[200];
sprintf(filename,"%05i",rank);
@ -419,7 +419,7 @@ static std::vector<IO::MeshDatabase> writeMeshesSilo(
const std::vector<IO::MeshDataStruct>& meshData, const std::string& path, int format )
{
#ifdef USE_SILO
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
std::vector<IO::MeshDatabase> meshes_written;
char filename[100], fullpath[200];
sprintf(filename,"%05i.silo",rank);
@ -441,12 +441,12 @@ static std::vector<IO::MeshDatabase> writeMeshesSilo(
/****************************************************
* Write the mesh data *
****************************************************/
void IO::writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>& meshData, const Utilities::MPI& comm )
void IO::writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>& meshData, MPI_Comm comm )
{
if ( global_IO_path.empty() )
IO::initialize( );
PROFILE_START("writeData");
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = comm_rank(comm);
// Check the meshData before writing
for ( const auto& data : meshData ) {
if ( !data.check() )
@ -457,7 +457,7 @@ void IO::writeData( const std::string& subdir, const std::vector<IO::MeshDataStr
if ( rank == 0 ) {
mkdir(path.c_str(),S_IRWXU|S_IRGRP);
}
comm.barrier();
MPI_Barrier(comm);
// Write the mesh files
std::vector<IO::MeshDatabase> meshes_written;
if ( global_IO_format == Format::OLD ) {


@ -34,7 +34,7 @@ void initialize( const std::string& path="", const std::string& format="silo", b
* @param[in] meshData The data to write
* @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof)
*/
void writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>& meshData, const Utilities::MPI& comm );
void writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>& meshData, MPI_Comm comm );
/*!
@ -44,7 +44,7 @@ void writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>
* @param[in] meshData The data to write
* @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof)
*/
inline void writeData( int timestep, const std::vector<IO::MeshDataStruct>& meshData, const Utilities::MPI& comm )
inline void writeData( int timestep, const std::vector<IO::MeshDataStruct>& meshData, MPI_Comm comm )
{
char subdir[100];
sprintf(subdir,"vis%03i",timestep);
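A hypothetical call site for the reverted inline overload above (illustrative only):

#include <mpi.h>
#include <vector>
#include "IO/Writer.h"

// Write one visualization dump for the given timestep.
void writeTimestep( int timestep, const std::vector<IO::MeshDataStruct>& meshData )
{
    // The overload above maps timestep 3 to the subdirectory "vis003"
    IO::writeData( timestep, meshData, MPI_COMM_WORLD );
}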


@ -1,6 +1,6 @@
#include "IO/netcdf.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
@ -116,7 +116,7 @@ std::string VariableTypeName( VariableType type )
/****************************************************
* Open/close a file *
****************************************************/
int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm )
int open( const std::string& filename, FileMode mode, MPI_Comm comm )
{
int fid = 0;
if ( comm == MPI_COMM_NULL ) {
@ -134,13 +134,13 @@ int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm
}
} else {
if ( mode == READ ) {
int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid );
int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm, MPI_INFO_NULL, &fid );
CHECK_NC_ERR( err );
} else if ( mode == WRITE ) {
int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid );
int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm, MPI_INFO_NULL, &fid );
CHECK_NC_ERR( err );
} else if ( mode == CREATE ) {
int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm.getCommunicator(), MPI_INFO_NULL, &fid );
int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm, MPI_INFO_NULL, &fid );
CHECK_NC_ERR( err );
} else {
ERROR("Unknown file mode");
@ -375,7 +375,7 @@ Array<TYPE> getVar( int fid, const std::string& var, const std::vector<int>& sta
std::vector<size_t> var_size = getVarDim( fid, var );
for (int d=0; d<(int)var_size.size(); d++) {
if ( start[d]<0 || start[d]+stride[d]*(count[d]-1)>(int)var_size[d] ) {
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = comm_rank(MPI_COMM_WORLD);
char tmp[1000];
sprintf(tmp,"%i: Range exceeded array dimension:\n"
" start[%i]=%i, count[%i]=%i, stride[%i]=%i, var_size[%i]=%i",


@ -5,7 +5,7 @@
#include <vector>
#include "common/Array.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
@ -32,7 +32,7 @@ std::string VariableTypeName( VariableType type );
* @param mode Open the file for reading or writing
* @param comm MPI communicator to use (MPI_COMM_NULL: don't use parallel netcdf)
*/
int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm=MPI_COMM_NULL );
int open( const std::string& filename, FileMode mode, MPI_Comm comm=MPI_COMM_NULL );
/*!


@ -1,6 +1,6 @@
#include "IO/silo.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"


@ -6,7 +6,7 @@
#include <array>
#include "common/Array.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"


@ -3,7 +3,7 @@
#include "IO/silo.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"


@ -4,7 +4,7 @@
#include "common/Domain.h"
#include "common/Communication.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"
@ -109,13 +109,13 @@ void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
// convert X for 2D manifold to 3D object
Xi *= 0.5;
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
// Phase averages
Vi_global = Dm->Comm.sumReduce( Vi );
Xi_global = Dm->Comm.sumReduce( Xi );
Ai_global = Dm->Comm.sumReduce( Ai );
Ji_global = Dm->Comm.sumReduce( Ji );
Dm->Comm.barrier();
MPI_Allreduce(&Vi,&Vi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Xi,&Xi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Ai,&Ai_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Ji,&Ji_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Barrier(Dm->Comm);
PROFILE_STOP("ComputeScalar");
}
@ -168,7 +168,7 @@ int Minkowski::MeasureConnectedPathway(){
double vF=0.0;
n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm);
// int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm )
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){


@ -12,7 +12,7 @@
#include "analysis/distance.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"


@ -229,25 +229,25 @@ void SubPhase::Basic(){
}
}
}
gwb.V = Dm->Comm.sumReduce( wb.V);
gnb.V = Dm->Comm.sumReduce( nb.V);
gwb.M = Dm->Comm.sumReduce( wb.M);
gnb.M = Dm->Comm.sumReduce( nb.M);
gwb.Px = Dm->Comm.sumReduce( wb.Px);
gwb.Py = Dm->Comm.sumReduce( wb.Py);
gwb.Pz = Dm->Comm.sumReduce( wb.Pz);
gnb.Px = Dm->Comm.sumReduce( nb.Px);
gnb.Py = Dm->Comm.sumReduce( nb.Py);
gnb.Pz = Dm->Comm.sumReduce( nb.Pz);
gwb.V=sumReduce( Dm->Comm, wb.V);
gnb.V=sumReduce( Dm->Comm, nb.V);
gwb.M=sumReduce( Dm->Comm, wb.M);
gnb.M=sumReduce( Dm->Comm, nb.M);
gwb.Px=sumReduce( Dm->Comm, wb.Px);
gwb.Py=sumReduce( Dm->Comm, wb.Py);
gwb.Pz=sumReduce( Dm->Comm, wb.Pz);
gnb.Px=sumReduce( Dm->Comm, nb.Px);
gnb.Py=sumReduce( Dm->Comm, nb.Py);
gnb.Pz=sumReduce( Dm->Comm, nb.Pz);
count_w = Dm->Comm.sumReduce( count_w);
count_n = Dm->Comm.sumReduce( count_n);
count_w=sumReduce( Dm->Comm, count_w);
count_n=sumReduce( Dm->Comm, count_n);
if (count_w > 0.0)
gwb.p = Dm->Comm.sumReduce(wb.p) / count_w;
gwb.p=sumReduce( Dm->Comm, wb.p) / count_w;
else
gwb.p = 0.0;
if (count_n > 0.0)
gnb.p = Dm->Comm.sumReduce( nb.p) / count_n;
gnb.p=sumReduce( Dm->Comm, nb.p) / count_n;
else
gnb.p = 0.0;
@ -444,14 +444,14 @@ void SubPhase::Full(){
nd.X -= nc.X;
// compute global entities
gnc.V = Dm->Comm.sumReduce( nc.V );
gnc.A = Dm->Comm.sumReduce( nc.A );
gnc.H = Dm->Comm.sumReduce( nc.H );
gnc.X = Dm->Comm.sumReduce( nc.X );
gnd.V = Dm->Comm.sumReduce( nd.V );
gnd.A = Dm->Comm.sumReduce( nd.A );
gnd.H = Dm->Comm.sumReduce( nd.H );
gnd.X = Dm->Comm.sumReduce( nd.X );
gnc.V=sumReduce( Dm->Comm, nc.V);
gnc.A=sumReduce( Dm->Comm, nc.A);
gnc.H=sumReduce( Dm->Comm, nc.H);
gnc.X=sumReduce( Dm->Comm, nc.X);
gnd.V=sumReduce( Dm->Comm, nd.V);
gnd.A=sumReduce( Dm->Comm, nd.A);
gnd.H=sumReduce( Dm->Comm, nd.H);
gnd.X=sumReduce( Dm->Comm, nd.X);
gnd.Nc = nd.Nc;
// wetting
for (k=0; k<Nz; k++){
@ -491,14 +491,14 @@ void SubPhase::Full(){
wd.H -= wc.H;
wd.X -= wc.X;
// compute global entities
gwc.V = Dm->Comm.sumReduce( wc.V );
gwc.A = Dm->Comm.sumReduce( wc.A );
gwc.H = Dm->Comm.sumReduce( wc.H );
gwc.X = Dm->Comm.sumReduce( wc.X );
gwd.V = Dm->Comm.sumReduce( wd.V );
gwd.A = Dm->Comm.sumReduce( wd.A );
gwd.H = Dm->Comm.sumReduce( wd.H );
gwd.X = Dm->Comm.sumReduce( wd.X );
gwc.V=sumReduce( Dm->Comm, wc.V);
gwc.A=sumReduce( Dm->Comm, wc.A);
gwc.H=sumReduce( Dm->Comm, wc.H);
gwc.X=sumReduce( Dm->Comm, wc.X);
gwd.V=sumReduce( Dm->Comm, wd.V);
gwd.A=sumReduce( Dm->Comm, wd.A);
gwd.H=sumReduce( Dm->Comm, wd.H);
gwd.X=sumReduce( Dm->Comm, wd.X);
gwd.Nc = wd.Nc;
/* Set up geometric analysis of interface region */
@ -526,20 +526,20 @@ void SubPhase::Full(){
iwn.A = morph_i->A();
iwn.H = morph_i->H();
iwn.X = morph_i->X();
giwn.V = Dm->Comm.sumReduce( iwn.V );
giwn.A = Dm->Comm.sumReduce( iwn.A );
giwn.H = Dm->Comm.sumReduce( iwn.H );
giwn.X = Dm->Comm.sumReduce( iwn.X );
giwn.V=sumReduce( Dm->Comm, iwn.V);
giwn.A=sumReduce( Dm->Comm, iwn.A);
giwn.H=sumReduce( Dm->Comm, iwn.H);
giwn.X=sumReduce( Dm->Comm, iwn.X);
// measure only the connected part
iwnc.Nc = morph_i->MeasureConnectedPathway();
iwnc.V = morph_i->V();
iwnc.A = morph_i->A();
iwnc.H = morph_i->H();
iwnc.X = morph_i->X();
giwnc.V = Dm->Comm.sumReduce( iwnc.V );
giwnc.A = Dm->Comm.sumReduce( iwnc.A );
giwnc.H = Dm->Comm.sumReduce( iwnc.H );
giwnc.X = Dm->Comm.sumReduce( iwnc.X );
giwnc.V=sumReduce( Dm->Comm, iwnc.V);
giwnc.A=sumReduce( Dm->Comm, iwnc.A);
giwnc.H=sumReduce( Dm->Comm, iwnc.H);
giwnc.X=sumReduce( Dm->Comm, iwnc.X);
giwnc.Nc = iwnc.Nc;
double vol_nc_bulk = 0.0;
@ -630,46 +630,46 @@ void SubPhase::Full(){
}
}
gnd.M = Dm->Comm.sumReduce( nd.M );
gnd.Px = Dm->Comm.sumReduce( nd.Px );
gnd.Py = Dm->Comm.sumReduce( nd.Py );
gnd.Pz = Dm->Comm.sumReduce( nd.Pz );
gnd.K = Dm->Comm.sumReduce( nd.K );
gnd.M=sumReduce( Dm->Comm, nd.M);
gnd.Px=sumReduce( Dm->Comm, nd.Px);
gnd.Py=sumReduce( Dm->Comm, nd.Py);
gnd.Pz=sumReduce( Dm->Comm, nd.Pz);
gnd.K=sumReduce( Dm->Comm, nd.K);
gwd.M = Dm->Comm.sumReduce( wd.M );
gwd.Px = Dm->Comm.sumReduce( wd.Px );
gwd.Py = Dm->Comm.sumReduce( wd.Py );
gwd.Pz = Dm->Comm.sumReduce( wd.Pz );
gwd.K = Dm->Comm.sumReduce( wd.K );
gwd.M=sumReduce( Dm->Comm, wd.M);
gwd.Px=sumReduce( Dm->Comm, wd.Px);
gwd.Py=sumReduce( Dm->Comm, wd.Py);
gwd.Pz=sumReduce( Dm->Comm, wd.Pz);
gwd.K=sumReduce( Dm->Comm, wd.K);
gnc.M = Dm->Comm.sumReduce( nc.M );
gnc.Px = Dm->Comm.sumReduce( nc.Px );
gnc.Py = Dm->Comm.sumReduce( nc.Py );
gnc.Pz = Dm->Comm.sumReduce( nc.Pz );
gnc.K = Dm->Comm.sumReduce( nc.K );
gnc.M=sumReduce( Dm->Comm, nc.M);
gnc.Px=sumReduce( Dm->Comm, nc.Px);
gnc.Py=sumReduce( Dm->Comm, nc.Py);
gnc.Pz=sumReduce( Dm->Comm, nc.Pz);
gnc.K=sumReduce( Dm->Comm, nc.K);
gwc.M = Dm->Comm.sumReduce( wc.M );
gwc.Px = Dm->Comm.sumReduce( wc.Px );
gwc.Py = Dm->Comm.sumReduce( wc.Py );
gwc.Pz = Dm->Comm.sumReduce( wc.Pz );
gwc.K = Dm->Comm.sumReduce( wc.K );
gwc.M=sumReduce( Dm->Comm, wc.M);
gwc.Px=sumReduce( Dm->Comm, wc.Px);
gwc.Py=sumReduce( Dm->Comm, wc.Py);
gwc.Pz=sumReduce( Dm->Comm, wc.Pz);
gwc.K=sumReduce( Dm->Comm, wc.K);
giwn.Mn = Dm->Comm.sumReduce( iwn.Mn );
giwn.Pnx = Dm->Comm.sumReduce( iwn.Pnx );
giwn.Pny = Dm->Comm.sumReduce( iwn.Pny );
giwn.Pnz = Dm->Comm.sumReduce( iwn.Pnz );
giwn.Kn = Dm->Comm.sumReduce( iwn.Kn );
giwn.Mw = Dm->Comm.sumReduce( iwn.Mw );
giwn.Pwx = Dm->Comm.sumReduce( iwn.Pwx );
giwn.Pwy = Dm->Comm.sumReduce( iwn.Pwy );
giwn.Pwz = Dm->Comm.sumReduce( iwn.Pwz );
giwn.Kw = Dm->Comm.sumReduce( iwn.Kw );
giwn.Mn=sumReduce( Dm->Comm, iwn.Mn);
giwn.Pnx=sumReduce( Dm->Comm, iwn.Pnx);
giwn.Pny=sumReduce( Dm->Comm, iwn.Pny);
giwn.Pnz=sumReduce( Dm->Comm, iwn.Pnz);
giwn.Kn=sumReduce( Dm->Comm, iwn.Kn);
giwn.Mw=sumReduce( Dm->Comm, iwn.Mw);
giwn.Pwx=sumReduce( Dm->Comm, iwn.Pwx);
giwn.Pwy=sumReduce( Dm->Comm, iwn.Pwy);
giwn.Pwz=sumReduce( Dm->Comm, iwn.Pwz);
giwn.Kw=sumReduce( Dm->Comm, iwn.Kw);
// pressure averaging
gnc.p = Dm->Comm.sumReduce( nc.p );
gnd.p = Dm->Comm.sumReduce( nd.p );
gwc.p = Dm->Comm.sumReduce( wc.p );
gwd.p = Dm->Comm.sumReduce( wd.p );
gnc.p=sumReduce( Dm->Comm, nc.p);
gnd.p=sumReduce( Dm->Comm, nd.p);
gwc.p=sumReduce( Dm->Comm, wc.p);
gwd.p=sumReduce( Dm->Comm, wd.p);
if (vol_wc_bulk > 0.0)
wc.p = wc.p /vol_wc_bulk;
@ -680,10 +680,10 @@ void SubPhase::Full(){
if (vol_nd_bulk > 0.0)
nd.p = nd.p /vol_nd_bulk;
vol_wc_bulk = Dm->Comm.sumReduce( vol_wc_bulk );
vol_wd_bulk = Dm->Comm.sumReduce( vol_wd_bulk );
vol_nc_bulk = Dm->Comm.sumReduce( vol_nc_bulk );
vol_nd_bulk = Dm->Comm.sumReduce( vol_nd_bulk );
vol_wc_bulk=sumReduce( Dm->Comm, vol_wc_bulk);
vol_wd_bulk=sumReduce( Dm->Comm, vol_wd_bulk);
vol_nc_bulk=sumReduce( Dm->Comm, vol_nc_bulk);
vol_nd_bulk=sumReduce( Dm->Comm, vol_nd_bulk);
if (vol_wc_bulk > 0.0)
gwc.p = gwc.p /vol_wc_bulk;
@ -719,7 +719,7 @@ void SubPhase::AggregateLabels( const std::string& filename )
}
}
}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
Dm->AggregateLabels( filename );
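Every averaging hunk above swaps the wrapper call Dm->Comm.sumReduce(x) for the free function sumReduce(Dm->Comm, x) provided by the MPI_Helpers layer this commit restores (not shown in this diff). Presumably it is a thin wrapper over MPI_Allreduce along these lines; the same pattern with MPI_MAX underlies the maxReduce call in the CalcVecDist hunk further down (sketch only):

#include <mpi.h>

// Hedged sketch: sum a scalar over all ranks, returning the result on every rank.
inline double sumReduce( MPI_Comm comm, double x )
{
    double global = 0.0;
    MPI_Allreduce( &x, &global, 1, MPI_DOUBLE, MPI_SUM, comm );
    return global;
}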


@ -12,7 +12,7 @@
#include "analysis/distance.h"
#include "analysis/Minkowski.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"


@ -5,7 +5,7 @@
#include "common/Domain.h"
#include "common/Communication.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"
@ -882,7 +882,7 @@ void TwoPhase::ComponentAverages()
}
}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
if (Dm->rank()==0){
printf("Component averages computed locally -- reducing result... \n");
}
@ -895,8 +895,8 @@ void TwoPhase::ComponentAverages()
for (int idx=0; idx<BLOB_AVG_COUNT; idx++) ComponentAverages_NWP(idx,b)=RecvBuffer(idx);
}
*/
Dm->Comm.barrier();
Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP);
MPI_Barrier(Dm->Comm);
MPI_Allreduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP, MPI_DOUBLE,MPI_SUM,Dm->Comm);
// MPI_Reduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm);
if (Dm->rank()==0){
@ -993,9 +993,9 @@ void TwoPhase::ComponentAverages()
// reduce the wetting phase averages
for (int b=0; b<NumberComponents_WP; b++){
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
// MPI_Allreduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm);
Dm->Comm.sumReduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT);
MPI_Reduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm);
for (int idx=0; idx<BLOB_AVG_COUNT; idx++) ComponentAverages_WP(idx,b)=RecvBuffer(idx);
}
@ -1078,48 +1078,43 @@ void TwoPhase::Reduce()
int i;
double iVol_global=1.0/Volume;
//...........................................................................
Dm->Comm.barrier();
nwp_volume_global = Dm->Comm.sumReduce( nwp_volume );
wp_volume_global = Dm->Comm.sumReduce( wp_volume );
awn_global = Dm->Comm.sumReduce( awn );
ans_global = Dm->Comm.sumReduce( ans );
aws_global = Dm->Comm.sumReduce( aws );
lwns_global = Dm->Comm.sumReduce( lwns );
As_global = Dm->Comm.sumReduce( As );
Jwn_global = Dm->Comm.sumReduce( Jwn );
Kwn_global = Dm->Comm.sumReduce( Kwn );
KGwns_global = Dm->Comm.sumReduce( KGwns );
KNwns_global = Dm->Comm.sumReduce( KNwns );
efawns_global = Dm->Comm.sumReduce( efawns );
wwndnw_global = Dm->Comm.sumReduce( wwndnw );
wwnsdnwn_global = Dm->Comm.sumReduce( wwnsdnwn );
Jwnwwndnw_global = Dm->Comm.sumReduce( Jwnwwndnw );
MPI_Barrier(Dm->Comm);
MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&wp_volume,&wp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Kwn,&Kwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&KGwns,&KGwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&KNwns,&KNwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&wwndnw,&wwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&wwnsdnwn,&wwnsdnwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Jwnwwndnw,&Jwnwwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
// Phase averages
vol_w_global = Dm->Comm.sumReduce( vol_w );
vol_n_global = Dm->Comm.sumReduce( vol_n );
paw_global = Dm->Comm.sumReduce( paw );
pan_global = Dm->Comm.sumReduce( pan );
for (int idx=0; idx<3; idx++)
vaw_global(idx) = Dm->Comm.sumReduce( vaw(idx) );
for (int idx=0; idx<3; idx++)
van_global(idx) = Dm->Comm.sumReduce( van(idx));
for (int idx=0; idx<3; idx++)
vawn_global(idx) = Dm->Comm.sumReduce( vawn(idx) );
for (int idx=0; idx<3; idx++)
vawns_global(idx) = Dm->Comm.sumReduce( vawns(idx) );
for (int idx=0; idx<6; idx++){
Gwn_global(idx) = Dm->Comm.sumReduce( Gwn(idx) );
Gns_global(idx) = Dm->Comm.sumReduce( Gns(idx) );
Gws_global(idx) = Dm->Comm.sumReduce( Gws(idx) );
}
trawn_global = Dm->Comm.sumReduce( trawn );
trJwn_global = Dm->Comm.sumReduce( trJwn );
trRwn_global = Dm->Comm.sumReduce( trRwn );
euler_global = Dm->Comm.sumReduce( euler );
An_global = Dm->Comm.sumReduce( An );
Jn_global = Dm->Comm.sumReduce( Jn );
Kn_global = Dm->Comm.sumReduce( Kn );
Dm->Comm.barrier();
MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&vawns(0),&vawns_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&trawn,&trawn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&trJwn,&trJwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&trRwn,&trRwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&euler,&euler_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&An,&An_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Jn,&Jn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Kn,&Kn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Barrier(Dm->Comm);
// Normalize the phase averages
// (density of both components = 1.0)


@ -12,7 +12,7 @@
#include "common/Domain.h"
#include "common/Communication.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"


@ -188,7 +188,7 @@ int ComputeLocalPhaseComponent(const IntArray &PhaseID, int &VALUE, BlobIDArray
/******************************************************************
* Reorder the global blob ids *
******************************************************************/
static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, const Utilities::MPI& comm )
static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, MPI_Comm comm )
{
if ( N_blobs==0 )
return 0;
@ -212,7 +212,7 @@ static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int
}
}
ASSERT(max_id<N_blobs);
comm.sumReduce(local_size,global_size,N_blobs);
MPI_Allreduce(local_size,global_size,N_blobs,MPI_DOUBLE,MPI_SUM,comm);
std::vector<std::pair<double,int> > map1(N_blobs);
int N_blobs2 = 0;
for (int i=0; i<N_blobs; i++) {
@ -235,12 +235,12 @@ static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int
PROFILE_STOP("ReorderBlobIDs2",1);
return N_blobs2;
}
void ReorderBlobIDs( BlobIDArray& ID, const Utilities::MPI& comm )
void ReorderBlobIDs( BlobIDArray& ID, MPI_Comm comm )
{
PROFILE_START("ReorderBlobIDs");
int tmp = ID.max()+1;
int N_blobs = 0;
N_blobs = comm.maxReduce( tmp );
MPI_Allreduce(&tmp,&N_blobs,1,MPI_INT,MPI_MAX,comm);
ReorderBlobIDs2(ID,N_blobs,1,1,1,comm);
PROFILE_STOP("ReorderBlobIDs");
}
@ -260,29 +260,30 @@ static void updateRemoteIds(
int N_send, const std::vector<int>& N_recv,
int64_t *send_buf, std::vector<int64_t*>& recv_buf,
std::map<int64_t,int64_t>& remote_map,
const Utilities::MPI& comm )
MPI_Comm comm )
{
std::vector<MPI_Request> send_req(neighbors.size());
std::vector<MPI_Request> recv_req(neighbors.size());
auto it = map.begin();
std::vector<MPI_Status> status(neighbors.size());
std::map<int64_t,global_id_info_struct>::const_iterator it = map.begin();
ASSERT(N_send==(int)map.size());
for (size_t i=0; i<map.size(); i++, ++it) {
send_buf[2*i+0] = it->first;
send_buf[2*i+1] = it->second.new_id;
}
for (size_t i=0; i<neighbors.size(); i++) {
send_req[i] = comm.Isend( send_buf, 2*N_send, neighbors[i], 0 );
recv_req[i] = comm.Irecv( recv_buf[i], 2*N_recv[i], neighbors[i], 0 );
MPI_Isend( send_buf, 2*N_send, MPI_LONG_LONG, neighbors[i], 0, comm, &send_req[i] );
MPI_Irecv( recv_buf[i], 2*N_recv[i], MPI_LONG_LONG, neighbors[i], 0, comm, &recv_req[i] );
}
for (it=map.begin(); it!=map.end(); ++it) {
remote_map[it->first] = it->second.new_id;
}
for (size_t i=0; i<neighbors.size(); i++) {
comm.wait( recv_req[i] );
MPI_Wait(&recv_req[i],&status[i]);
for (int j=0; j<N_recv[i]; j++)
remote_map[recv_buf[i][2*j+0]] = recv_buf[i][2*j+1];
}
comm.waitAll(neighbors.size(),getPtr(send_req));
MPI_Waitall(neighbors.size(),getPtr(send_req),getPtr(status));
}
// Compute a new local id for each local id
static bool updateLocalIds( const std::map<int64_t,int64_t>& remote_map,
@ -303,18 +304,18 @@ static bool updateLocalIds( const std::map<int64_t,int64_t>& remote_map,
return changed;
}
static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info,
int nblobs, BlobIDArray& IDs, const Utilities::MPI& comm )
int nblobs, BlobIDArray& IDs, MPI_Comm comm )
{
PROFILE_START("LocalToGlobalIDs",1);
const int rank = rank_info.rank[1][1][1];
int nprocs = comm.getSize();
int nprocs = comm_size(comm);
const int ngx = (IDs.size(0)-nx)/2;
const int ngy = (IDs.size(1)-ny)/2;
const int ngz = (IDs.size(2)-nz)/2;
// Get the number of blobs for each rank
std::vector<int> N_blobs(nprocs,0);
PROFILE_START("LocalToGlobalIDs-Allgather",1);
comm.allGather(nblobs,getPtr(N_blobs));
MPI_Allgather(&nblobs,1,MPI_INT,getPtr(N_blobs),1,MPI_INT,comm);
PROFILE_STOP("LocalToGlobalIDs-Allgather",1);
int64_t N_blobs_tot = 0;
int offset = 0;
@ -362,12 +363,13 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_
std::vector<int> N_recv(neighbors.size(),0);
std::vector<MPI_Request> send_req(neighbors.size());
std::vector<MPI_Request> recv_req(neighbors.size());
std::vector<MPI_Status> status(neighbors.size());
for (size_t i=0; i<neighbors.size(); i++) {
send_req[i] = comm.Isend( &N_send, 1, neighbors[i], 0 );
recv_req[i] = comm.Irecv( &N_recv[i], 1, neighbors[i], 0 );
MPI_Isend( &N_send, 1, MPI_INT, neighbors[i], 0, comm, &send_req[i] );
MPI_Irecv( &N_recv[i], 1, MPI_INT, neighbors[i], 0, comm, &recv_req[i] );
}
comm.waitAll(neighbors.size(),getPtr(send_req));
comm.waitAll(neighbors.size(),getPtr(recv_req));
MPI_Waitall(neighbors.size(),getPtr(send_req),getPtr(status));
MPI_Waitall(neighbors.size(),getPtr(recv_req),getPtr(status));
// Allocate memory for communication
int64_t *send_buf = new int64_t[2*N_send];
std::vector<int64_t*> recv_buf(neighbors.size());
@ -396,7 +398,8 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_
bool changed = updateLocalIds( remote_map, map );
// Check if we are finished
int test = changed ? 1:0;
int result = comm.sumReduce( test );
int result = 0;
MPI_Allreduce(&test,&result,1,MPI_INT,MPI_SUM,comm);
if ( result==0 )
break;
}
@ -432,7 +435,7 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_
}
int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS,
BlobIDArray& GlobalBlobID, const Utilities::MPI& comm )
BlobIDArray& GlobalBlobID, MPI_Comm comm )
{
PROFILE_START("ComputeGlobalBlobIDs");
// First compute the local ids
@ -443,7 +446,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf
return nglobal;
}
int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm )
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm )
{
PROFILE_START("ComputeGlobalPhaseComponent");
// First compute the local ids
@ -459,27 +462,37 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r
* Compute the mapping of blob ids between timesteps *
******************************************************************/
typedef std::map<BlobIDType,std::map<BlobIDType,int64_t> > map_type;
template<class TYPE> inline MPI_Datatype getMPIType();
template<> inline MPI_Datatype getMPIType<int32_t>() { return MPI_INT; }
template<> inline MPI_Datatype getMPIType<int64_t>() {
if ( sizeof(int64_t)==sizeof(long int) )
return MPI_LONG;
else if ( sizeof(int64_t)==sizeof(double) )
return MPI_DOUBLE;
}
template<class TYPE>
void gatherSet( std::set<TYPE>& set, const Utilities::MPI& comm )
void gatherSet( std::set<TYPE>& set, MPI_Comm comm )
{
int nprocs = comm.getSize();
int nprocs = comm_size(comm);
MPI_Datatype type = getMPIType<TYPE>();
std::vector<TYPE> send_data(set.begin(),set.end());
int send_count = send_data.size();
std::vector<int> recv_count(nprocs,0), recv_disp(nprocs,0);
comm.allGather( send_count, getPtr(recv_count) );
MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm);
for (int i=1; i<nprocs; i++)
recv_disp[i] = recv_disp[i-1] + recv_count[i-1];
std::vector<TYPE> recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]);
comm.allGather( getPtr(send_data), send_count, getPtr(recv_data),
getPtr(recv_count), getPtr(recv_disp), true );
MPI_Allgatherv(getPtr(send_data),send_count,type,
getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm);
for (size_t i=0; i<recv_data.size(); i++)
set.insert(recv_data[i]);
}
void gatherSrcIDMap( map_type& src_map, const Utilities::MPI& comm )
void gatherSrcIDMap( map_type& src_map, MPI_Comm comm )
{
int nprocs = comm.getSize();
int nprocs = comm_size(comm);
MPI_Datatype type = getMPIType<int64_t>();
std::vector<int64_t> send_data;
for (auto it=src_map.begin(); it!=src_map.end(); ++it) {
for (map_type::const_iterator it=src_map.begin(); it!=src_map.end(); ++it) {
int id = it->first;
const std::map<BlobIDType,int64_t>& src_ids = it->second;
send_data.push_back(id);
@ -492,21 +505,21 @@ void gatherSrcIDMap( map_type& src_map, const Utilities::MPI& comm )
}
int send_count = send_data.size();
std::vector<int> recv_count(nprocs,0), recv_disp(nprocs,0);
comm.allGather(send_count,getPtr(recv_count));
MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm);
for (int i=1; i<nprocs; i++)
recv_disp[i] = recv_disp[i-1] + recv_count[i-1];
std::vector<int64_t> recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]);
comm.allGather(getPtr(send_data),send_count,
getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),true);
MPI_Allgatherv(getPtr(send_data),send_count,type,
getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm);
size_t i=0;
src_map.clear();
while ( i < recv_data.size() ) {
BlobIDType id = recv_data[i];
size_t count = recv_data[i+1];
i += 2;
auto& src_ids = src_map[id];
std::map<BlobIDType,int64_t>& src_ids = src_map[id];
for (size_t j=0; j<count; j++,i+=2) {
auto it = src_ids.find(recv_data[i]);
std::map<BlobIDType,int64_t>::iterator it = src_ids.find(recv_data[i]);
if ( it == src_ids.end() )
src_ids.insert(std::pair<BlobIDType,int64_t>(recv_data[i],recv_data[i+1]));
else
@ -525,7 +538,7 @@ void addSrcDstIDs( BlobIDType src_id, map_type& src_map, map_type& dst_map,
}
}
ID_map_struct computeIDMap( int nx, int ny, int nz,
const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm )
const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm )
{
ASSERT(ID1.size()==ID2.size());
PROFILE_START("computeIDMap");
@ -767,7 +780,7 @@ void renumberIDs( const std::vector<BlobIDType>& new_ids, BlobIDArray& IDs )
******************************************************************/
void writeIDMap( const ID_map_struct& map, long long int timestep, const std::string& filename )
{
int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank();
int rank = MPI_WORLD_RANK();
if ( rank!=0 )
return;
bool empty = map.created.empty() && map.destroyed.empty() &&


@ -58,7 +58,7 @@ int ComputeLocalPhaseComponent( const IntArray &PhaseID, int &VALUE, IntArray &C
*/
int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS,
BlobIDArray& GlobalBlobID, const Utilities::MPI& comm );
BlobIDArray& GlobalBlobID, MPI_Comm comm );
/*!
@ -75,7 +75,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf
* @return Return the number of components in the specified phase
*/
int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm );
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm );
/*!
@ -87,7 +87,7 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r
* @param[in] nz Number of elements in the z-direction
* @param[in/out] ID The ids of the blobs
*/
void ReorderBlobIDs( BlobIDArray& ID, const Utilities::MPI& comm );
void ReorderBlobIDs( BlobIDArray& ID, MPI_Comm comm );
typedef std::pair<BlobIDType,std::vector<BlobIDType> > BlobIDSplitStruct;
@ -120,7 +120,7 @@ struct ID_map_struct {
* @param[in] ID1 The blob ids at the first timestep
* @param[in] ID2 The blob ids at the second timestep
*/
ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm );
ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm );
/*!


@ -176,7 +176,7 @@ void CalcVecDist( Array<Vec> &d, const Array<int> &ID0, const Domain &Dm,
// Update distance
double err = calcVecUpdateInterior( d, dx[0], dx[1], dx[2] );
// Check if we are finished
err = Dm.Comm.maxReduce( err );
err = maxReduce( Dm.Comm, err );
if ( err < tol )
break;
}


@ -58,11 +58,11 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
}
}
}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
// totalGlobal is the number of nodes in the pore-space
totalGlobal = Dm->Comm.sumReduce( count );
maxdistGlobal = Dm->Comm.sumReduce( maxdist );
MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm);
double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2);
double volume_fraction=totalGlobal/volume;
if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction);
@ -133,6 +133,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
double deltaR=0.05; // amount to change the radius in voxel units
double Rcrit_old=0.0;
double GlobalNumber = 1.f;
int imin,jmin,kmin,imax,jmax,kmax;
if (ErodeLabel == 1){
@ -202,41 +203,41 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id);
//......................................................................................
MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag,
recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag,
recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag,
recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag,
recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag,
recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag,
recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag,
recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag,
recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag,
recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag,
recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag,
recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag,
recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag,
recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag,
recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag,
recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag,
recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag,
recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag,
recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id);
UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id);
@ -258,7 +259,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id);
//......................................................................................
//double GlobalNumber = Dm->Comm.sumReduce( LocalNumber );
MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
count = 0.f;
for (int k=1; k<Nz-1; k++){
@ -271,7 +272,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
}
}
}
countGlobal = Dm->Comm.sumReduce( count );
MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
void_fraction_new = countGlobal/totalGlobal;
void_fraction_diff_new = abs(void_fraction_new-VoidFraction);
/* if (rank==0){
@ -359,11 +360,11 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
}
}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
// total Global is the number of nodes in the pore-space
totalGlobal = Dm->Comm.sumReduce( count );
maxdistGlobal = Dm->Comm.sumReduce( maxdist );
MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm);
double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2);
double volume_fraction=totalGlobal/volume;
if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction);
@ -433,6 +434,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
double deltaR=0.05; // amount to change the radius in voxel units
double Rcrit_old;
double GlobalNumber = 1.f;
int imin,jmin,kmin,imax,jmax,kmax;
double Rcrit_new = maxdistGlobal;
@ -440,7 +442,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
// Rcrit_new = strtod(argv[2],NULL);
// if (rank==0) printf("Max. distance =%f, Initial critical radius = %f \n",maxdistGlobal,Rcrit_new);
//}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
FILE *DRAIN = fopen("morphdrain.csv","w");
@ -507,41 +509,41 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id);
//......................................................................................
MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag,
recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag,
recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag,
recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag,
recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag,
recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag,
recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag,
recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag,
recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag,
recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag,
recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag,
recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag,
recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag,
recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag,
recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag,
recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag,
recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag,
recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag,
recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id);
UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id);
@ -562,7 +564,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
UnpackID(Dm->recvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id);
UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id);
//......................................................................................
// double GlobalNumber = Dm->Comm.sumReduce( LocalNumber );
MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
for (int k=0; k<nz; k++){
for (int j=0; j<ny; j++){
@ -581,7 +583,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
BlobIDstruct new_index;
double vF=0.0; double vS=0.0;
ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm);
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
for (int k=0; k<nz; k++){
for (int j=0; j<ny; j++){
@ -643,7 +645,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
}
}
}
countGlobal = Dm->Comm.sumReduce( count );
MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
void_fraction_new = countGlobal/totalGlobal;
void_fraction_diff_new = abs(void_fraction_new-VoidFraction);
if (rank==0){
@ -700,7 +702,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
}
}
}
double count_original = Dm->Comm.sumReduce( count);
double count_original=sumReduce( Dm->Comm, count);
// Estimate morph_delta
double morph_delta = 0.0;
@ -730,8 +732,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
}
}
}
count = Dm->Comm.sumReduce( count );
MAX_DISPLACEMENT = Dm->Comm.maxReduce( MAX_DISPLACEMENT );
count=sumReduce( Dm->Comm, count);
MAX_DISPLACEMENT = maxReduce( Dm->Comm, MAX_DISPLACEMENT);
GrowthEstimate = count - count_original;
ERROR = fabs((GrowthEstimate-TargetGrowth) /TargetGrowth);
@ -774,7 +776,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
}
}
}
count = Dm->Comm.sumReduce( count );
count=sumReduce( Dm->Comm, count);
return count;
}
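For reference, a minimal sketch (not the project's actual implementation) of free-function sumReduce/maxReduce helpers like the ones called above, written as thin wrappers over MPI_Allreduce on a raw MPI_Comm:

#include <mpi.h>

// Sum a double across all ranks of comm and return the result on every rank.
inline double sumReduce( MPI_Comm comm, double x )
{
    double y = 0.0;
    MPI_Allreduce( &x, &y, 1, MPI_DOUBLE, MPI_SUM, comm );
    return y;
}

// Maximum of a double across all ranks of comm.
inline double maxReduce( MPI_Comm comm, double x )
{
    double y = x;
    MPI_Allreduce( &x, &y, 1, MPI_DOUBLE, MPI_MAX, comm );
    return y;
}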
View File
@ -3,7 +3,7 @@
#include "analysis/analysis.h"
#include "common/Array.h"
#include "common/Communication.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/ScaLBL.h"
#include "models/ColorModel.h"
@ -462,7 +462,7 @@ private:
/******************************************************************
* MPI comm wrapper for use with analysis *
******************************************************************/
runAnalysis::commWrapper::commWrapper( int tag_, const Utilities::MPI& comm_, runAnalysis* analysis_ ):
runAnalysis::commWrapper::commWrapper( int tag_, MPI_Comm comm_, runAnalysis* analysis_ ):
comm(comm_),
tag(tag_),
analysis(analysis_)
@ -479,7 +479,7 @@ runAnalysis::commWrapper::~commWrapper()
{
if ( tag == -1 )
return;
comm.barrier();
MPI_Barrier( comm );
analysis->d_comm_used[tag] = false;
}
runAnalysis::commWrapper runAnalysis::getComm( )
@ -496,10 +496,10 @@ runAnalysis::commWrapper runAnalysis::getComm( )
if ( tag == -1 )
ERROR("Unable to get comm");
}
tag = d_comm.bcast( tag, 0 );
MPI_Bcast( &tag, 1, MPI_INT, 0, d_comm );
d_comm_used[tag] = true;
if ( d_comms[tag].isNull() )
d_comms[tag] = d_comm.dup();
if ( d_comms[tag] == MPI_COMM_NULL )
MPI_Comm_dup( MPI_COMM_WORLD, &d_comms[tag] );
return commWrapper(tag,d_comms[tag],this);
}
@ -507,20 +507,14 @@ runAnalysis::commWrapper runAnalysis::getComm( )
/******************************************************************
* Constructor/Destructors *
******************************************************************/
runAnalysis::runAnalysis( std::shared_ptr<Database> input_db,
const RankInfoStruct& rank_info,
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm,
std::shared_ptr <Domain> Dm,
int Np,
bool Regular,
IntArray Map ):
d_Np( Np ),
d_regular ( Regular),
d_rank_info( rank_info ),
d_Map( Map ),
d_fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1),
d_comm( Utilities::MPI( MPI_COMM_WORLD ).dup() ),
d_ScaLBL_Comm( ScaLBL_Comm)
runAnalysis::runAnalysis(std::shared_ptr<Database> input_db, const RankInfoStruct& rank_info, std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm, std::shared_ptr <Domain> Dm,
int Np, bool Regular, IntArray Map ):
d_Np( Np ),
d_regular ( Regular),
d_rank_info( rank_info ),
d_Map( Map ),
d_fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1),
d_ScaLBL_Comm( ScaLBL_Comm)
{
auto db = input_db->getDatabase( "Analysis" );
@ -558,7 +552,7 @@ runAnalysis::runAnalysis( std::shared_ptr<Database> input_db,
d_restartFile = restart_file + "." + rankString;
d_rank = d_comm.getRank();
d_rank = MPI_WORLD_RANK();
writeIDMap(ID_map_struct(),0,id_map_filename);
// Initialize IO for silo
IO::initialize("","silo","false");
@ -627,8 +621,11 @@ runAnalysis::runAnalysis( std::shared_ptr<Database> input_db,
// Initialize the comms
for (int i=0; i<1024; i++)
MPI_Comm_dup(MPI_COMM_WORLD,&d_comm);
for (int i=0; i<1024; i++) {
d_comms[i] = MPI_COMM_NULL;
d_comm_used[i] = false;
}
// Initialize the threads
int N_threads = db->getWithDefault<int>( "N_threads", 4 );
auto method = db->getWithDefault<std::string>( "load_balance", "default" );
@ -638,6 +635,12 @@ runAnalysis::~runAnalysis( )
{
// Finish processing analysis
finish();
// Clear internal data
MPI_Comm_free( &d_comm );
for (int i=0; i<1024; i++) {
if ( d_comms[i] != MPI_COMM_NULL )
MPI_Comm_free(&d_comms[i]);
}
}
void runAnalysis::finish( )
{
@ -651,7 +654,7 @@ void runAnalysis::finish( )
d_wait_subphase.reset();
d_wait_restart.reset();
// Syncronize
d_comm.barrier();
MPI_Barrier( d_comm );
PROFILE_STOP("finish");
}
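A minimal sketch of the lazy communicator-pool pattern used above, assuming a fixed pool of 1024 slots: entries start as MPI_COMM_NULL, are duplicated from MPI_COMM_WORLD on first use, and are freed at shutdown. The struct and member names are illustrative, not the actual runAnalysis members.

#include <mpi.h>

struct CommPool {
    MPI_Comm comms[1024];
    CommPool() {
        for ( auto &c : comms )
            c = MPI_COMM_NULL;               // nothing duplicated yet
    }
    MPI_Comm get( int tag ) {
        if ( comms[tag] == MPI_COMM_NULL )
            MPI_Comm_dup( MPI_COMM_WORLD, &comms[tag] );  // duplicate on demand
        return comms[tag];
    }
    ~CommPool() {                            // must run before MPI_Finalize
        for ( auto &c : comms )
            if ( c != MPI_COMM_NULL )
                MPI_Comm_free( &c );
    }
};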
View File
@ -68,10 +68,10 @@ public:
class commWrapper
{
public:
Utilities::MPI comm;
MPI_Comm comm;
int tag;
runAnalysis *analysis;
commWrapper( int tag, const Utilities::MPI& comm, runAnalysis *analysis );
commWrapper( int tag, MPI_Comm comm, runAnalysis *analysis );
commWrapper( ) = delete;
commWrapper( const commWrapper &rhs ) = delete;
commWrapper& operator=( const commWrapper &rhs ) = delete;
@ -100,8 +100,8 @@ private:
std::vector<IO::MeshDataStruct> d_meshData;
fillHalo<double> d_fillData;
std::string d_restartFile;
Utilities::MPI d_comm;
Utilities::MPI d_comms[1024];
MPI_Comm d_comm;
MPI_Comm d_comms[1024];
volatile bool d_comm_used[1024];
std::shared_ptr<ScaLBL_Communicator> d_ScaLBL_Comm;
View File
@ -228,7 +228,8 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
Array<float>& Mean, Array<float>& Dist1, Array<float>& Dist2 )
{
PROFILE_SCOPED(timer,"filter_final");
int rank = Dm.Comm.getRank();
int rank;
MPI_Comm_rank(Dm.Comm,&rank);
int Nx = Dm.Nx-2;
int Ny = Dm.Ny-2;
int Nz = Dm.Nz-2;
@ -241,7 +242,7 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
float tmp = 0;
for (size_t i=0; i<Dist0.length(); i++)
tmp += Dist0(i)*Dist0(i);
tmp = sqrt( Dm.Comm.sumReduce(tmp) / Dm.Comm.sumReduce<float>(Dist0.length()) );
tmp = sqrt( sumReduce(Dm.Comm,tmp) / sumReduce(Dm.Comm,(float)Dist0.length()) );
const float dx1 = 0.3*tmp;
const float dx2 = 1.05*dx1;
if (rank==0)
@ -284,7 +285,7 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
Phase.fill(1);
ComputeGlobalBlobIDs( Nx, Ny, Nz, Dm.rank_info, Phase, SignDist, 0, 0, GlobalBlobID, Dm.Comm );
fillInt.fill(GlobalBlobID);
int N_blobs = Dm.Comm.maxReduce(GlobalBlobID.max()+1);
int N_blobs = maxReduce(Dm.Comm,GlobalBlobID.max()+1);
std::vector<float> mean(N_blobs,0);
std::vector<int> count(N_blobs,0);
for (int k=1; k<=Nz; k++) {
@ -320,8 +321,8 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
}
}
}
mean = Dm.Comm.sumReduce(mean);
count = Dm.Comm.sumReduce(count);
mean = sumReduce(Dm.Comm,mean);
count = sumReduce(Dm.Comm,count);
for (size_t i=0; i<mean.size(); i++)
mean[i] /= count[i];
/*if (rank==0) {
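Under the same assumptions as the scalar helpers sketched earlier, the vector reductions used above can be expressed as one MPI_Allreduce per call (illustrative overloads, not the project's actual code):

#include <mpi.h>
#include <vector>

// Element-wise sum of a float vector across all ranks.
inline std::vector<float> sumReduce( MPI_Comm comm, const std::vector<float> &x )
{
    std::vector<float> y( x.size(), 0.0f );
    MPI_Allreduce( x.data(), y.data(), (int) x.size(), MPI_FLOAT, MPI_SUM, comm );
    return y;
}

// Integer counterpart, e.g. for the per-blob voxel counts.
inline std::vector<int> sumReduce( MPI_Comm comm, const std::vector<int> &x )
{
    std::vector<int> y( x.size(), 0 );
    MPI_Allreduce( x.data(), y.data(), (int) x.size(), MPI_INT, MPI_SUM, comm );
    return y;
}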
View File
@ -1,579 +0,0 @@
###############################################################################
# FindHIP.cmake
###############################################################################
###############################################################################
# SET: Variable defaults
###############################################################################
# User defined flags
set(HIP_HIPCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HIPCC")
set(HIP_HCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HCC")
set(HIP_NVCC_FLAGS "" CACHE STRING "Semicolon delimted flags for NVCC")
mark_as_advanced(HIP_HIPCC_FLAGS HIP_HCC_FLAGS HIP_NVCC_FLAGS)
set(_hip_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo)
list(REMOVE_DUPLICATES _hip_configuration_types)
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
set(HIP_HIPCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HIPCC")
set(HIP_HCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HCC")
set(HIP_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for NVCC")
mark_as_advanced(HIP_HIPCC_FLAGS_${config_upper} HIP_HCC_FLAGS_${config_upper} HIP_NVCC_FLAGS_${config_upper})
endforeach()
option(HIP_HOST_COMPILATION_CPP "Host code compilation mode" ON)
option(HIP_VERBOSE_BUILD "Print out the commands run while compiling the HIP source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF)
mark_as_advanced(HIP_HOST_COMPILATION_CPP)
###############################################################################
# Set HIP CMAKE Flags
###############################################################################
# Copy the invocation styles from CXX to HIP
set(CMAKE_HIP_ARCHIVE_CREATE ${CMAKE_CXX_ARCHIVE_CREATE})
set(CMAKE_HIP_ARCHIVE_APPEND ${CMAKE_CXX_ARCHIVE_APPEND})
set(CMAKE_HIP_ARCHIVE_FINISH ${CMAKE_CXX_ARCHIVE_FINISH})
set(CMAKE_SHARED_LIBRARY_SONAME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG})
set(CMAKE_SHARED_LIBRARY_CREATE_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS})
set(CMAKE_SHARED_LIBRARY_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CXX_FLAGS})
#set(CMAKE_SHARED_LIBRARY_LINK_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS})
set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG})
set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP})
set(CMAKE_SHARED_LIBRARY_LINK_STATIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_STATIC_CXX_FLAGS})
set(CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_CXX_FLAGS})
# Set the CMake Flags to use the HCC Compilier.
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <LINK_LIBRARIES> -shared" )
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
###############################################################################
# FIND: HIP and associated helper binaries
###############################################################################
# HIP is supported on Linux only
if(UNIX AND NOT APPLE AND NOT CYGWIN)
# Search for HIP installation
if(NOT HIP_ROOT_DIR)
# Search in user specified path first
find_path(
HIP_ROOT_DIR
NAMES hipconfig
PATHS
ENV ROCM_PATH
ENV HIP_PATH
PATH_SUFFIXES bin
DOC "HIP installed location"
NO_DEFAULT_PATH
)
# Now search in default path
find_path(
HIP_ROOT_DIR
NAMES hipconfig
PATHS
/opt/rocm
/opt/rocm/hip
PATH_SUFFIXES bin
DOC "HIP installed location"
)
# Check if we found HIP installation
if(HIP_ROOT_DIR)
# If so, fix the path
string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" HIP_ROOT_DIR ${HIP_ROOT_DIR})
# And push it back to the cache
set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE)
endif()
if(NOT EXISTS ${HIP_ROOT_DIR})
if(HIP_FIND_REQUIRED)
message(FATAL_ERROR "Specify HIP_ROOT_DIR")
elseif(NOT HIP_FIND_QUIETLY)
message("HIP_ROOT_DIR not found or specified")
endif()
endif()
endif()
# Find HIPCC executable
find_program(
HIP_HIPCC_EXECUTABLE
NAMES hipcc
PATHS
"${HIP_ROOT_DIR}"
ENV ROCM_PATH
ENV HIP_PATH
/opt/rocm
/opt/rocm/hip
PATH_SUFFIXES bin
NO_DEFAULT_PATH
)
if(NOT HIP_HIPCC_EXECUTABLE)
# Now search in default paths
find_program(HIP_HIPCC_EXECUTABLE hipcc)
endif()
mark_as_advanced(HIP_HIPCC_EXECUTABLE)
# Find HIPCONFIG executable
find_program(
HIP_HIPCONFIG_EXECUTABLE
NAMES hipconfig
PATHS
"${HIP_ROOT_DIR}"
ENV ROCM_PATH
ENV HIP_PATH
/opt/rocm
/opt/rocm/hip
PATH_SUFFIXES bin
NO_DEFAULT_PATH
)
if(NOT HIP_HIPCONFIG_EXECUTABLE)
# Now search in default paths
find_program(HIP_HIPCONFIG_EXECUTABLE hipconfig)
endif()
mark_as_advanced(HIP_HIPCONFIG_EXECUTABLE)
# Find HIPCC_CMAKE_LINKER_HELPER executable
find_program(
HIP_HIPCC_CMAKE_LINKER_HELPER
NAMES hipcc_cmake_linker_helper
PATHS
"${HIP_ROOT_DIR}"
ENV ROCM_PATH
ENV HIP_PATH
/opt/rocm
/opt/rocm/hip
PATH_SUFFIXES bin
NO_DEFAULT_PATH
)
if(NOT HIP_HIPCC_CMAKE_LINKER_HELPER)
# Now search in default paths
find_program(HIP_HIPCC_CMAKE_LINKER_HELPER hipcc_cmake_linker_helper)
endif()
mark_as_advanced(HIP_HIPCC_CMAKE_LINKER_HELPER)
if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_VERSION)
# Compute the version
execute_process(
COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --version
OUTPUT_VARIABLE _hip_version
ERROR_VARIABLE _hip_error
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_STRIP_TRAILING_WHITESPACE
)
if(NOT _hip_error)
set(HIP_VERSION ${_hip_version} CACHE STRING "Version of HIP as computed from hipcc")
else()
set(HIP_VERSION "0.0.0" CACHE STRING "Version of HIP as computed by FindHIP()")
endif()
mark_as_advanced(HIP_VERSION)
endif()
if(HIP_VERSION)
string(REPLACE "." ";" _hip_version_list "${HIP_VERSION}")
list(GET _hip_version_list 0 HIP_VERSION_MAJOR)
list(GET _hip_version_list 1 HIP_VERSION_MINOR)
list(GET _hip_version_list 2 HIP_VERSION_PATCH)
set(HIP_VERSION_STRING "${HIP_VERSION}")
endif()
if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_PLATFORM)
# Compute the platform
execute_process(
COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform
OUTPUT_VARIABLE _hip_platform
OUTPUT_STRIP_TRAILING_WHITESPACE
)
set(HIP_PLATFORM ${_hip_platform} CACHE STRING "HIP platform as computed by hipconfig")
mark_as_advanced(HIP_PLATFORM)
endif()
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
HIP
REQUIRED_VARS
HIP_ROOT_DIR
HIP_HIPCC_EXECUTABLE
HIP_HIPCONFIG_EXECUTABLE
HIP_PLATFORM
VERSION_VAR HIP_VERSION
)
###############################################################################
# MACRO: Locate helper files
###############################################################################
macro(HIP_FIND_HELPER_FILE _name _extension)
set(_hip_full_name "${_name}.${_extension}")
get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
set(HIP_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindHIP/${_hip_full_name}")
if(NOT EXISTS "${HIP_${_name}}")
set(error_message "${_hip_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindHIP")
if(HIP_FIND_REQUIRED)
message(FATAL_ERROR "${error_message}")
else()
if(NOT HIP_FIND_QUIETLY)
message(STATUS "${error_message}")
endif()
endif()
endif()
# Set this variable as internal, so the user isn't bugged with it.
set(HIP_${_name} ${HIP_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE)
endmacro()
###############################################################################
hip_find_helper_file(run_make2cmake cmake)
hip_find_helper_file(run_hipcc cmake)
###############################################################################
###############################################################################
# MACRO: Reset compiler flags
###############################################################################
macro(HIP_RESET_FLAGS)
unset(HIP_HIPCC_FLAGS)
unset(HIP_HCC_FLAGS)
unset(HIP_NVCC_FLAGS)
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
unset(HIP_HIPCC_FLAGS_${config_upper})
unset(HIP_HCC_FLAGS_${config_upper})
unset(HIP_NVCC_FLAGS_${config_upper})
endforeach()
endmacro()
###############################################################################
# MACRO: Separate the options from the sources
###############################################################################
macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options)
set(${_sources})
set(${_cmake_options})
set(${_hipcc_options})
set(${_hcc_options})
set(${_nvcc_options})
set(_hipcc_found_options FALSE)
set(_hcc_found_options FALSE)
set(_nvcc_found_options FALSE)
foreach(arg ${ARGN})
if("x${arg}" STREQUAL "xHIPCC_OPTIONS")
set(_hipcc_found_options TRUE)
set(_hcc_found_options FALSE)
set(_nvcc_found_options FALSE)
elseif("x${arg}" STREQUAL "xHCC_OPTIONS")
set(_hipcc_found_options FALSE)
set(_hcc_found_options TRUE)
set(_nvcc_found_options FALSE)
elseif("x${arg}" STREQUAL "xNVCC_OPTIONS")
set(_hipcc_found_options FALSE)
set(_hcc_found_options FALSE)
set(_nvcc_found_options TRUE)
elseif(
"x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR
"x${arg}" STREQUAL "xSTATIC" OR
"x${arg}" STREQUAL "xSHARED" OR
"x${arg}" STREQUAL "xMODULE"
)
list(APPEND ${_cmake_options} ${arg})
else()
if(_hipcc_found_options)
list(APPEND ${_hipcc_options} ${arg})
elseif(_hcc_found_options)
list(APPEND ${_hcc_options} ${arg})
elseif(_nvcc_found_options)
list(APPEND ${_nvcc_options} ${arg})
else()
# Assume this is a file
list(APPEND ${_sources} ${arg})
endif()
endif()
endforeach()
endmacro()
###############################################################################
# MACRO: Add include directories to pass to the hipcc command
###############################################################################
set(HIP_HIPCC_INCLUDE_ARGS_USER "")
macro(HIP_INCLUDE_DIRECTORIES)
foreach(dir ${ARGN})
list(APPEND HIP_HIPCC_INCLUDE_ARGS_USER $<$<BOOL:${dir}>:-I${dir}>)
endforeach()
endmacro()
###############################################################################
# FUNCTION: Helper to avoid clashes of files with the same basename but different paths
###############################################################################
function(HIP_COMPUTE_BUILD_PATH path build_path)
# Convert to cmake style paths
file(TO_CMAKE_PATH "${path}" bpath)
if(IS_ABSOLUTE "${bpath}")
string(FIND "${bpath}" "${CMAKE_CURRENT_BINARY_DIR}" _binary_dir_pos)
if(_binary_dir_pos EQUAL 0)
file(RELATIVE_PATH bpath "${CMAKE_CURRENT_BINARY_DIR}" "${bpath}")
else()
file(RELATIVE_PATH bpath "${CMAKE_CURRENT_SOURCE_DIR}" "${bpath}")
endif()
endif()
# Remove leading /
string(REGEX REPLACE "^[/]+" "" bpath "${bpath}")
# Avoid absolute paths by removing ':'
string(REPLACE ":" "_" bpath "${bpath}")
# Avoid relative paths that go up the tree
string(REPLACE "../" "__/" bpath "${bpath}")
# Avoid spaces
string(REPLACE " " "_" bpath "${bpath}")
# Strip off the filename
get_filename_component(bpath "${bpath}" PATH)
set(${build_path} "${bpath}" PARENT_SCOPE)
endfunction()
###############################################################################
# MACRO: Parse OPTIONS from ARGN & set variables prefixed by _option_prefix
###############################################################################
macro(HIP_PARSE_HIPCC_OPTIONS _option_prefix)
set(_hip_found_config)
foreach(arg ${ARGN})
# Determine if we are dealing with a per-configuration flag
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
if(arg STREQUAL "${config_upper}")
set(_hip_found_config _${arg})
# Clear arg to prevent it from being processed anymore
set(arg)
endif()
endforeach()
if(arg)
list(APPEND ${_option_prefix}${_hip_found_config} "${arg}")
endif()
endforeach()
endmacro()
###############################################################################
# MACRO: Try and include dependency file if it exists
###############################################################################
macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file)
set(HIP_HIPCC_DEPEND)
set(HIP_HIPCC_DEPEND_REGENERATE FALSE)
# Create the dependency file if it doesn't exist
if(NOT EXISTS ${dependency_file})
file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n")
endif()
# Include the dependency file
include(${dependency_file})
# Verify the existence of all the included files
if(HIP_HIPCC_DEPEND)
foreach(f ${HIP_HIPCC_DEPEND})
if(NOT EXISTS ${f})
# If they aren't there, regenerate the file again
set(HIP_HIPCC_DEPEND_REGENERATE TRUE)
endif()
endforeach()
else()
# No dependencies, so regenerate the file
set(HIP_HIPCC_DEPEND_REGENERATE TRUE)
endif()
# Regenerate the dependency file if needed
if(HIP_HIPCC_DEPEND_REGENERATE)
set(HIP_HIPCC_DEPEND ${dependency_file})
file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n")
endif()
endmacro()
###############################################################################
# MACRO: Prepare cmake commands for the target
###############################################################################
macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files)
set(_hip_flags "")
string(TOUPPER "${CMAKE_BUILD_TYPE}" _hip_build_configuration)
if(HIP_HOST_COMPILATION_CPP)
set(HIP_C_OR_CXX CXX)
else()
set(HIP_C_OR_CXX C)
endif()
set(generated_extension ${CMAKE_${HIP_C_OR_CXX}_OUTPUT_EXTENSION})
# Initialize list of includes with those specified by the user. Append with
# ones specified to cmake directly.
set(HIP_HIPCC_INCLUDE_ARGS ${HIP_HIPCC_INCLUDE_ARGS_USER})
# Add the include directories
set(include_directories_generator "$<TARGET_PROPERTY:${_target},INCLUDE_DIRECTORIES>")
list(APPEND HIP_HIPCC_INCLUDE_ARGS "$<$<BOOL:${include_directories_generator}>:-I$<JOIN:${include_directories_generator}, -I>>")
get_directory_property(_hip_include_directories INCLUDE_DIRECTORIES)
list(REMOVE_DUPLICATES _hip_include_directories)
if(_hip_include_directories)
foreach(dir ${_hip_include_directories})
list(APPEND HIP_HIPCC_INCLUDE_ARGS $<$<BOOL:${dir}>:-I${dir}>)
endforeach()
endif()
HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN})
HIP_PARSE_HIPCC_OPTIONS(HIP_HIPCC_FLAGS ${_hipcc_options})
HIP_PARSE_HIPCC_OPTIONS(HIP_HCC_FLAGS ${_hcc_options})
HIP_PARSE_HIPCC_OPTIONS(HIP_NVCC_FLAGS ${_nvcc_options})
# Add the compile definitions
set(compile_definition_generator "$<TARGET_PROPERTY:${_target},COMPILE_DEFINITIONS>")
list(APPEND HIP_HIPCC_FLAGS "$<$<BOOL:${compile_definition_generator}>:-D$<JOIN:${compile_definition_generator}, -D>>")
# Check if we are building shared library.
set(_hip_build_shared_libs FALSE)
list(FIND _hip_cmake_options SHARED _hip_found_SHARED)
list(FIND _hip_cmake_options MODULE _hip_found_MODULE)
if(_hip_found_SHARED GREATER -1 OR _hip_found_MODULE GREATER -1)
set(_hip_build_shared_libs TRUE)
endif()
list(FIND _hip_cmake_options STATIC _hip_found_STATIC)
if(_hip_found_STATIC GREATER -1)
set(_hip_build_shared_libs FALSE)
endif()
# If we are building a shared library, add extra flags to HIP_HIPCC_FLAGS
if(_hip_build_shared_libs)
list(APPEND HIP_HCC_FLAGS "-fPIC")
list(APPEND HIP_NVCC_FLAGS "--shared -Xcompiler '-fPIC'")
endif()
# Set host compiler
set(HIP_HOST_COMPILER "${CMAKE_${HIP_C_OR_CXX}_COMPILER}")
# Set compiler flags
set(_HIP_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${HIP_C_OR_CXX}_FLAGS})")
set(_HIP_HIPCC_FLAGS "set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS})")
set(_HIP_HCC_FLAGS "set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS})")
set(_HIP_NVCC_FLAGS "set(HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS})")
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
set(_HIP_HOST_FLAGS "${_HIP_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${CMAKE_${HIP_C_OR_CXX}_FLAGS_${config_upper}})")
set(_HIP_HIPCC_FLAGS "${_HIP_HIPCC_FLAGS}\nset(HIP_HIPCC_FLAGS_${config_upper} ${HIP_HIPCC_FLAGS_${config_upper}})")
set(_HIP_HCC_FLAGS "${_HIP_HCC_FLAGS}\nset(HIP_HCC_FLAGS_${config_upper} ${HIP_HCC_FLAGS_${config_upper}})")
set(_HIP_NVCC_FLAGS "${_HIP_NVCC_FLAGS}\nset(HIP_NVCC_FLAGS_${config_upper} ${HIP_NVCC_FLAGS_${config_upper}})")
endforeach()
# Reset the output variable
set(_hip_generated_files "")
set(_hip_source_files "")
# Iterate over all arguments and create custom commands for all source files
foreach(file ${ARGN})
# Ignore any file marked as a HEADER_FILE_ONLY
get_source_file_property(_is_header ${file} HEADER_FILE_ONLY)
# Allow per source file overrides of the format. Also allows compiling non .cu files.
get_source_file_property(_hip_source_format ${file} HIP_SOURCE_PROPERTY_FORMAT)
if((${file} MATCHES "\\.cu$" OR _hip_source_format) AND NOT _is_header)
set(host_flag FALSE)
else()
set(host_flag TRUE)
endif()
if(NOT host_flag)
# Determine output directory
HIP_COMPUTE_BUILD_PATH("${file}" hip_build_path)
set(hip_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_target}.dir/${hip_build_path}")
get_filename_component(basename ${file} NAME)
set(generated_file_path "${hip_compile_output_dir}/${CMAKE_CFG_INTDIR}")
set(generated_file_basename "${_target}_generated_${basename}${generated_extension}")
# Set file names
set(generated_file "${generated_file_path}/${generated_file_basename}")
set(cmake_dependency_file "${hip_compile_output_dir}/${generated_file_basename}.depend")
set(custom_target_script_pregen "${hip_compile_output_dir}/${generated_file_basename}.cmake.pre-gen")
set(custom_target_script "${hip_compile_output_dir}/${generated_file_basename}.cmake")
# Set properties for object files
set_source_files_properties("${generated_file}"
PROPERTIES
EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked
)
# Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path
get_filename_component(file_path "${file}" PATH)
if(IS_ABSOLUTE "${file_path}")
set(source_file "${file}")
else()
set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
endif()
# Bring in the dependencies
HIP_INCLUDE_HIPCC_DEPENDENCIES(${cmake_dependency_file})
# Configure the build script
configure_file("${HIP_run_hipcc}" "${custom_target_script_pregen}" @ONLY)
file(GENERATE
OUTPUT "${custom_target_script}"
INPUT "${custom_target_script_pregen}"
)
set(main_dep DEPENDS ${source_file})
if(CMAKE_GENERATOR MATCHES "Makefiles")
set(verbose_output "$(VERBOSE)")
elseif(HIP_VERBOSE_BUILD)
set(verbose_output ON)
else()
set(verbose_output OFF)
endif()
# Create up the comment string
file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
set(hip_build_comment_string "Building HIPCC object ${generated_file_relative_path}")
# Build the generated file and dependency file
add_custom_command(
OUTPUT ${generated_file}
# These output files depend on the source_file and the contents of cmake_dependency_file
${main_dep}
DEPENDS ${HIP_HIPCC_DEPEND}
DEPENDS ${custom_target_script}
# Make sure the output directory exists before trying to write to it.
COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}"
COMMAND ${CMAKE_COMMAND} ARGS
-D verbose:BOOL=${verbose_output}
-D build_configuration:STRING=${_hip_build_configuration}
-D "generated_file:STRING=${generated_file}"
-P "${custom_target_script}"
WORKING_DIRECTORY "${hip_compile_output_dir}"
COMMENT "${hip_build_comment_string}"
)
# Make sure the build system knows the file is generated
set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE)
list(APPEND _hip_generated_files ${generated_file})
list(APPEND _hip_source_files ${file})
endif()
endforeach()
# Set the return parameter
set(${_generated_files} ${_hip_generated_files})
set(${_source_files} ${_hip_source_files})
endmacro()
###############################################################################
# HIP_ADD_EXECUTABLE
###############################################################################
macro(HIP_ADD_EXECUTABLE hip_target)
# Separate the sources from the options
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
if("x${HCC_HOME}" STREQUAL "x")
set(HCC_HOME "/opt/rocm/hcc")
endif()
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP)
endmacro()
###############################################################################
# HIP_ADD_LIBRARY
###############################################################################
macro(HIP_ADD_LIBRARY hip_target)
# Separate the sources from the options
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
add_library(${hip_target} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX})
endmacro()
# vim: ts=4:sw=4:expandtab:smartindent
View File
@ -1,7 +1,7 @@
#ifndef COMMUNICATION_H_INC
#define COMMUNICATION_H_INC
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include "common/Array.h"
@ -38,7 +38,7 @@ struct RankInfoStruct {
//! Redistribute domain data (dst may be smaller than the src)
template<class TYPE>
Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src_data,
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, const Utilities::MPI& comm );
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, MPI_Comm comm );
/*!
@ -59,7 +59,7 @@ public:
* @param[in] fill Fill {faces,edges,corners}
* @param[in] periodic Periodic dimensions
*/
fillHalo( const Utilities::MPI& comm, const RankInfoStruct& info,
fillHalo( MPI_Comm comm, const RankInfoStruct& info,
std::array<int,3> n, std::array<int,3> ng, int tag, int depth,
std::array<bool,3> fill = {true,true,true},
std::array<bool,3> periodic = {true,true,true} );
@ -83,7 +83,7 @@ public:
private:
Utilities::MPI comm;
MPI_Comm comm;
RankInfoStruct info;
std::array<int,3> n, ng;
int depth;
@ -93,6 +93,8 @@ private:
TYPE *mem;
TYPE *send[3][3][3], *recv[3][3][3];
MPI_Request send_req[3][3][3], recv_req[3][3][3];
size_t N_type;
MPI_Datatype datatype;
fillHalo(); // Private empty constructor
fillHalo(const fillHalo&); // Private copy constructor
fillHalo& operator=(const fillHalo&); // Private assignment operator
@ -134,7 +136,7 @@ void InitializeRanks( const int rank, const int nprocx, const int nprocy, const
//***************************************************************************************
inline void CommunicateSendRecvCounts( const Utilities::MPI& Communicator, int sendtag, int recvtag,
inline void CommunicateSendRecvCounts( MPI_Comm Communicator, int sendtag, int recvtag,
int rank_x, int rank_y, int rank_z,
int rank_X, int rank_Y, int rank_Z,
int rank_xy, int rank_XY, int rank_xY, int rank_Xy,
@ -153,53 +155,53 @@ inline void CommunicateSendRecvCounts( const Utilities::MPI& Communicator, int s
{
MPI_Request req1[18], req2[18];
MPI_Status stat1[18],stat2[18];
MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator.getCommunicator(),&req1[0]);
MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator.getCommunicator(),&req2[0]);
MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator.getCommunicator(),&req1[1]);
MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator.getCommunicator(),&req2[1]);
MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator.getCommunicator(),&req1[2]);
MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator.getCommunicator(),&req2[2]);
MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator.getCommunicator(),&req1[3]);
MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator.getCommunicator(),&req2[3]);
MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator.getCommunicator(),&req1[4]);
MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator.getCommunicator(),&req2[4]);
MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator.getCommunicator(),&req1[5]);
MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z,recvtag+5,Communicator.getCommunicator(),&req2[5]);
MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator,&req1[0]);
MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator,&req2[0]);
MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator,&req1[1]);
MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator,&req2[1]);
MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator,&req1[2]);
MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator,&req2[2]);
MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator,&req1[3]);
MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator,&req2[3]);
MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator,&req1[4]);
MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator,&req2[4]);
MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator,&req1[5]);
MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z,recvtag+5,Communicator,&req2[5]);
MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator.getCommunicator(),&req1[6]);
MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator.getCommunicator(),&req2[6]);
MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator.getCommunicator(),&req1[7]);
MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator.getCommunicator(),&req2[7]);
MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator.getCommunicator(),&req1[8]);
MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator.getCommunicator(),&req2[8]);
MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator.getCommunicator(),&req1[9]);
MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator.getCommunicator(),&req2[9]);
MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator,&req1[6]);
MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator,&req2[6]);
MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator,&req1[7]);
MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator,&req2[7]);
MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator,&req1[8]);
MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator,&req2[8]);
MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator,&req1[9]);
MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator,&req2[9]);
MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator.getCommunicator(),&req1[10]);
MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator.getCommunicator(),&req2[10]);
MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator.getCommunicator(),&req1[11]);
MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator.getCommunicator(),&req2[11]);
MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator.getCommunicator(),&req1[12]);
MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator.getCommunicator(),&req2[12]);
MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator.getCommunicator(),&req1[13]);
MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator.getCommunicator(),&req2[13]);
MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator,&req1[10]);
MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator,&req2[10]);
MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator,&req1[11]);
MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator,&req2[11]);
MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator,&req1[12]);
MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator,&req2[12]);
MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator,&req1[13]);
MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator,&req2[13]);
MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator.getCommunicator(),&req1[14]);
MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator.getCommunicator(),&req2[14]);
MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator.getCommunicator(),&req1[15]);
MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator.getCommunicator(),&req2[15]);
MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator.getCommunicator(),&req1[16]);
MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator.getCommunicator(),&req2[16]);
MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag+17,Communicator.getCommunicator(),&req1[17]);
MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag+17,Communicator.getCommunicator(),&req2[17]);
MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator,&req1[14]);
MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator,&req2[14]);
MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator,&req1[15]);
MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator,&req2[15]);
MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator,&req1[16]);
MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator,&req2[16]);
MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag+17,Communicator,&req1[17]);
MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag+17,Communicator,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
Communicator.barrier();
MPI_Barrier(Communicator);
}
//***************************************************************************************
inline void CommunicateRecvLists( const Utilities::MPI& Communicator, int sendtag, int recvtag,
inline void CommunicateRecvLists( MPI_Comm Communicator, int sendtag, int recvtag,
int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z,
int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy,
int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz,
@ -221,52 +223,52 @@ inline void CommunicateRecvLists( const Utilities::MPI& Communicator, int sendta
{
MPI_Request req1[18], req2[18];
MPI_Status stat1[18],stat2[18];
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator.getCommunicator(),&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator.getCommunicator(),&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator.getCommunicator(),&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator.getCommunicator(),&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator.getCommunicator(),&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator.getCommunicator(),&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator.getCommunicator(),&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator.getCommunicator(),&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,Communicator.getCommunicator(),&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator.getCommunicator(),&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator.getCommunicator(),&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator.getCommunicator(),&req2[5]);
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,Communicator,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator,&req2[5]);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator.getCommunicator(),&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator.getCommunicator(),&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator.getCommunicator(),&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator.getCommunicator(),&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator.getCommunicator(),&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator.getCommunicator(),&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator.getCommunicator(),&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator.getCommunicator(),&req2[9]);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator,&req2[9]);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator.getCommunicator(),&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator.getCommunicator(),&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator.getCommunicator(),&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator.getCommunicator(),&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator.getCommunicator(),&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator.getCommunicator(),&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator.getCommunicator(),&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator.getCommunicator(),&req2[13]);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator,&req2[13]);
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator.getCommunicator(),&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator.getCommunicator(),&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator.getCommunicator(),&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator.getCommunicator(),&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator.getCommunicator(),&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator.getCommunicator(),&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator.getCommunicator(),&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator.getCommunicator(),&req2[17]);
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
}
//***************************************************************************************
inline void CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& Communicator,
inline void CommunicateMeshHalo(DoubleArray &Mesh, MPI_Comm Communicator,
double *sendbuf_x,double *sendbuf_y,double *sendbuf_z,double *sendbuf_X,double *sendbuf_Y,double *sendbuf_Z,
double *sendbuf_xy,double *sendbuf_XY,double *sendbuf_xY,double *sendbuf_Xy,
double *sendbuf_xz,double *sendbuf_XZ,double *sendbuf_xZ,double *sendbuf_Xz,
@ -317,41 +319,41 @@ inline void CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& Communi
PackMeshData(sendList_YZ, sendCount_YZ ,sendbuf_YZ, MeshData);
//......................................................................................
MPI_Sendrecv(sendbuf_x,sendCount_x,MPI_DOUBLE,rank_x,sendtag,
recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_X,sendCount_X,MPI_DOUBLE,rank_X,sendtag,
recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_y,sendCount_y,MPI_DOUBLE,rank_y,sendtag,
recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Y,sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,
recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_z,sendCount_z,MPI_DOUBLE,rank_z,sendtag,
recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Z,sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,
recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xy,sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,
recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_XY,sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,
recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Xy,sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,
recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xY,sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,
recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xz,sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,
recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_XZ,sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,
recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Xz,sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,
recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xZ,sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,
recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_yz,sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,
recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_YZ,sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,
recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Yz,sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,
recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_yZ,sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,
recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator.getCommunicator(),MPI_STATUS_IGNORE);
recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator,MPI_STATUS_IGNORE);
//........................................................................................
UnpackMeshData(recvList_x, recvCount_x ,recvbuf_x, MeshData);
UnpackMeshData(recvList_X, recvCount_X ,recvbuf_X, MeshData);


@ -2,8 +2,9 @@
#define COMMUNICATION_HPP_INC
#include "common/Communication.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
//#include "ProfilerApp.h"
/********************************************************
@ -11,19 +12,17 @@
********************************************************/
template<class TYPE>
Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src_data,
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, const Utilities::MPI& comm )
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, MPI_Comm comm )
{
if ( comm.getSize() == 1 ) {
return src_data.subset( { 0, (size_t) dst_size[0]-1, 0, (size_t) dst_size[1]-1, 0, (size_t) dst_size[2]-1 } );
}
#ifdef USE_MPI
// Get the src size
std::array<int,3> src_size;
int size0[3] = { (int) src_data.size(0), (int) src_data.size(1), (int) src_data.size(2) };
comm.maxReduce( size0, src_size.data(), 3 );
MPI_Allreduce( size0, src_size.data(), 3, MPI_INT, MPI_MAX, comm );
if ( !src_data.empty() )
ASSERT( src_size[0] == size0[0] && src_size[1] == size0[1] && src_size[2] == size0[2] );
// Check that dst_size matches on all ranks
comm.maxReduce( dst_size.data(), size0, 3 );
MPI_Allreduce( dst_size.data(), size0, 3, MPI_INT, MPI_MAX, comm );
ASSERT( dst_size[0] == size0[0] && dst_size[1] == size0[1] && dst_size[2] == size0[2] );
// Function to get overlap range
auto calcOverlap = []( int i1[3], int i2[3], int j1[3], int j2[3] ) {
@ -61,7 +60,7 @@ Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src
}
std::vector<MPI_Request> send_request( send_rank.size() );
for (size_t i=0; i<send_rank.size(); i++)
send_request[i] = comm.Isend( send_data[i].data(), send_data[i].length(), send_rank[i], 5462 );
MPI_Isend( send_data[i].data(), sizeof(TYPE)*send_data[i].length(), MPI_BYTE, send_rank[i], 5462, comm, &send_request[i]);
// Unpack data from the appropriate ranks (including myself)
Array<TYPE> dst_data( dst_size[0], dst_size[1], dst_size[2] );
int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy, dst_size[2] * dst_rank.kz };
@ -76,14 +75,17 @@ Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src
continue;
int rank = src_rank.getRankForBlock(i,j,k);
Array<TYPE> data( index[1] - index[0] + 1, index[3] - index[2] + 1, index[5] - index[4] + 1 );
comm.recv( data.data(), data.length(), rank, 5462 );
MPI_Recv( data.data(), sizeof(TYPE)*data.length(), MPI_BYTE, rank, 5462, comm, MPI_STATUS_IGNORE );
dst_data.copySubset( index, data );
}
}
}
// Free data
comm.waitAll( send_request.size(), send_request.data() );
MPI_Waitall( send_request.size(), send_request.data(), MPI_STATUSES_IGNORE );
return dst_data;
#else
return src_data.subset( { 0, dst_size[0]-1, 0, dst_size[1]-1, 0, dst_size[2]-1 } );
#endif
}
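As a usage sketch (not part of this commit), the raw-MPI_Comm form of redistribute() above can be called as follows; the source and destination process grids and the block size are illustrative assumptions, and comm_rank() comes from common/MPI_Helpers.h.
// Illustrative sketch only: layouts and sizes below are assumptions, not taken from the diff.
Array<double> redistributeExample( const Array<double>& src_data, MPI_Comm comm )
{
    int rank = comm_rank( comm );
    RankInfoStruct src_rank( rank, 1, 1, 1 );     // assumed source process grid
    RankInfoStruct dst_rank( rank, 2, 2, 2 );     // assumed destination process grid
    std::array<int,3> dst_size = { 32, 32, 32 };  // assumed local block size on the destination
    return redistribute( src_rank, src_data, dst_rank, dst_size, comm );
}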
@ -92,11 +94,27 @@ Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src
* Structure to fill halo cells *
********************************************************/
template<class TYPE>
fillHalo<TYPE>::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& info_,
fillHalo<TYPE>::fillHalo( MPI_Comm comm_, const RankInfoStruct& info_,
std::array<int,3> n_, std::array<int,3> ng_, int tag0, int depth_,
std::array<bool,3> fill, std::array<bool,3> periodic ):
comm(comm_), info(info_), n(n_), ng(ng_), depth(depth_)
{
if ( std::is_same<TYPE,double>() ) {
N_type = 1;
datatype = MPI_DOUBLE;
} else if ( std::is_same<TYPE,float>() ) {
N_type = 1;
datatype = MPI_FLOAT;
} else if ( sizeof(TYPE)%sizeof(double)==0 ) {
N_type = sizeof(TYPE) / sizeof(double);
datatype = MPI_DOUBLE;
} else if ( sizeof(TYPE)%sizeof(float)==0 ) {
N_type = sizeof(TYPE) / sizeof(float);
datatype = MPI_FLOAT;
} else {
N_type = sizeof(TYPE);
datatype = MPI_BYTE;
}
// Set the fill pattern
memset(fill_pattern,0,sizeof(fill_pattern));
if ( fill[0] ) {
@ -233,8 +251,8 @@ void fillHalo<TYPE>::fill( Array<TYPE>& data )
for (int k=0; k<3; k++) {
if ( !fill_pattern[i][j][k] )
continue;
recv_req[i][j][k] = comm.Irecv( recv[i][j][k], depth2*N_send_recv[i][j][k],
info.rank[i][j][k], tag[2-i][2-j][2-k] );
MPI_Irecv( recv[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype,
info.rank[i][j][k], tag[2-i][2-j][2-k], comm, &recv_req[i][j][k] );
}
}
}
@ -245,18 +263,19 @@ void fillHalo<TYPE>::fill( Array<TYPE>& data )
if ( !fill_pattern[i][j][k] )
continue;
pack( data, i-1, j-1, k-1, send[i][j][k] );
send_req[i][j][k] = comm.Isend( send[i][j][k], depth2*N_send_recv[i][j][k],
info.rank[i][j][k], tag[i][j][k] );
MPI_Isend( send[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype,
info.rank[i][j][k], tag[i][j][k], comm, &send_req[i][j][k] );
}
}
}
// Recv the dst data and unpack (we receive in reverse order to match the sends)
MPI_Status status;
for (int i=2; i>=0; i--) {
for (int j=2; j>=0; j--) {
for (int k=2; k>=0; k--) {
if ( !fill_pattern[i][j][k] )
continue;
comm.wait( recv_req[i][j][k] );
MPI_Wait(&recv_req[i][j][k],&status);
unpack( data, i-1, j-1, k-1, recv[i][j][k] );
}
}
@ -267,7 +286,7 @@ void fillHalo<TYPE>::fill( Array<TYPE>& data )
for (int k=0; k<3; k++) {
if ( !fill_pattern[i][j][k] )
continue;
comm.wait( send_req[i][j][k] );
MPI_Wait(&send_req[i][j][k],&status);
}
}
}
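A minimal usage sketch of the raw-MPI_Comm fillHalo interface above, assuming a 2x2x2 process grid, a 16^3 interior block, and one ghost layer; none of these values come from the diff.
// Illustrative sketch only: process grid, sizes, and ghost width are assumptions.
void fillHaloExample( MPI_Comm comm, Array<double>& data )
{
    RankInfoStruct info( comm_rank(comm), 2, 2, 2 );   // assumed 2x2x2 process grid
    std::array<int,3>  n  = { 16, 16, 16 };            // interior cells per rank
    std::array<int,3>  ng = { 1, 1, 1 };               // one ghost layer
    std::array<bool,3> fill_dirs = { true, true, true };
    std::array<bool,3> periodic  = { true, true, true };
    fillHalo<double> halo( comm, info, n, ng, 0, 1, fill_dirs, periodic );
    halo.fill( data );   // data is assumed to be allocated as (n+2*ng) in each direction
}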


@ -12,7 +12,7 @@
#include "common/Domain.h"
#include "common/Array.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
// Inline function to read line without a return argument
@ -62,10 +62,11 @@ Domain::Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz,
NULL_USE( npy );
NULL_USE( npz );
// set up the neighbor ranks
int myrank = Comm.getRank();
int myrank;
MPI_Comm_rank( Comm, &myrank );
rank_info = RankInfoStruct( myrank, rank_info.nx, rank_info.ny, rank_info.nz );
Comm.barrier();
MPI_Barrier(Comm);
auto db = std::make_shared<Database>( );
db->putScalar<int>( "BC", BC );
@ -75,9 +76,10 @@ Domain::Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz,
db->putVector<double>( "L", { lx, ly, lz } );
initialize( db );
}
Domain::Domain( std::shared_ptr<Database> db, const Utilities::MPI& Communicator):
Domain::Domain( std::shared_ptr<Database> db, MPI_Comm Communicator):
database(db), Nx(0), Ny(0), Nz(0),
Lx(0), Ly(0), Lz(0), Volume(0), BoundaryCondition(0),
Comm(MPI_COMM_NULL),
inlet_layers_x(0), inlet_layers_y(0), inlet_layers_z(0),
outlet_layers_x(0), outlet_layers_y(0), outlet_layers_z(0),
inlet_layers_phase(1),outlet_layers_phase(2),
@ -107,13 +109,14 @@ Domain::Domain( std::shared_ptr<Database> db, const Utilities::MPI& Communicator
recvData_xY(NULL), recvData_yZ(NULL), recvData_Xz(NULL), recvData_XY(NULL), recvData_YZ(NULL), recvData_XZ(NULL),
id(NULL)
{
Comm = Communicator.dup();
MPI_Comm_dup(Communicator,&Comm);
// set up the neighbor ranks
int myrank = Comm.getRank();
int myrank;
MPI_Comm_rank( Comm, &myrank );
initialize( db );
rank_info = RankInfoStruct( myrank, rank_info.nx, rank_info.ny, rank_info.nz );
Comm.barrier();
MPI_Barrier(Comm);
}
Domain::~Domain()
@ -162,6 +165,10 @@ Domain::~Domain()
delete [] recvData_yZ; delete [] recvData_Yz; delete [] recvData_YZ;
// Free id
delete [] id;
// Free the communicator
if ( Comm != MPI_COMM_WORLD && Comm != MPI_COMM_NULL ) {
MPI_Comm_free(&Comm);
}
}
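As a hedged sketch (not part of the diff), callers now pass a raw MPI_Comm to the Domain; the constructor duplicates it with MPI_Comm_dup and the destructor releases the duplicate with MPI_Comm_free. The database keys mirror those used elsewhere in this commit, but the values and the putVector<int> call are illustrative assumptions.
std::shared_ptr<Domain> makeDomainExample( )
{
    auto db = std::make_shared<Database>( );
    db->putScalar<int>( "BC", 0 );                       // placeholder boundary condition
    db->putVector<int>( "nproc", { 1, 1, 1 } );          // putVector<int> assumed by analogy with putVector<double>
    db->putVector<int>( "n", { 16, 16, 16 } );           // placeholder local grid size
    db->putVector<double>( "L", { 1.0, 1.0, 1.0 } );     // placeholder domain length
    return std::make_shared<Domain>( db, MPI_COMM_WORLD );  // the communicator is duplicated internally
}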
void Domain::initialize( std::shared_ptr<Database> db )
@ -212,7 +219,8 @@ void Domain::initialize( std::shared_ptr<Database> db )
Ny = ny+2;
Nz = nz+2;
// Initialize ranks
int myrank = Comm.getRank();
int myrank;
MPI_Comm_rank( Comm, &myrank );
rank_info = RankInfoStruct(myrank,nproc[0],nproc[1],nproc[2]);
// inlet layers only apply to lower part of domain
if (rank_info.ix > 0) inlet_layers_x = 0;
@ -231,7 +239,8 @@ void Domain::initialize( std::shared_ptr<Database> db )
id = new signed char[N];
memset(id,0,N);
BoundaryCondition = d_db->getScalar<int>("BC");
int nprocs = Comm.getSize();
int nprocs;
MPI_Comm_size( Comm, &nprocs );
INSIST(nprocs == nproc[0]*nproc[1]*nproc[2],"Fatal error in processor count!");
}
@ -560,7 +569,7 @@ void Domain::Decomp( const std::string& Filename )
}
else{
//printf("Sending data to process %i \n", rnk);
Comm.send(loc_id,N,rnk,15);
MPI_Send(loc_id,N,MPI_CHAR,rnk,15,Comm);
}
// Write the data for this rank data
sprintf(LocalRankFilename,"ID.%05i",rnk+rank_offset);
@ -575,10 +584,10 @@ void Domain::Decomp( const std::string& Filename )
else{
// Receive the subdomain from rank = 0
//printf("Ready to receive data %i at process %i \n", N,rank);
Comm.recv(id,N,0,15);
MPI_Recv(id,N,MPI_CHAR,0,15,Comm,MPI_STATUS_IGNORE);
}
Comm.barrier();
//Comm.barrier();
MPI_Barrier(Comm);
// Compute the porosity
double sum;
double sum_local=0.0;
@ -618,7 +627,8 @@ void Domain::Decomp( const std::string& Filename )
}
}
}
sum = Comm.sumReduce(sum_local);
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,Comm);
//sum = Comm.sumReduce(sum_local);
porosity = sum*iVol_global;
if (rank()==0) printf("Media porosity = %f \n",porosity);
//.........................................................
@ -661,7 +671,7 @@ void Domain::AggregateLabels( const std::string& filename ){
}
}
}
Comm.barrier();
MPI_Barrier(Comm);
// populate the FullID
if (rank() == 0){
@ -687,7 +697,7 @@ void Domain::AggregateLabels( const std::string& filename ){
ipx = (rnk - ipz*npx*npy - ipy*npx);
//printf("ipx=%i ipy=%i ipz=%i\n", ipx, ipy, ipz);
int tag = 15+rnk;
Comm.recv(LocalID,local_size,rnk,tag);
MPI_Recv(LocalID,local_size,MPI_CHAR,rnk,tag,Comm,MPI_STATUS_IGNORE);
for (int k=1; k<nz-1; k++){
for (int j=1; j<ny-1; j++){
for (int i=1; i<nx-1; i++){
@ -710,9 +720,9 @@ void Domain::AggregateLabels( const std::string& filename ){
// send LocalID to rank=0
int tag = 15+ rank();
int dstrank = 0;
Comm.send(LocalID,local_size,dstrank,tag);
MPI_Send(LocalID,local_size,MPI_CHAR,dstrank,tag,Comm);
}
Comm.barrier();
MPI_Barrier(Comm);
}
@ -837,45 +847,45 @@ void Domain::CommInit()
sendBuf_YZ = new int [sendCount_YZ];
sendBuf_XZ = new int [sendCount_XZ];
//......................................................................................
MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x(),sendtag+0,Comm.getCommunicator(),&req1[0]);
MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X(),recvtag+0,Comm.getCommunicator(),&req2[0]);
MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X(),sendtag+1,Comm.getCommunicator(),&req1[1]);
MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x(),recvtag+1,Comm.getCommunicator(),&req2[1]);
MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y(),sendtag+2,Comm.getCommunicator(),&req1[2]);
MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y(),recvtag+2,Comm.getCommunicator(),&req2[2]);
MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y(),sendtag+3,Comm.getCommunicator(),&req1[3]);
MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y(),recvtag+3,Comm.getCommunicator(),&req2[3]);
MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z(),sendtag+4,Comm.getCommunicator(),&req1[4]);
MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z(),recvtag+4,Comm.getCommunicator(),&req2[4]);
MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z(),sendtag+5,Comm.getCommunicator(),&req1[5]);
MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z(),recvtag+5,Comm.getCommunicator(),&req2[5]);
MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy(),sendtag+6,Comm.getCommunicator(),&req1[6]);
MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY(),recvtag+6,Comm.getCommunicator(),&req2[6]);
MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY(),sendtag+7,Comm.getCommunicator(),&req1[7]);
MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy(),recvtag+7,Comm.getCommunicator(),&req2[7]);
MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy(),sendtag+8,Comm.getCommunicator(),&req1[8]);
MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY(),recvtag+8,Comm.getCommunicator(),&req2[8]);
MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY(),sendtag+9,Comm.getCommunicator(),&req1[9]);
MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy(),recvtag+9,Comm.getCommunicator(),&req2[9]);
MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz(),sendtag+10,Comm.getCommunicator(),&req1[10]);
MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ(),recvtag+10,Comm.getCommunicator(),&req2[10]);
MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ(),sendtag+11,Comm.getCommunicator(),&req1[11]);
MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz(),recvtag+11,Comm.getCommunicator(),&req2[11]);
MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz(),sendtag+12,Comm.getCommunicator(),&req1[12]);
MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ(),recvtag+12,Comm.getCommunicator(),&req2[12]);
MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ(),sendtag+13,Comm.getCommunicator(),&req1[13]);
MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz(),recvtag+13,Comm.getCommunicator(),&req2[13]);
MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz(),sendtag+14,Comm.getCommunicator(),&req1[14]);
MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ(),recvtag+14,Comm.getCommunicator(),&req2[14]);
MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ(),sendtag+15,Comm.getCommunicator(),&req1[15]);
MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz(),recvtag+15,Comm.getCommunicator(),&req2[15]);
MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz(),sendtag+16,Comm.getCommunicator(),&req1[16]);
MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ(),recvtag+16,Comm.getCommunicator(),&req2[16]);
MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ(),sendtag+17,Comm.getCommunicator(),&req1[17]);
MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz(),recvtag+17,Comm.getCommunicator(),&req2[17]);
MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x(),sendtag+0,Comm,&req1[0]);
MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X(),recvtag+0,Comm,&req2[0]);
MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X(),sendtag+1,Comm,&req1[1]);
MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x(),recvtag+1,Comm,&req2[1]);
MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y(),sendtag+2,Comm,&req1[2]);
MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y(),recvtag+2,Comm,&req2[2]);
MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y(),sendtag+3,Comm,&req1[3]);
MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y(),recvtag+3,Comm,&req2[3]);
MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z(),sendtag+4,Comm,&req1[4]);
MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z(),recvtag+4,Comm,&req2[4]);
MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z(),sendtag+5,Comm,&req1[5]);
MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z(),recvtag+5,Comm,&req2[5]);
MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy(),sendtag+6,Comm,&req1[6]);
MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY(),recvtag+6,Comm,&req2[6]);
MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY(),sendtag+7,Comm,&req1[7]);
MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy(),recvtag+7,Comm,&req2[7]);
MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy(),sendtag+8,Comm,&req1[8]);
MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY(),recvtag+8,Comm,&req2[8]);
MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY(),sendtag+9,Comm,&req1[9]);
MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy(),recvtag+9,Comm,&req2[9]);
MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz(),sendtag+10,Comm,&req1[10]);
MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ(),recvtag+10,Comm,&req2[10]);
MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ(),sendtag+11,Comm,&req1[11]);
MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz(),recvtag+11,Comm,&req2[11]);
MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz(),sendtag+12,Comm,&req1[12]);
MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ(),recvtag+12,Comm,&req2[12]);
MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ(),sendtag+13,Comm,&req1[13]);
MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz(),recvtag+13,Comm,&req2[13]);
MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz(),sendtag+14,Comm,&req1[14]);
MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ(),recvtag+14,Comm,&req2[14]);
MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ(),sendtag+15,Comm,&req1[15]);
MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz(),recvtag+15,Comm,&req2[15]);
MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz(),sendtag+16,Comm,&req1[16]);
MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ(),recvtag+16,Comm,&req2[16]);
MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ(),sendtag+17,Comm,&req1[17]);
MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz(),recvtag+17,Comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
Comm.barrier();
MPI_Barrier(Comm);
//......................................................................................
// recv buffers
recvList_x = new int [recvCount_x];
@ -897,42 +907,42 @@ void Domain::CommInit()
recvList_YZ = new int [recvCount_YZ];
recvList_XZ = new int [recvCount_XZ];
//......................................................................................
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x(),sendtag,Comm.getCommunicator(),&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X(),recvtag,Comm.getCommunicator(),&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X(),sendtag,Comm.getCommunicator(),&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x(),recvtag,Comm.getCommunicator(),&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y(),sendtag,Comm.getCommunicator(),&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y(),recvtag,Comm.getCommunicator(),&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y(),sendtag,Comm.getCommunicator(),&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y(),recvtag,Comm.getCommunicator(),&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z(),sendtag,Comm.getCommunicator(),&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z(),recvtag,Comm.getCommunicator(),&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z(),sendtag,Comm.getCommunicator(),&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z(),recvtag,Comm.getCommunicator(),&req2[5]);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy(),sendtag,Comm.getCommunicator(),&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY(),recvtag,Comm.getCommunicator(),&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY(),sendtag,Comm.getCommunicator(),&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy(),recvtag,Comm.getCommunicator(),&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy(),sendtag,Comm.getCommunicator(),&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY(),recvtag,Comm.getCommunicator(),&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY(),sendtag,Comm.getCommunicator(),&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy(),recvtag,Comm.getCommunicator(),&req2[9]);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz(),sendtag,Comm.getCommunicator(),&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ(),recvtag,Comm.getCommunicator(),&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ(),sendtag,Comm.getCommunicator(),&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz(),recvtag,Comm.getCommunicator(),&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz(),sendtag,Comm.getCommunicator(),&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ(),recvtag,Comm.getCommunicator(),&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ(),sendtag,Comm.getCommunicator(),&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz(),recvtag,Comm.getCommunicator(),&req2[13]);
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz(),sendtag,Comm.getCommunicator(),&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ(),recvtag,Comm.getCommunicator(),&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ(),sendtag,Comm.getCommunicator(),&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz(),recvtag,Comm.getCommunicator(),&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz(),sendtag,Comm.getCommunicator(),&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ(),recvtag,Comm.getCommunicator(),&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ(),sendtag,Comm.getCommunicator(),&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz(),recvtag,Comm.getCommunicator(),&req2[17]);
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x(),sendtag,Comm,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X(),recvtag,Comm,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X(),sendtag,Comm,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x(),recvtag,Comm,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y(),sendtag,Comm,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y(),recvtag,Comm,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y(),sendtag,Comm,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y(),recvtag,Comm,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z(),sendtag,Comm,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z(),recvtag,Comm,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z(),sendtag,Comm,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z(),recvtag,Comm,&req2[5]);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy(),sendtag,Comm,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY(),recvtag,Comm,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY(),sendtag,Comm,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy(),recvtag,Comm,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy(),sendtag,Comm,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY(),recvtag,Comm,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY(),sendtag,Comm,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy(),recvtag,Comm,&req2[9]);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz(),sendtag,Comm,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ(),recvtag,Comm,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ(),sendtag,Comm,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz(),recvtag,Comm,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz(),sendtag,Comm,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ(),recvtag,Comm,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ(),sendtag,Comm,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz(),recvtag,Comm,&req2[13]);
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz(),sendtag,Comm,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ(),recvtag,Comm,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ(),sendtag,Comm,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz(),recvtag,Comm,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz(),sendtag,Comm,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ(),recvtag,Comm,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ(),sendtag,Comm,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz(),recvtag,Comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//......................................................................................
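The two CommInit() hunks above follow a two-phase handshake: counts are exchanged first so receive buffers can be sized, then the index lists themselves are exchanged with the same non-blocking pattern. A minimal sketch of that pattern for a single (assumed) neighbor rank:
void exchangeCountsExample( MPI_Comm Comm, int neighbor )
{
    int sendList[10] = { 0 };                  // placeholder index list
    int sendCount = 10, recvCount = 0;
    MPI_Request rq[2];
    MPI_Status  stat[2];
    // Phase 1: exchange the counts so the receive buffer can be sized
    MPI_Isend( &sendCount, 1, MPI_INT, neighbor, 0, Comm, &rq[0] );
    MPI_Irecv( &recvCount, 1, MPI_INT, neighbor, 0, Comm, &rq[1] );
    MPI_Waitall( 2, rq, stat );
    // Phase 2: exchange the index lists themselves
    int *recvList = new int[recvCount];
    MPI_Isend( sendList, sendCount, MPI_INT, neighbor, 1, Comm, &rq[0] );
    MPI_Irecv( recvList, recvCount, MPI_INT, neighbor, 1, Comm, &rq[1] );
    MPI_Waitall( 2, rq, stat );
    delete [] recvList;
}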
@ -1077,7 +1087,7 @@ void Domain::ReadIDs(){
}
}
}
sum = Comm.sumReduce(sum_local);
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,Comm);
porosity = sum*iVol_global;
if (rank()==0) printf("Media porosity = %f \n",porosity);
//.........................................................
@ -1125,41 +1135,41 @@ void Domain::CommunicateMeshHalo(DoubleArray &Mesh)
PackMeshData(sendList_YZ, sendCount_YZ ,sendData_YZ, MeshData);
//......................................................................................
MPI_Sendrecv(sendData_x,sendCount_x,MPI_DOUBLE,rank_x(),sendtag,
recvData_X,recvCount_X,MPI_DOUBLE,rank_X(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_X,recvCount_X,MPI_DOUBLE,rank_X(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_X,sendCount_X,MPI_DOUBLE,rank_X(),sendtag,
recvData_x,recvCount_x,MPI_DOUBLE,rank_x(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_x,recvCount_x,MPI_DOUBLE,rank_x(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_y,sendCount_y,MPI_DOUBLE,rank_y(),sendtag,
recvData_Y,recvCount_Y,MPI_DOUBLE,rank_Y(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_Y,recvCount_Y,MPI_DOUBLE,rank_Y(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_Y,sendCount_Y,MPI_DOUBLE,rank_Y(),sendtag,
recvData_y,recvCount_y,MPI_DOUBLE,rank_y(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_y,recvCount_y,MPI_DOUBLE,rank_y(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_z,sendCount_z,MPI_DOUBLE,rank_z(),sendtag,
recvData_Z,recvCount_Z,MPI_DOUBLE,rank_Z(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_Z,recvCount_Z,MPI_DOUBLE,rank_Z(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_Z,sendCount_Z,MPI_DOUBLE,rank_Z(),sendtag,
recvData_z,recvCount_z,MPI_DOUBLE,rank_z(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_z,recvCount_z,MPI_DOUBLE,rank_z(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_xy,sendCount_xy,MPI_DOUBLE,rank_xy(),sendtag,
recvData_XY,recvCount_XY,MPI_DOUBLE,rank_XY(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_XY,recvCount_XY,MPI_DOUBLE,rank_XY(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_XY,sendCount_XY,MPI_DOUBLE,rank_XY(),sendtag,
recvData_xy,recvCount_xy,MPI_DOUBLE,rank_xy(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_xy,recvCount_xy,MPI_DOUBLE,rank_xy(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_Xy,sendCount_Xy,MPI_DOUBLE,rank_Xy(),sendtag,
recvData_xY,recvCount_xY,MPI_DOUBLE,rank_xY(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_xY,recvCount_xY,MPI_DOUBLE,rank_xY(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_xY,sendCount_xY,MPI_DOUBLE,rank_xY(),sendtag,
recvData_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_xz,sendCount_xz,MPI_DOUBLE,rank_xz(),sendtag,
recvData_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_XZ,sendCount_XZ,MPI_DOUBLE,rank_XZ(),sendtag,
recvData_xz,recvCount_xz,MPI_DOUBLE,rank_xz(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_xz,recvCount_xz,MPI_DOUBLE,rank_xz(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_Xz,sendCount_Xz,MPI_DOUBLE,rank_Xz(),sendtag,
recvData_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_xZ,sendCount_xZ,MPI_DOUBLE,rank_xZ(),sendtag,
recvData_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_yz,sendCount_yz,MPI_DOUBLE,rank_yz(),sendtag,
recvData_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_YZ,sendCount_YZ,MPI_DOUBLE,rank_YZ(),sendtag,
recvData_yz,recvCount_yz,MPI_DOUBLE,rank_yz(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_yz,recvCount_yz,MPI_DOUBLE,rank_yz(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_Yz,sendCount_Yz,MPI_DOUBLE,rank_Yz(),sendtag,
recvData_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ(),recvtag,Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendData_yZ,sendCount_yZ,MPI_DOUBLE,rank_yZ(),sendtag,
recvData_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz(),recvtag,Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvData_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz(),recvtag,Comm,MPI_STATUS_IGNORE);
//........................................................................................
UnpackMeshData(recvList_x, recvCount_x ,recvData_x, MeshData);
UnpackMeshData(recvList_X, recvCount_X ,recvData_X, MeshData);


@ -12,7 +12,7 @@
#include "common/Array.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "common/Database.h"
@ -63,7 +63,7 @@ private:
class Domain{
public:
//! Default constructor
Domain( std::shared_ptr<Database> db, const Utilities::MPI& Communicator);
Domain( std::shared_ptr<Database> db, MPI_Comm Communicator);
//! Obsolete constructor
Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz,
@ -116,7 +116,7 @@ public: // Public variables (need to create accessors instead)
double porosity;
RankInfoStruct rank_info;
Utilities::MPI Comm; // MPI Communicator for this domain
MPI_Comm Comm; // MPI Communicator for this domain
int BoundaryCondition;

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

common/MPI_Helpers.cpp (new file, 266 lines added)

@ -0,0 +1,266 @@
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
/********************************************************
* Return the MPI data type *
********************************************************/
template<> MPI_Datatype getMPItype<char>() {
return MPI_CHAR;
}
template<> MPI_Datatype getMPItype<unsigned char>() {
return MPI_UNSIGNED_CHAR;
}
template<> MPI_Datatype getMPItype<int>() {
return MPI_INT;
}
template<> MPI_Datatype getMPItype<long>() {
return MPI_LONG;
}
template<> MPI_Datatype getMPItype<unsigned long>() {
return MPI_UNSIGNED_LONG;
}
template<> MPI_Datatype getMPItype<long long>() {
return MPI_LONG_LONG;
}
template<> MPI_Datatype getMPItype<float>() {
return MPI_FLOAT;
}
template<> MPI_Datatype getMPItype<double>() {
return MPI_DOUBLE;
}
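A hypothetical helper (not part of this file) showing the intended use of getMPItype<T>(): generic code can look up the matching MPI datatype instead of hard-coding it.
template<class TYPE>
void bcastValue( TYPE& value, int root, MPI_Comm comm )
{
    // Broadcast a single value of any type that getMPItype<TYPE>() supports
    MPI_Bcast( &value, 1, getMPItype<TYPE>(), root, comm );
}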
/********************************************************
* Concrete implementations for packing/unpacking *
********************************************************/
// unsigned char
template<>
size_t packsize<unsigned char>( const unsigned char& )
{
return sizeof(unsigned char);
}
template<>
void pack<unsigned char>( const unsigned char& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(unsigned char));
}
template<>
void unpack<unsigned char>( unsigned char& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(unsigned char));
}
// char
template<>
size_t packsize<char>( const char& )
{
return sizeof(char);
}
template<>
void pack<char>( const char& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(char));
}
template<>
void unpack<char>( char& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(char));
}
// int
template<>
size_t packsize<int>( const int& )
{
return sizeof(int);
}
template<>
void pack<int>( const int& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(int));
}
template<>
void unpack<int>( int& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(int));
}
// unsigned int
template<>
size_t packsize<unsigned int>( const unsigned int& )
{
return sizeof(unsigned int);
}
template<>
void pack<unsigned int>( const unsigned int& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(int));
}
template<>
void unpack<unsigned int>( unsigned int& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(int));
}
// size_t
template<>
size_t packsize<size_t>( const size_t& )
{
return sizeof(size_t);
}
template<>
void pack<size_t>( const size_t& rhs, char *buffer )
{
memcpy(buffer,&rhs,sizeof(size_t));
}
template<>
void unpack<size_t>( size_t& data, const char *buffer )
{
memcpy(&data,buffer,sizeof(size_t));
}
// std::string
template<>
size_t packsize<std::string>( const std::string& rhs )
{
return rhs.size()+1;
}
template<>
void pack<std::string>( const std::string& rhs, char *buffer )
{
memcpy(buffer,rhs.c_str(),rhs.size()+1);
}
template<>
void unpack<std::string>( std::string& data, const char *buffer )
{
data = std::string(buffer);
}
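A round-trip sketch (not part of this file) showing how the packsize/pack/unpack specializations above fit together; the string content is illustrative.
void packStringExample( )
{
    std::string src = "Media porosity = 0.25", dst;
    std::vector<char> buffer( packsize( src ) );   // buffer size includes the terminating '\0'
    pack( src, buffer.data() );                    // serialize into the raw buffer
    unpack( dst, buffer.data() );                  // dst now equals src
}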
/********************************************************
* Fake MPI routines *
********************************************************/
#ifndef USE_MPI
int MPI_Init(int*,char***)
{
return 0;
}
int MPI_Init_thread(int*,char***, int required, int *provided )
{
*provided = required;
return 0;
}
int MPI_Finalize()
{
return 0;
}
int MPI_Comm_size( MPI_Comm, int *size )
{
*size = 1;
return 0;
}
int MPI_Comm_rank( MPI_Comm, int *rank )
{
*rank = 0;
return 0;
}
int MPI_Barrier( MPI_Comm )
{
return 0;
}
int MPI_Waitall( int, MPI_Request[], MPI_Status[] )
{
return 0;
}
int MPI_Wait( MPI_Request*, MPI_Status* )
{
return 0;
}
int MPI_Bcast( void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm )
{
return 0;
}
int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
MPI_Comm comm, MPI_Status *status)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPI_Comm comm, MPI_Request *request)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
int tag, MPI_Comm comm, MPI_Request *request)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, const int *recvcounts, const int *displs,
MPI_Datatype recvtype, MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
int dest, int sendtag,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
int source, int recvtag,
MPI_Comm comm, MPI_Status *status)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
MPI_Op op, int root, MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Comm_group(MPI_Comm comm, MPI_Group *group)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm)
{
ERROR("Not implimented yet");
return 0;
}
int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm)
{
*newcomm = comm;
return 0;
}
double MPI_Wtime( void )
{
return 0.0;
}
int MPI_Comm_free(MPI_Comm *group)
{
return 0;
}
int MPI_Group_free(MPI_Group *group)
{
return 0;
}
#endif
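A short sketch (not part of this file): with the stubs above, code written against plain MPI calls still compiles and behaves sensibly in a serial build without USE_MPI.
void serialBuildExample( )
{
    int rank = -1, size = -1;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );   // rank becomes 0 in the serial build
    MPI_Comm_size( MPI_COMM_WORLD, &size );   // size becomes 1 in the serial build
    MPI_Barrier( MPI_COMM_WORLD );            // no-op
}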

common/MPI_Helpers.h (new file, 239 lines added)

@ -0,0 +1,239 @@
// This file contains wrappers for MPI routines and functions to pack/unpack data structures
#ifndef MPI_WRAPPERS_INC
#define MPI_WRAPPERS_INC
#include <string.h>
#include <vector>
#include <set>
#include <map>
#ifdef USE_MPI
// Include MPI
#include "mpi.h"
#else
// Create fake MPI types
typedef int MPI_Comm;
typedef int MPI_Request;
typedef int MPI_Status;
#define MPI_COMM_WORLD 0
#define MPI_COMM_SELF 0
#define MPI_COMM_NULL -1
#define MPI_GROUP_NULL -2
#define MPI_STATUS_IGNORE NULL
enum MPI_Datatype { MPI_LOGICAL, MPI_CHAR, MPI_UNSIGNED_CHAR, MPI_INT,
MPI_UNSIGNED, MPI_LONG, MPI_UNSIGNED_LONG, MPI_LONG_LONG, MPI_FLOAT, MPI_DOUBLE };
enum MPI_Op { MPI_MIN, MPI_MAX, MPI_SUM };
typedef int MPI_Group;
#define MPI_THREAD_SINGLE 0
#define MPI_THREAD_FUNNELED 1
#define MPI_THREAD_SERIALIZED 2
#define MPI_THREAD_MULTIPLE 3
// Fake MPI functions
int MPI_Init(int*,char***);
int MPI_Init_thread( int *argc, char ***argv, int required, int *provided );
int MPI_Finalize();
int MPI_Comm_size( MPI_Comm, int *size );
int MPI_Comm_rank( MPI_Comm, int *rank );
int MPI_Barrier(MPI_Comm);
int MPI_Wait(MPI_Request*,MPI_Status*);
int MPI_Waitall(int,MPI_Request[],MPI_Status[]);
int MPI_Bcast(void*,int,MPI_Datatype,int,MPI_Comm);
int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPI_Comm comm);
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
MPI_Comm comm, MPI_Status *status);
int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPI_Comm comm, MPI_Request *request);
int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
int tag, MPI_Comm comm, MPI_Request *request);
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
MPI_Comm comm);
int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, const int *recvcounts, const int *displs,
MPI_Datatype recvtype, MPI_Comm comm);
int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
int dest, int sendtag,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
int source, int recvtag,
MPI_Comm comm, MPI_Status *status);
int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
MPI_Op op, int root, MPI_Comm comm);
double MPI_Wtime( void );
int MPI_Comm_group(MPI_Comm comm, MPI_Group *group);
int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm);
int MPI_Comm_free(MPI_Comm *group);
int MPI_Group_free(MPI_Group *group);
int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm);
#endif
//! Get the size of the MPI_Comm
// Note: this is a thread and interrupt safe function
inline int comm_size( MPI_Comm comm ) {
int size = 1;
MPI_Comm_size( comm, &size );
return size;
}
//! Get the rank of the MPI_Comm
// Note: this is a thread and interrupt safe function
inline int comm_rank( MPI_Comm comm ) {
int rank = 1;
MPI_Comm_rank( comm, &rank );
return rank;
}
//! Get the size of MPI_COMM_WORLD
inline int MPI_WORLD_SIZE( ) {
return comm_size( MPI_COMM_WORLD );
}
//! Get the rank of MPI_COMM_WORLD
inline int MPI_WORLD_RANK( ) {
return comm_rank( MPI_COMM_WORLD );
}
//! Return the appropriate MPI datatype for a class
template<class TYPE>
MPI_Datatype getMPItype();
//! Template function to return the buffer size required to pack a class
template<class TYPE>
size_t packsize( const TYPE& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const TYPE& rhs, char *buffer );
//! Template function to unpack a class from a buffer
template<class TYPE>
void unpack( TYPE& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::vector
template<class TYPE>
size_t packsize( const std::vector<TYPE>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const std::vector<TYPE>& rhs, char *buffer );
//! Template function to pack a class to a buffer
template<class TYPE>
void unpack( std::vector<TYPE>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::pair
template<class TYPE1, class TYPE2>
size_t packsize( const std::pair<TYPE1,TYPE2>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void pack( const std::pair<TYPE1,TYPE2>& rhs, char *buffer );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void unpack( std::pair<TYPE1,TYPE2>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::map
template<class TYPE1, class TYPE2>
size_t packsize( const std::map<TYPE1,TYPE2>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void pack( const std::map<TYPE1,TYPE2>& rhs, char *buffer );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void unpack( std::map<TYPE1,TYPE2>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::set
template<class TYPE>
size_t packsize( const std::set<TYPE>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const std::set<TYPE>& rhs, char *buffer );
//! Template function to pack a class to a buffer
template<class TYPE>
void unpack( std::set<TYPE>& data, const char *buffer );
// Helper functions
inline double sumReduce( MPI_Comm comm, double x )
{
double y = 0;
MPI_Allreduce(&x,&y,1,MPI_DOUBLE,MPI_SUM,comm);
return y;
}
inline float sumReduce( MPI_Comm comm, float x )
{
float y = 0;
MPI_Allreduce(&x,&y,1,MPI_FLOAT,MPI_SUM,comm);
return y;
}
inline int sumReduce( MPI_Comm comm, int x )
{
int y = 0;
MPI_Allreduce(&x,&y,1,MPI_INT,MPI_SUM,comm);
return y;
}
inline long long sumReduce( MPI_Comm comm, long long x )
{
long long y = 0;
MPI_Allreduce(&x,&y,1,MPI_LONG_LONG,MPI_SUM,comm);
return y;
}
inline bool sumReduce( MPI_Comm comm, bool x )
{
int y = sumReduce( comm, x?1:0 );
return y>0;
}
inline std::vector<float> sumReduce( MPI_Comm comm, const std::vector<float>& x )
{
auto y = x;
MPI_Allreduce(x.data(),y.data(),x.size(),MPI_FLOAT,MPI_SUM,comm);
return y;
}
inline std::vector<int> sumReduce( MPI_Comm comm, const std::vector<int>& x )
{
auto y = x;
MPI_Allreduce(x.data(),y.data(),x.size(),MPI_INT,MPI_SUM,comm);
return y;
}
inline double maxReduce( MPI_Comm comm, double x )
{
double y = 0;
MPI_Allreduce(&x,&y,1,MPI_DOUBLE,MPI_MAX,comm);
return y;
}
inline float maxReduce( MPI_Comm comm, float x )
{
float y = 0;
MPI_Allreduce(&x,&y,1,MPI_FLOAT,MPI_MAX,comm);
return y;
}
inline int maxReduce( MPI_Comm comm, int x )
{
int y = 0;
MPI_Allreduce(&x,&y,1,MPI_INT,MPI_MAX,comm);
return y;
}
#endif
#include "common/MPI_Helpers.hpp"


@ -1,9 +1,8 @@
// This file contains functions to pack/unpack data structures
#ifndef included_PackData_hpp
#define included_PackData_hpp
#include "IO/PackData.h"
// This file contains wrappers for MPI routines and functions to pack/unpack data structures
#ifndef MPI_WRAPPERS_HPP
#define MPI_WRAPPERS_HPP
#include "common/MPI_Helpers.h"
#include <string.h>
#include <vector>
#include <set>


@ -64,11 +64,11 @@ Array<uint8_t> readMicroCT( const std::string& filename )
// Read the compressed micro CT data and distribute
Array<uint8_t> readMicroCT( const Database& domain, const Utilities::MPI& comm )
Array<uint8_t> readMicroCT( const Database& domain, MPI_Comm comm )
{
// Get the local problem info
auto n = domain.getVector<int>( "n" );
int rank = comm.getRank();
int rank = comm_rank(MPI_COMM_WORLD);
auto nproc = domain.getVector<int>( "nproc" );
RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] );


@ -5,12 +5,11 @@
#include "common/Array.h"
#include "common/Communication.h"
#include "common/Database.h"
#include "common/MPI.h"
Array<uint8_t> readMicroCT( const std::string& filename );
Array<uint8_t> readMicroCT( const Database& domain, const Utilities::MPI& comm );
Array<uint8_t> readMicroCT( const Database& domain, MPI_Comm comm );
#endif


@ -5,7 +5,9 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
Lock=false; // unlock the communicator
//......................................................................................
// Create a separate copy of the communicator for the device
MPI_COMM_SCALBL = Dm->Comm.dup();
//MPI_Comm_group(Dm->Comm,&Group);
//MPI_Comm_create(Dm->Comm,Group,&MPI_COMM_SCALBL);
MPI_Comm_dup(Dm->Comm,&MPI_COMM_SCALBL);
//......................................................................................
// Copy the domain size and communication information directly from Dm
Nx = Dm->Nx;
@ -213,7 +215,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList_Yz,recvCount_Yz*sizeof(int));
//......................................................................................
MPI_COMM_SCALBL.barrier();
MPI_Barrier(MPI_COMM_SCALBL);
//...................................................................................
// Set up the receive distribution lists
@ -286,7 +288,7 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
//...................................................................................
//......................................................................................
MPI_COMM_SCALBL.barrier();
MPI_Barrier(MPI_COMM_SCALBL);
ScaLBL_DeviceBarrier();
//......................................................................................
SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+
@ -867,8 +869,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(12,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,dist,N);
ScaLBL_D3Q19_Pack(14,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,dist,N);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag);
MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]);
MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]);
//...Packing for X face(1,7,9,11,13)................................
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,N);
ScaLBL_D3Q19_Pack(7,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,dist,N);
@ -876,8 +878,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(11,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,dist,N);
ScaLBL_D3Q19_Pack(13,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,dist,N);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag);
MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]);
MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]);
//...Packing for y face(4,8,9,16,18).................................
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,dist,N);
ScaLBL_D3Q19_Pack(8,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,dist,N);
@ -885,8 +887,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(16,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,dist,N);
ScaLBL_D3Q19_Pack(18,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,dist,N);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag);
MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]);
MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]);
//...Packing for Y face(3,7,10,15,17).................................
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,dist,N);
ScaLBL_D3Q19_Pack(7,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
@ -894,8 +896,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(15,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
ScaLBL_D3Q19_Pack(17,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag);
MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]);
MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]);
//...Packing for z face(6,12,13,16,17)................................
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,dist,N);
ScaLBL_D3Q19_Pack(12,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,dist,N);
@ -903,8 +905,8 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(16,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,dist,N);
ScaLBL_D3Q19_Pack(17,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,dist,N);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag);
MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]);
MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]);
//...Packing for Z face(5,11,14,15,18)................................
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,dist,N);
@ -913,57 +915,57 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(15,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
ScaLBL_D3Q19_Pack(18,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag);
MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]);
MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]);
//...Pack the xy edge (8)................................
ScaLBL_D3Q19_Pack(8,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,dist,N);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]);
//...Pack the Xy edge (9)................................
ScaLBL_D3Q19_Pack(9,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,dist,N);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]);
//...Pack the xY edge (10)................................
ScaLBL_D3Q19_Pack(10,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,dist,N);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]);
//...Pack the XY edge (7)................................
ScaLBL_D3Q19_Pack(7,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,dist,N);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]);
//...Pack the xz edge (12)................................
ScaLBL_D3Q19_Pack(12,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,dist,N);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]);
//...Pack the xZ edge (14)................................
ScaLBL_D3Q19_Pack(14,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,dist,N);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]);
//...Pack the Xz edge (13)................................
ScaLBL_D3Q19_Pack(13,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,dist,N);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]);
//...Pack the XZ edge (11)................................
ScaLBL_D3Q19_Pack(11,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,dist,N);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]);
//...Pack the yz edge (16)................................
ScaLBL_D3Q19_Pack(16,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,dist,N);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]);
//...Pack the yZ edge (18)................................
ScaLBL_D3Q19_Pack(18,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,dist,N);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]);
//...Pack the Yz edge (17)................................
ScaLBL_D3Q19_Pack(17,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,dist,N);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]);
//...Pack the YZ edge (15)................................
ScaLBL_D3Q19_Pack(15,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,dist,N);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]);
//...................................................................................
}
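The hunk above swaps the Utilities::MPI wrapper calls (MPI_COMM_SCALBL.Isend / .Irecv) for raw MPI_Isend / MPI_Irecv calls that fill the req1/req2 request arrays; RecvD3Q19AA in the next hunk then completes all 18 directions with MPI_Waitall. A minimal sketch of that post-then-wait pattern, with illustrative names only (exchange_face, buf_send, buf_recv, nbr_send, nbr_recv are not ScaLBL identifiers):

    #include <mpi.h>

    // Post a non-blocking send/receive pair for one face, then wait for both.
    // Each of the 18 directions in SendD3Q19AA follows this shape: send toward
    // one neighbor and receive the opposite neighbor's data into the matching
    // buffer; in the real code the waits are deferred to RecvD3Q19AA so that
    // packing and kernel work can overlap with communication.
    void exchange_face(MPI_Comm comm, int nbr_send, int nbr_recv,
                       double *buf_send, double *buf_recv, int count, int tag)
    {
        MPI_Request req[2];
        MPI_Status  stat[2];
        MPI_Isend(buf_send, count, MPI_DOUBLE, nbr_send, tag, comm, &req[0]);
        MPI_Irecv(buf_recv, count, MPI_DOUBLE, nbr_recv, tag, comm, &req[1]);
        MPI_Waitall(2, req, stat);
    }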
@ -973,8 +975,8 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){
// NOTE: the center distribution f0 must NOT be at the start of feven; provide an offset to the start of f2
//...................................................................................
// Wait for completion of D3Q19 communication
MPI_COMM_SCALBL.waitAll(18,req1);
MPI_COMM_SCALBL.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
ScaLBL_DeviceBarrier();
//...................................................................................
@ -1057,8 +1059,8 @@ void ScaLBL_Communicator::RecvGrad(double *phi, double *grad){
// Receives halo and incorporates it into the D3Q19-based stencil gradient computation
//...................................................................................
// Wait for completion of D3Q19 communication
MPI_COMM_SCALBL.waitAll(18,req1);
MPI_COMM_SCALBL.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
ScaLBL_DeviceBarrier();
//...................................................................................
@ -1151,36 +1153,36 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N);
ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag);
MPI_Isend(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]);
MPI_Irecv(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]);
//...Packing for X face(1,7,9,11,13)................................
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N);
ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag);
MPI_Isend(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]);
MPI_Irecv(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]);
//...Packing for y face(4,8,9,16,18).................................
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N);
ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag);
MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]);
MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]);
//...Packing for Y face(3,7,10,15,17).................................
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N);
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag);
MPI_Isend(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]);
MPI_Irecv(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]);
//...Packing for z face(6,12,13,16,17)................................
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N);
ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag);
MPI_Isend(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]);
MPI_Irecv(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]);
//...Packing for Z face(5,11,14,15,18)................................
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N);
@ -1188,8 +1190,8 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){
//...................................................................................
// Send all the distributions
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag);
MPI_Isend(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]);
MPI_Irecv(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]);
}
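BiSendD3Q7AA packs two fields into one buffer per face, Aq at offset 0 and Bq at offset sendCount, and then ships 2*sendCount doubles in a single message. A sketch of that offset convention; pack_component is a simplified stand-in and not the real ScaLBL_D3Q19_Pack signature:

    // Gather `count` values of `field` through an index list into `sendbuf`
    // starting at `offset`.  Stacking two components back-to-back in the same
    // buffer lets one MPI message carry both of them.
    void pack_component(const int *list, int offset, int count,
                        double *sendbuf, const double *field)
    {
        for (int i = 0; i < count; i++)
            sendbuf[offset + i] = field[list[i]];
    }

    // Illustrative usage mirroring the x face above:
    //   pack_component(sendList_x, 0,       count_x, sendbuf_x, Aq);
    //   pack_component(sendList_x, count_x, count_x, sendbuf_x, Bq);
    //   MPI_Isend(sendbuf_x, 2*count_x, MPI_DOUBLE, rank_x, sendtag, comm, &req1[0]);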
@ -1199,8 +1201,8 @@ void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){
// NOTE: the center distribution f0 must NOT be at the start of feven; provide an offset to the start of f2
//...................................................................................
// Wait for completion of D3Q19 communication
MPI_COMM_SCALBL.waitAll(6,req1);
MPI_COMM_SCALBL.waitAll(6,req2);
MPI_Waitall(6,req1,stat1);
MPI_Waitall(6,req2,stat2);
ScaLBL_DeviceBarrier();
//...................................................................................
@ -1291,18 +1293,18 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){
//...................................................................................
// Send all the distributions
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag);
MPI_Isend(sendbuf_x, 3*sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]);
MPI_Irecv(recvbuf_X, 3*recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]);
MPI_Isend(sendbuf_X, 3*sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]);
MPI_Irecv(recvbuf_x, 3*recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]);
MPI_Isend(sendbuf_y, 3*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]);
MPI_Irecv(recvbuf_Y, 3*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]);
MPI_Isend(sendbuf_Y, 3*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]);
MPI_Irecv(recvbuf_y, 3*recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]);
MPI_Isend(sendbuf_z, 3*sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]);
MPI_Irecv(recvbuf_Z, 3*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]);
MPI_Isend(sendbuf_Z, 3*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]);
MPI_Irecv(recvbuf_z, 3*recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]);
}
@ -1312,8 +1314,8 @@ void ScaLBL_Communicator::TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq){
// NOTE: the center distribution f0 must NOT be at the start of feven; provide an offset to the start of f2
//...................................................................................
// Wait for completion of D3Q19 communication
MPI_COMM_SCALBL.waitAll(6,req1);
MPI_COMM_SCALBL.waitAll(6,req2);
MPI_Waitall(6,req1,stat1);
MPI_Waitall(6,req2,stat2);
ScaLBL_DeviceBarrier();
//...................................................................................
@ -1407,49 +1409,49 @@ void ScaLBL_Communicator::SendHalo(double *data){
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag);
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,MPI_COMM_SCALBL,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,MPI_COMM_SCALBL,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,MPI_COMM_SCALBL,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,MPI_COMM_SCALBL,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,MPI_COMM_SCALBL,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,MPI_COMM_SCALBL,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,MPI_COMM_SCALBL,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,MPI_COMM_SCALBL,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,MPI_COMM_SCALBL,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,MPI_COMM_SCALBL,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,MPI_COMM_SCALBL,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,MPI_COMM_SCALBL,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,MPI_COMM_SCALBL,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,MPI_COMM_SCALBL,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,MPI_COMM_SCALBL,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,MPI_COMM_SCALBL,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,MPI_COMM_SCALBL,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,MPI_COMM_SCALBL,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,MPI_COMM_SCALBL,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,MPI_COMM_SCALBL,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,MPI_COMM_SCALBL,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,MPI_COMM_SCALBL,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,MPI_COMM_SCALBL,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,MPI_COMM_SCALBL,&req2[17]);
//...................................................................................
}
void ScaLBL_Communicator::RecvHalo(double *data){
//...................................................................................
MPI_COMM_SCALBL.waitAll(18,req1);
MPI_COMM_SCALBL.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
ScaLBL_DeviceBarrier();
//...................................................................................
//...................................................................................
@ -1562,7 +1564,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl
LocInletArea = double(sendCount_z);
else LocInletArea = 0.f;
InletArea = MPI_COMM_SCALBL.sumReduce( LocInletArea );
MPI_Allreduce(&LocInletArea,&InletArea,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL);
//printf("Inlet area = %f \n", InletArea);
// Set the flux BC
@ -1571,7 +1573,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl
if (kproc == 0)
locsum = ScaLBL_D3Q19_AAeven_Flux_BC_z(dvcSendList_z, fq, flux, InletArea, sendCount_z, N);
sum = MPI_COMM_SCALBL.sumReduce( locsum );
MPI_Allreduce(&locsum,&sum,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL);
din = flux/InletArea + sum;
//if (rank==0) printf("computed din (even) =%f \n",din);
if (kproc == 0)
@ -1581,7 +1583,7 @@ double ScaLBL_Communicator::D3Q19_Flux_BC_z(int *neighborList, double *fq, doubl
if (kproc == 0)
locsum = ScaLBL_D3Q19_AAodd_Flux_BC_z(neighborList, dvcSendList_z, fq, flux, InletArea, sendCount_z, N);
sum = MPI_COMM_SCALBL.sumReduce( locsum );
MPI_Allreduce(&locsum,&sum,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_SCALBL);
din = flux/InletArea + sum;
//if (rank==0) printf("computed din (odd)=%f \n",din);
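In the hunks above the wrapper's sumReduce is replaced by a plain MPI_Allreduce with MPI_SUM; both compute the same global sum on every rank. A sketch of the reduction being performed (sum_over_ranks is an illustrative helper, not part of the code base):

    #include <mpi.h>

    // Global sum of a per-rank value; every rank receives the same total.
    double sum_over_ranks(MPI_Comm comm, double local)
    {
        double global = 0.0;
        MPI_Allreduce(&local, &global, 1, MPI_DOUBLE, MPI_SUM, comm);
        return global;
    }

    // e.g.  InletArea = sum_over_ranks(MPI_COMM_SCALBL, LocInletArea);
    //       din      = flux / InletArea + sum_over_ranks(MPI_COMM_SCALBL, locsum);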


@ -202,8 +202,9 @@ private:
// Give the object its own MPI communicator
RankInfoStruct rank_info;
MPI_Group Group; // Group of processors associated with this domain
Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain
MPI_Comm MPI_COMM_SCALBL; // MPI Communicator for this domain
MPI_Request req1[18],req2[18];
MPI_Status stat1[18],stat2[18];
//......................................................................................
// MPI ranks for all 18 neighbors
//......................................................................................


@ -9,6 +9,7 @@
#include "common/Array.h"
#include "common/Utilities.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "common/Database.h"
#include "common/SpherePack.h"


@ -12,6 +12,7 @@
#include "common/Array.h"
#include "common/Utilities.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "common/Database.h"


@ -14,49 +14,44 @@
/********************************************************************
* Constructor/Destructor *
********************************************************************/
UnitTest::UnitTest() : d_verbose( false ), d_comm( MPI_COMM_SELF )
UnitTest::UnitTest()
{
if ( Utilities::MPI::MPI_active() )
d_comm = MPI_COMM_WORLD;
#ifdef USE_MPI
comm = MPI_COMM_WORLD;
#endif
}
UnitTest::~UnitTest() { reset(); }
void UnitTest::reset()
{
d_mutex.lock();
mutex.lock();
// Clear the data forcing a reallocation
std::vector<std::string>().swap( d_pass );
std::vector<std::string>().swap( d_fail );
std::vector<std::string>().swap( d_expected );
d_mutex.unlock();
std::vector<std::string>().swap( pass_messages );
std::vector<std::string>().swap( fail_messages );
std::vector<std::string>().swap( expected_fail_messages );
mutex.unlock();
}
/********************************************************************
* Add a pass, fail, expected failure message in a thread-safe way *
********************************************************************/
void UnitTest::passes( std::string in )
void UnitTest::passes( const std::string &in )
{
d_mutex.lock();
if ( d_verbose )
printf( "UnitTest: %i passes: %s\n", d_comm.getRank(), in.data() );
d_pass.emplace_back( std::move( in ) );
d_mutex.unlock();
mutex.lock();
pass_messages.push_back( in );
mutex.unlock();
}
void UnitTest::failure( std::string in )
void UnitTest::failure( const std::string &in )
{
d_mutex.lock();
if ( d_verbose )
printf( "UnitTest: %i failed: %s\n", d_comm.getRank(), in.data() );
d_fail.emplace_back( std::move( in ) );
d_mutex.unlock();
mutex.lock();
fail_messages.push_back( in );
mutex.unlock();
}
void UnitTest::expected_failure( std::string in )
void UnitTest::expected_failure( const std::string &in )
{
d_mutex.lock();
if ( d_verbose )
printf( "UnitTest: %i expected_failure: %s\n", d_comm.getRank(), in.data() );
d_expected.emplace_back( std::move( in ) );
d_mutex.unlock();
mutex.lock();
expected_fail_messages.push_back( in );
mutex.unlock();
}
@ -64,6 +59,23 @@ void UnitTest::expected_failure( std::string in )
* Print a global report *
* Note: only rank 0 will print, all messages will be aggregated *
********************************************************************/
inline std::vector<int> UnitTest::allGather( int value ) const
{
int size = getSize();
std::vector<int> data( size, value );
#ifdef USE_MPI
if ( size > 1 )
MPI_Allgather( &value, 1, MPI_INT, data.data(), 1, MPI_INT, comm );
#endif
return data;
}
inline void UnitTest::barrier() const
{
#ifdef USE_MPI
if ( getSize() > 1 )
MPI_Barrier( comm );
#endif
}
static inline void print_messages( const std::vector<std::vector<std::string>> &messages )
{
if ( messages.size() > 1 ) {
@ -81,27 +93,28 @@ static inline void print_messages( const std::vector<std::vector<std::string>> &
}
void UnitTest::report( const int level0 ) const
{
d_mutex.lock();
int size = d_comm.getSize();
int rank = d_comm.getRank();
// Give all processors a chance to print any remaining messages
d_comm.barrier();
Utilities::sleep_ms( 10 );
mutex.lock();
int size = getSize();
int rank = getRank();
// Broadcast the print level from rank 0
int level = d_comm.bcast( level0, 0 );
int level = level0;
#ifdef USE_MPI
if ( getSize() > 1 )
MPI_Bcast( &level, 1, MPI_INT, 0, comm );
#endif
if ( level < 0 || level > 2 )
ERROR( "Invalid print level" );
// Perform a global all gather to get the number of failures per processor
auto N_pass = d_comm.allGather<int>( d_pass.size() );
auto N_fail = d_comm.allGather<int>( d_fail.size() );
auto N_expected = d_comm.allGather<int>( d_expected.size() );
int N_pass_tot = 0;
int N_fail_tot = 0;
int N_expected_tot = 0;
auto N_pass = allGather( pass_messages.size() );
auto N_fail = allGather( fail_messages.size() );
auto N_expected_fail = allGather( expected_fail_messages.size() );
int N_pass_tot = 0;
int N_fail_tot = 0;
int N_expected_fail_tot = 0;
for ( int i = 0; i < size; i++ ) {
N_pass_tot += N_pass[i];
N_fail_tot += N_fail[i];
N_expected_tot += N_expected[i];
N_expected_fail_tot += N_expected_fail[i];
}
// Send all messages to rank 0 (if needed)
std::vector<std::vector<std::string>> pass_messages_rank( size );
@ -109,13 +122,13 @@ void UnitTest::report( const int level0 ) const
std::vector<std::vector<std::string>> expected_fail_rank( size );
// Get the pass messages
if ( ( level == 1 && N_pass_tot <= 20 ) || level == 2 )
pass_messages_rank = UnitTest::gatherMessages( d_pass, 1 );
pass_messages_rank = UnitTest::gatherMessages( pass_messages, 1 );
// Get the fail messages
if ( level == 1 || level == 2 )
fail_messages_rank = UnitTest::gatherMessages( d_fail, 2 );
fail_messages_rank = UnitTest::gatherMessages( fail_messages, 2 );
// Get the expected_fail messages
if ( ( level == 1 && N_expected_tot <= 50 ) || level == 2 )
expected_fail_rank = UnitTest::gatherMessages( d_expected, 2 );
if ( ( level == 1 && N_expected_fail_tot <= 50 ) || level == 2 )
expected_fail_rank = UnitTest::gatherMessages( expected_fail_messages, 2 );
// Print the results of all messages (only rank 0 will print)
if ( rank == 0 ) {
pout << std::endl;
@ -161,31 +174,31 @@ void UnitTest::report( const int level0 ) const
pout << std::endl;
// Print the tests that were expected to fail
pout << "Tests expected failed" << std::endl;
if ( level == 0 || ( level == 1 && N_expected_tot > 50 ) ) {
if ( level == 0 || ( level == 1 && N_expected_fail_tot > 50 ) ) {
// We want to print a summary
if ( size > 8 ) {
// Print 1 summary for all processors
printp( " %i tests expected failed (use report level 2 for more detail)\n",
N_expected_tot );
N_expected_fail_tot );
} else {
// Print a summary for each processor
for ( int i = 0; i < size; i++ )
printp( " %i tests expected failed (proc %i) (use report level 2 for more "
"detail)\n",
N_expected[i], i );
N_expected_fail[i], i );
}
} else {
// We want to print all messages
for ( int i = 0; i < size; i++ )
ASSERT( (int) expected_fail_rank[i].size() == N_expected[i] );
ASSERT( (int) expected_fail_rank[i].size() == N_expected_fail[i] );
print_messages( expected_fail_rank );
}
pout << std::endl;
}
// Add a barrier to synchronize all processors (rank 0 is much slower)
d_comm.barrier();
barrier();
Utilities::sleep_ms( 10 ); // Need a brief pause to allow any printing to finish
d_mutex.unlock();
mutex.unlock();
}
@ -195,8 +208,8 @@ void UnitTest::report( const int level0 ) const
std::vector<std::vector<std::string>> UnitTest::gatherMessages(
const std::vector<std::string> &local_messages, int tag ) const
{
const int rank = d_comm.getRank();
const int size = d_comm.getSize();
const int rank = getRank();
const int size = getSize();
std::vector<std::vector<std::string>> messages( size );
if ( rank == 0 ) {
// Rank 0 should receive all messages
@ -220,6 +233,7 @@ std::vector<std::vector<std::string>> UnitTest::gatherMessages(
void UnitTest::pack_message_stream(
const std::vector<std::string> &messages, const int rank, const int tag ) const
{
#ifdef USE_MPI
// Get the size of the messages
auto N_messages = (int) messages.size();
auto *msg_size = new int[N_messages];
@ -240,11 +254,18 @@ void UnitTest::pack_message_stream(
k += msg_size[i];
}
// Send the message stream (using a non-blocking send)
auto request = d_comm.Isend( data, size_data, rank, tag );
MPI_Request request;
MPI_Isend( data, size_data, MPI_CHAR, rank, tag, comm, &request );
// Wait for the send to complete and free the temporary memory
d_comm.wait( request );
MPI_Status status;
MPI_Wait( &request, &status );
delete[] data;
delete[] msg_size;
#else
NULL_USE( messages );
NULL_USE( rank );
NULL_USE( tag );
#endif
}
@ -253,15 +274,20 @@ void UnitTest::pack_message_stream(
********************************************************************/
std::vector<std::string> UnitTest::unpack_message_stream( const int rank, const int tag ) const
{
#ifdef USE_MPI
// Probe the message to get the message size
int size_data = d_comm.probe( rank, tag );
MPI_Status status;
MPI_Probe( rank, tag, comm, &status );
int size_data = -1;
MPI_Get_count( &status, MPI_BYTE, &size_data );
ASSERT( size_data >= 0 );
// Allocate memory to receive the data
auto *data = new char[size_data];
// receive the data (using a non-blocking receive)
auto request = d_comm.Irecv( data, size_data, rank, tag );
MPI_Request request;
MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request );
// Wait for the communication to be received
d_comm.wait( request );
MPI_Wait( &request, &status );
// Unpack the message stream
int N_messages = 0;
memcpy( &N_messages, data, sizeof( int ) );
@ -277,16 +303,77 @@ std::vector<std::string> UnitTest::unpack_message_stream( const int rank, const
messages[i] = std::string( &data[k], msg_size[i] );
k += msg_size[i];
}
// Delete the temporary memory
delete[] data;
return messages;
#else
NULL_USE( rank );
NULL_USE( tag );
return std::vector<std::string>();
#endif
}
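pack_message_stream and unpack_message_stream agree on a simple framing, as the unpack loop above shows: an int message count, then one int length per message, then the concatenated characters. A standalone sketch of that layout without the MPI transport (pack_messages / unpack_messages are illustrative names):

    #include <cstring>
    #include <string>
    #include <vector>

    // Layout: [int N][int len_0 ... len_{N-1}][chars of msg_0 ... msg_{N-1}]
    std::vector<char> pack_messages(const std::vector<std::string> &msgs)
    {
        int N = static_cast<int>(msgs.size());
        std::vector<char> data(sizeof(int) * (N + 1));
        std::memcpy(data.data(), &N, sizeof(int));
        for (int i = 0; i < N; i++) {
            int len = static_cast<int>(msgs[i].size());
            std::memcpy(data.data() + sizeof(int) * (i + 1), &len, sizeof(int));
        }
        for (const auto &m : msgs)
            data.insert(data.end(), m.begin(), m.end());
        return data;
    }

    std::vector<std::string> unpack_messages(const std::vector<char> &data)
    {
        int N = 0;
        std::memcpy(&N, data.data(), sizeof(int));
        std::vector<std::string> msgs(N);
        size_t k = sizeof(int) * (N + 1);   // character payload starts here
        for (int i = 0; i < N; i++) {
            int len = 0;
            std::memcpy(&len, data.data() + sizeof(int) * (i + 1), sizeof(int));
            msgs[i] = std::string(data.data() + k, len);
            k += len;
        }
        return msgs;
    }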
/********************************************************************
* Other functions *
********************************************************************/
size_t UnitTest::NumPassGlobal() const { return d_comm.sumReduce( d_pass.size() ); }
size_t UnitTest::NumFailGlobal() const { return d_comm.sumReduce( d_fail.size() ); }
size_t UnitTest::NumExpectedFailGlobal() const { return d_comm.sumReduce( d_expected.size() ); }
int UnitTest::getRank() const
{
int rank = 0;
#ifdef USE_MPI
int flag = 0;
MPI_Initialized( &flag );
if ( flag )
MPI_Comm_rank( comm, &rank );
#endif
return rank;
}
int UnitTest::getSize() const
{
int size = 1;
#ifdef USE_MPI
int flag = 0;
MPI_Initialized( &flag );
if ( flag )
MPI_Comm_size( comm, &size );
#endif
return size;
}
size_t UnitTest::NumPassGlobal() const
{
size_t num = pass_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
}
#endif
return num;
}
size_t UnitTest::NumFailGlobal() const
{
size_t num = fail_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
}
#endif
return num;
}
size_t UnitTest::NumExpectedFailGlobal() const
{
size_t num = expected_fail_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
}
#endif
return num;
}
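NumPassGlobal, NumFailGlobal and NumExpectedFailGlobal repeat the same guarded integer reduction (note that the size_t counts are narrowed to int before the Allreduce). A sketch of the shared helper the three bodies amount to; sum_counts is illustrative, not an existing function, and it assumes MPI has been initialized:

    #include <mpi.h>
    #include <cstddef>

    // Sum a per-rank count over the communicator.  The cast mirrors the code
    // above and assumes the totals fit in an int.
    static size_t sum_counts(MPI_Comm comm, size_t local)
    {
        int send = static_cast<int>(local);
        int sum  = 0;
        MPI_Allreduce(&send, &sum, 1, MPI_INT, MPI_SUM, comm);
        return static_cast<size_t>(sum);
    }

    // NumPassGlobal() is then equivalent to: return sum_counts(comm, pass_messages.size());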


@ -1,11 +1,13 @@
#ifndef included_UnitTest
#define included_UnitTest
#include "common/MPI.h"
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
#ifdef USE_MPI
#include "mpi.h"
#endif
/*!
@ -26,47 +28,47 @@
* \endcode
*/
class UnitTest final
class UnitTest
{
public:
//! Constructor
UnitTest();
//! Destructor
~UnitTest();
// Copy constructor
UnitTest( const UnitTest & ) = delete;
// Assignment operator
UnitTest &operator=( const UnitTest & ) = delete;
virtual ~UnitTest();
//! Indicate a passed test (thread-safe)
void passes( std::string in );
virtual void passes( const std::string &in );
//! Indicate a failed test (thread-safe)
void failure( std::string in );
virtual void failure( const std::string &in );
//! Indicate an expected failed test (thread-safe)
void expected_failure( std::string in );
virtual void expected_failure( const std::string &in );
//! Return the number of passed tests locally
inline size_t NumPassLocal() const { return d_pass.size(); }
virtual size_t NumPassLocal() const { return pass_messages.size(); }
//! Return the number of failed tests locally
inline size_t NumFailLocal() const { return d_fail.size(); }
virtual size_t NumFailLocal() const { return fail_messages.size(); }
//! Return the number of expected failed tests locally
inline size_t NumExpectedFailLocal() const { return d_expected.size(); }
virtual size_t NumExpectedFailLocal() const { return expected_fail_messages.size(); }
//! Return the number of passed tests globally
size_t NumPassGlobal() const;
virtual size_t NumPassGlobal() const;
//! Return the number of failed tests globally
size_t NumFailGlobal() const;
virtual size_t NumFailGlobal() const;
//! Return the number of expected failed tests globally
size_t NumExpectedFailGlobal() const;
virtual size_t NumExpectedFailGlobal() const;
//! Return the rank of the current processor
int getRank() const;
//! Return the number of processors
int getSize() const;
/*!
* Print a report of the passed and failed tests.
@ -75,28 +77,29 @@ public:
* to print correctly).
* @param level Optional integer specifying the level of reporting (default: 1)
* 0: Report the number of tests passed, failed, and expected failures.
* 1: Report the passed tests (if <=20) or number passed,
* Report all failures,
* Report the expected failed tests (if <=50) or the number passed.
* 1: Report the passed tests (if <=20) or the number passed otherwise,
* report all failures,
* report the expected failed tests (if <=50) or their number otherwise.
* 2: Report all passed, failed, and expected failed tests.
*/
void report( const int level = 1 ) const;
virtual void report( const int level = 1 ) const;
//! Clear the messages
void reset();
//! Make the unit test operator verbose?
void verbose( bool verbose = true ) { d_verbose = verbose; }
protected:
std::vector<std::string> pass_messages;
std::vector<std::string> fail_messages;
std::vector<std::string> expected_fail_messages;
mutable std::mutex mutex;
#ifdef USE_MPI
MPI_Comm comm;
#endif
private:
std::vector<std::string> d_pass;
std::vector<std::string> d_fail;
std::vector<std::string> d_expected;
bool d_verbose;
mutable std::mutex d_mutex;
Utilities::MPI d_comm;
// Make the copy constructor private
UnitTest( const UnitTest & ) {}
private:
// Function to pack the messages into a single data stream and send to the given processor
// Note: This function does not return until the message stream has been sent
void pack_message_stream(
@ -106,7 +109,9 @@ private:
// Note: This function does not return until the message stream has been received
std::vector<std::string> unpack_message_stream( const int rank, const int tag ) const;
// Gather the messages
// Helper functions
inline void barrier() const;
inline std::vector<int> allGather( int value ) const;
inline std::vector<std::vector<std::string>> gatherMessages(
const std::vector<std::string> &local_messages, int tag ) const;
};
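For reference, a minimal driver showing how the interface above is intended to be used in an MPI build; the include path, the checks, and their messages are illustrative, only the UnitTest calls come from the class:

    #include "common/UnitTest.h"
    #include <mpi.h>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);
        UnitTest ut;

        // Record results as the checks run (each call is thread-safe)
        if (2 + 2 == 4)
            ut.passes("basic arithmetic");
        else
            ut.failure("basic arithmetic");
        ut.expected_failure("feature not implemented yet");

        ut.report();                      // rank 0 prints the aggregated report
        int N_errors = static_cast<int>(ut.NumFailGlobal());

        MPI_Finalize();
        return N_errors;                  // non-zero exit if any rank failed
    }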


@ -143,43 +143,35 @@
* Be sure to follow with ENABLE_WARNINGS
*/
// clang-format off
#ifndef DISABLE_WARNINGS
#if defined( USING_MSVC )
#ifdef DISABLE_WARNINGS
// Macros previously defined
#elif defined( USING_MSVC )
#define DISABLE_WARNINGS __pragma( warning( push, 0 ) )
#define ENABLE_WARNINGS __pragma( warning( pop ) )
#elif defined( USING_CLANG )
#define DISABLE_WARNINGS \
_Pragma( "clang diagnostic push" ) \
_Pragma( "clang diagnostic ignored \"-Wall\"" ) \
_Pragma( "clang diagnostic push" ) _Pragma( "clang diagnostic ignored \"-Wall\"" ) \
_Pragma( "clang diagnostic ignored \"-Wextra\"" ) \
_Pragma( "clang diagnostic ignored \"-Wunused-private-field\"" ) \
_Pragma( "clang diagnostic ignored \"-Wdeprecated-declarations\"" ) \
_Pragma( "clang diagnostic ignored \"-Winteger-overflow\"" )
_Pragma( "clang diagnostic ignored \"-Wmismatched-new-delete\"" )
#define ENABLE_WARNINGS _Pragma( "clang diagnostic pop" )
#elif defined( USING_GCC )
// Note: We cannot disable the -Wliteral-suffix message with this macro because the
// pragma command cannot suppress warnings from the C++ preprocessor. See gcc bug #53431.
#define DISABLE_WARNINGS \
_Pragma( "GCC diagnostic push" ) \
_Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wall\"" ) \
_Pragma( "GCC diagnostic push" ) _Pragma( "GCC diagnostic ignored \"-Wall\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wextra\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wpedantic\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \
_Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wdeprecated-declarations\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wvirtual-move-assign\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-function\"" ) \
_Pragma( "GCC diagnostic ignored \"-Woverflow\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-variable\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wignored-qualifiers\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wenum-compare\"" ) \
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wterminate\"" )
#define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" )
#else
#define DISABLE_WARNINGS
#define ENABLE_WARNINGS
#endif
#endif
// clang-format on
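A typical (illustrative) use of the pair, wrapping a header that is noisy under -Wall so that the rest of the translation unit keeps full warnings; some_noisy_header.h is a placeholder:

    DISABLE_WARNINGS
    #include "some_noisy_header.h"   // placeholder for a warning-heavy third-party header
    ENABLE_WARNINGS

    // Project code below this point is still compiled with full warnings.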


@ -1,4 +1,5 @@
extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;
// conserved moments
double rho,ux,uy,uz,uu;
// non-conserved moments
@ -110,12 +111,14 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int
}
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;
// conserved moments
double rho,ux,uy,uz,uu;
// non-conserved moments
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18;
int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18;
int nread;
for (int n=start; n<finish; n++){
// q=0
@ -272,4 +275,4 @@ extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int star
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
}
}
}


@ -920,17 +920,21 @@ extern "C" void ScaLBL_D3Q7_ColorCollideMass(char *ID, double *A_even, double *A
double *Den, double *Phi, double *ColorGrad, double *Velocity, double beta, int N, bool pBC)
{
char id;
int idx,n,q,Cqx,Cqy,Cqz;
// int sendLoc;
double f0,f1,f2,f3,f4,f5,f6;
double na,nb,nab; // density values
double ux,uy,uz; // flow velocity
double nx,ny,nz,C; // color gradient components
double a1,a2,b1,b2;
double delta;
double sp,delta;
//double feq[6]; // equilibrium distributions
// Set of discrete velocities for the D3Q7 model
//int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}};
for (int n=0; n<N; n++){
for (n=0; n<N; n++){
id = ID[n];
if (id != 0 ){
@ -1220,20 +1224,25 @@ extern "C" void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *P
extern "C" void ScaLBL_ComputePhaseField(char *ID, double *Phi, double *Den, int N)
{
int n;
double Na,Nb;
//...................................................................
// Update Phi
for (int n=0; n<N; n++){
for (n=0; n<N; n++){
if (ID[n] > 0 ){
// Get the density value (Streaming already performed)
double Na = Den[n];
double Nb = Den[N+n];
Na = Den[n];
Nb = Den[N+n];
Phi[n] = (Na-Nb)/(Na+Nb);
}
}
//...................................................................
}
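The loop above maps the two component densities onto the phase indicator phi = (Na - Nb)/(Na + Nb), which runs from +1 in pure component A to -1 in pure component B. A one-line sketch of that mapping (phase_indicator is an illustrative name):

    // Assumes Na + Nb > 0, as the division in ScaLBL_ComputePhaseField already does.
    static inline double phase_indicator(double Na, double Nb)
    {
        return (Na - Nb) / (Na + Nb);   // +1: pure A, -1: pure B, 0: interface midpoint
    }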
extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){
for (int n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){
int n;
for (n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){
Phi[n] = value;
}
}
@ -1246,7 +1255,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, do
double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
int ijk,nn;
int ijk,nn,n;
double fq;
// conserved moments
double rho,jx,jy,jz;
@ -1829,7 +1838,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di
double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
int nn,ijk,nread;
int n,nn,ijk,nread;
int nr1,nr2,nr3,nr4,nr5,nr6;
int nr7,nr8,nr9,nr10;
int nr11,nr12,nr13,nr14;
@ -2483,7 +2492,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di
extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq,
double *Den, double *Phi, int start, int finish, int Np){
int idx, nread;
int idx,n,nread;
double fq,nA,nB;
for (int n=start; n<finish; n++){
@ -2569,11 +2578,11 @@ extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double
}
extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
int start, int finish, int Np)
{
int start, int finish, int Np){
int idx,n,nread;
double fq,nA,nB;
for (int n=start; n<finish; n++){
double fq,nA,nB;
// compute number density for component A
// q=0
fq = Aq[n];
@ -2637,25 +2646,27 @@ extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq,
Den[Np+n] = nB;
// save the phase indicator field
int idx = Map[n];
idx = Map[n];
Phi[idx] = (nA-nB)/(nA+nB);
}
}
extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *phi, double *ColorGrad, int start, int finish, int Np, int Nx, int Ny, int Nz){
int idx,n,N,i,j,k,nn;
// distributions
double f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
double nx,ny,nz;
for (int idx=0; idx<Np; idx++){
for (idx=0; idx<Np; idx++){
// Get the 1D index based on regular data layout
int n = Map[idx];
n = Map[idx];
//.......Back out the 3D indices for node n..............
int k = n/(Nx*Ny);
int j = (n-Nx*Ny*k)/Nx;
int i = n-Nx*Ny*k-Nx*j;
k = n/(Nx*Ny);
j = (n-Nx*Ny*k)/Nx;
i = n-Nx*Ny*k-Nx*j;
//........................................................................
//........Get 1-D index for this thread....................
// n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
@ -2664,7 +2675,7 @@ extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *phi, double *ColorGrad,
//........................................................................
//.................Read Phase Indicator Values............................
//........................................................................
int nn = n-1; // neighbor index (get convention)
nn = n-1; // neighbor index (get convention)
if (i-1<0) nn += Nx; // periodic BC along the x-boundary
f1 = phi[nn]; // get neighbor for phi - 1
//........................................................................


@ -4,7 +4,7 @@
#include "D3Q19.h"
#include "D3Q7.h"
#include "Color.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;


@ -10,7 +10,7 @@
#include "D3Q19.h"
#include "D3Q7.h"
#include "Color.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;


@ -9,7 +9,7 @@ color lattice boltzmann model
#include <stdlib.h>
#include <time.h>
ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM):
ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM):
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0),
Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0),
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM)
@ -167,9 +167,9 @@ void ScaLBL_ColorModel::SetDomain(){
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
Averages = std::shared_ptr<SubPhase> ( new SubPhase(Dm) ); // SubPhase analysis object
comm.barrier();
MPI_Barrier(comm);
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
// Read domain parameters
rank = Dm->rank();
nprocx = Dm->nprocx();
@ -292,7 +292,7 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase)
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
for (size_t idx=0; idx<NLABELS; idx++)
label_count_global[idx] = Dm->Comm.sumReduce( label_count[idx] );
label_count_global[idx]=sumReduce( Dm->Comm, label_count[idx]);
if (rank==0){
printf("Component labels: %lu \n",NLABELS);
@ -333,7 +333,7 @@ void ScaLBL_ColorModel::Create(){
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np);
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
@ -465,7 +465,7 @@ void ScaLBL_ColorModel::Initialize(){
ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double));
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
}
if (rank==0) printf ("Initializing phase field \n");
@ -651,7 +651,7 @@ void ScaLBL_ColorModel::Run(){
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
@ -700,8 +700,7 @@ void ScaLBL_ColorModel::Run(){
}
ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
// *************EVEN TIMESTEP*************
timestep++;
@ -736,10 +735,10 @@ void ScaLBL_ColorModel::Run(){
}
ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************
MPI_Barrier(comm);
PROFILE_STOP("Update");
if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition > 0){
@ -980,7 +979,7 @@ void ScaLBL_ColorModel::Run(){
//morph_delta *= (-1.0);
REVERSE_FLOW_DIRECTION = false;
}
comm.barrier();
MPI_Barrier(comm);
}
morph_timesteps += analysis_interval;
}
@ -990,7 +989,7 @@ void ScaLBL_ColorModel::Run(){
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
@ -1035,17 +1034,17 @@ double ScaLBL_ColorModel::ImageInit(std::string Filename){
}
}
Count = Dm->Comm.sumReduce( Count );
PoreCount = Dm->Comm.sumReduce( PoreCount );
Count=sumReduce( Dm->Comm, Count);
PoreCount=sumReduce( Dm->Comm, PoreCount);
if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount);
ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double));
comm.barrier();
MPI_Barrier(comm);
ScaLBL_D3Q19_Init(fq, Np);
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
comm.barrier();
MPI_Barrier(comm);
ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double));
@ -1077,7 +1076,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
BlobIDstruct new_index;
double vF=0.0; double vS=0.0;
ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm);
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
long long count_connected=0;
long long count_porespace=0;
@ -1099,9 +1098,9 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
}
}
}
count_connected = Dm->Comm.sumReduce( count_connected);
count_porespace = Dm->Comm.sumReduce( count_porespace);
count_water = Dm->Comm.sumReduce( count_water);
count_connected=sumReduce( Dm->Comm, count_connected);
count_porespace=sumReduce( Dm->Comm, count_porespace);
count_water=sumReduce( Dm->Comm, count_water);
for (int k=0; k<nz; k++){
for (int j=0; j<ny; j++){
@ -1173,7 +1172,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
}
}
}
count_morphopen = Dm->Comm.sumReduce( count_morphopen);
count_morphopen=sumReduce( Dm->Comm, count_morphopen);
volume_change = double(count_morphopen - count_connected);
if (rank==0) printf(" opening of connected oil %f \n",volume_change/count_connected);
@ -1279,8 +1278,8 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){
mass_loss += random_value*seed_water_in_oil;
}
count = Dm->Comm.sumReduce( count );
mass_loss = Dm->Comm.sumReduce( mass_loss );
count= sumReduce( Dm->Comm, count);
mass_loss= sumReduce( Dm->Comm, mass_loss);
if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count);
// Need to initialize Aq, Bq, Den, Phi directly
@ -1317,7 +1316,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
}
}
}
double volume_initial = Dm->Comm.sumReduce( count);
double volume_initial = sumReduce( Dm->Comm, count);
/*
sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank);
FILE *INPUT = fopen(LocalRankFilename,"wb");
@ -1327,7 +1326,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
// 2. Identify connected components of phase field -> phase_label
BlobIDstruct new_index;
ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm);
comm.barrier();
MPI_Barrier(comm);
// only operate on component "0"
count = 0.0;
@ -1349,8 +1348,8 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
}
}
}
double volume_connected = Dm->Comm.sumReduce( count );
second_biggest = Dm->Comm.sumReduce( second_biggest );
double volume_connected = sumReduce( Dm->Comm, count);
second_biggest = sumReduce( Dm->Comm, second_biggest);
/*int reach_x, reach_y, reach_z;
for (int k=0; k<Nz; k++){
@ -1437,7 +1436,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
}
}
}
double volume_final = Dm->Comm.sumReduce( count );
double volume_final= sumReduce( Dm->Comm, count);
delta_volume = (volume_final-volume_initial);
if (rank == 0) printf("MorphInit: change fluid volume fraction by %f \n", delta_volume/volume_initial);


@ -12,13 +12,13 @@ Implementation of color lattice boltzmann model
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "analysis/runAnalysis.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
class ScaLBL_ColorModel{
public:
ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM);
ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_ColorModel();
// functions in the order they should be run
@ -68,7 +68,7 @@ public:
double *Pressure;
private:
Utilities::MPI comm;
MPI_Comm comm;
int dist_mem_size;
int neighborSize;


@ -3,7 +3,7 @@ color lattice boltzmann model
*/
#include "models/DFHModel.h"
ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM):
ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM):
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0),
Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0),
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM)
@ -100,16 +100,16 @@ void ScaLBL_DFHModel::ReadParams(string filename){
}
void ScaLBL_DFHModel::SetDomain(){
Dm = std::make_shared<Domain>(domain_db,comm); // full domain for analysis
Mask = std::make_shared<Domain>(domain_db,comm); // mask domain removes immobile phases
Dm = std::shared_ptr<Domain>(new Domain(domain_db,comm)); // full domain for analysis
Mask = std::shared_ptr<Domain>(new Domain(domain_db,comm)); // mask domain removes immobile phases
Nx+=2; Ny+=2; Nz += 2;
N = Nx*Ny*Nz;
id = new char [N];
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
Averages = std::make_shared<TwoPhase>( Dm ); // TwoPhase analysis object
comm.barrier();
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
MPI_Barrier(comm);
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
rank = Dm->rank();
}
@ -131,7 +131,7 @@ void ScaLBL_DFHModel::ReadInput(){
sprintf(LocalRankString,"%05d",rank);
sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString);
ReadBinaryFile(LocalRankFilename, Averages->SDs.data(), N);
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
}
@ -206,7 +206,7 @@ void ScaLBL_DFHModel::Create(){
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np);
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
@ -424,7 +424,7 @@ void ScaLBL_DFHModel::Initialize(){
}
}
}
count_wet_global = comm.sumReduce( count_wet );
MPI_Allreduce(&count_wet,&count_wet_global,1,MPI_DOUBLE,MPI_SUM,comm);
if (rank==0) printf("Wetting phase volume fraction =%f \n",count_wet_global/double(Nx*Ny*Nz*nprocs));
// initialize phi based on PhaseLabel (include solid component labels)
ScaLBL_CopyToDevice(Phi, PhaseLabel, Np*sizeof(double));
@ -446,7 +446,7 @@ void ScaLBL_DFHModel::Initialize(){
timestep=0;
}
}
comm.bcast(&timestep,1,0);
MPI_Bcast(&timestep,1,MPI_INT,0,comm);
// Read in the restart file to CPU buffers
double *cPhi = new double[Np];
double *cDist = new double[19*Np];
@ -468,7 +468,7 @@ void ScaLBL_DFHModel::Initialize(){
ScaLBL_DeviceBarrier();
delete [] cPhi;
delete [] cDist;
comm.barrier();
MPI_Barrier(comm);
}
if (rank==0) printf ("Initializing phase field \n");
@ -486,7 +486,7 @@ void ScaLBL_DFHModel::Run(){
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
//************ MAIN ITERATION LOOP ***************************************/
@ -532,8 +532,7 @@ void ScaLBL_DFHModel::Run(){
}
ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
// *************EVEN TIMESTEP*************
timestep++;
@ -569,9 +568,9 @@ void ScaLBL_DFHModel::Run(){
}
ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************
MPI_Barrier(comm);
PROFILE_STOP("Update");
// Run the analysis
@ -582,7 +581,7 @@ void ScaLBL_DFHModel::Run(){
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep


@ -12,13 +12,13 @@ Implementation of color lattice boltzmann model
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "analysis/runAnalysis.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
class ScaLBL_DFHModel{
public:
ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM);
ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_DFHModel();
// functions in the order they should be run
@ -66,7 +66,7 @@ public:
double *Pressure;
private:
Utilities::MPI comm;
MPI_Comm comm;
int dist_mem_size;
int neighborSize;


@ -5,7 +5,7 @@
#include "analysis/distance.h"
#include "common/ReadMicroCT.h"
ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM):
ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM):
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),
Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0),
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM)
@ -83,9 +83,9 @@ void ScaLBL_MRTModel::SetDomain(){
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
comm.barrier();
MPI_Barrier(comm);
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
rank = Dm->rank();
nprocx = Dm->nprocx();
@ -171,7 +171,7 @@ void ScaLBL_MRTModel::Create(){
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np);
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
@ -190,7 +190,7 @@ void ScaLBL_MRTModel::Create(){
if (rank==0) printf ("Setting up device map and neighbor list \n");
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
comm.barrier();
MPI_Barrier(comm);
}
@ -225,8 +225,7 @@ void ScaLBL_MRTModel::Run(){
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
starttime = MPI_Wtime();
if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
if (rank==0) printf("********************************************************\n");
@ -240,21 +239,18 @@ void ScaLBL_MRTModel::Run(){
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************/
if (timestep%1000==0){
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x);
ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y);
ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z);
@ -276,10 +272,10 @@ void ScaLBL_MRTModel::Run(){
}
}
}
vax = Mask->Comm.sumReduce( vax_loc );
vay = Mask->Comm.sumReduce( vay_loc );
vaz = Mask->Comm.sumReduce( vaz_loc );
count = Mask->Comm.sumReduce( count_loc );
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
@ -309,10 +305,10 @@ void ScaLBL_MRTModel::Run(){
double As = Morphology.A();
double Hs = Morphology.H();
double Xs = Morphology.X();
Vs = Dm->Comm.sumReduce( Vs);
As = Dm->Comm.sumReduce( As);
Hs = Dm->Comm.sumReduce( Hs);
Xs = Dm->Comm.sumReduce( Xs);
Vs=sumReduce( Dm->Comm, Vs);
As=sumReduce( Dm->Comm, As);
Hs=sumReduce( Dm->Comm, Hs);
Xs=sumReduce( Dm->Comm, Xs);
double h = Dm->voxel_length;
double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag;
if (rank==0) {
@ -346,8 +342,7 @@ void ScaLBL_MRTModel::VelocityField(){
/* Minkowski Morphology(Mask);
int SIZE=Np*sizeof(double);
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE);
memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double));
@ -374,10 +369,10 @@ void ScaLBL_MRTModel::VelocityField(){
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
vax = Mask->Comm.sumReduce( vax_loc );
vay = Mask->Comm.sumReduce( vay_loc );
vaz = Mask->Comm.sumReduce( vaz_loc );
count = Mask->Comm.sumReduce( count_loc );
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
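In the hunks above, the wrapper calls Mask->Comm.sumReduce(...) and the raw MPI_Allreduce(..., MPI_SUM, ...) calls do the same job: turn per-rank partial sums into a global average. A minimal standalone sketch of that reduction, where vax_loc and count_loc are made-up stand-ins for the per-rank sums accumulated above:

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank; MPI_Comm_rank(comm, &rank);
    double vax_loc = 0.1 * (rank + 1);  // stand-in for the per-rank velocity sum
    double count_loc = 100.0;           // stand-in for the per-rank voxel count
    double vax = 0.0, count = 0.0;
    MPI_Allreduce(&vax_loc, &vax, 1, MPI_DOUBLE, MPI_SUM, comm);
    MPI_Allreduce(&count_loc, &count, 1, MPI_DOUBLE, MPI_SUM, comm);
    vax /= count;                       // global average, identical on all ranks
    if (rank == 0) printf("average velocity = %e\n", vax);
    MPI_Finalize();
    return 0;
}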

View File

@ -11,13 +11,13 @@
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
class ScaLBL_MRTModel{
public:
ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM);
ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_MRTModel();
// functions in the order they should be run
@ -63,7 +63,7 @@ public:
DoubleArray Velocity_y;
DoubleArray Velocity_z;
private:
Utilities::MPI comm;
MPI_Comm comm;
// filenames
char LocalRankString[8];
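The header change above swaps the Utilities::MPI member back to a raw MPI_Comm and the constructor argument back to MPI_Comm COMM. A compilable sketch of that pattern (ToyModel is an illustrative name, not an LBPM class):

#include <mpi.h>
#include <cstdio>

class ToyModel {
public:
    ToyModel(int RANK, int NP, MPI_Comm COMM) : rank(RANK), nprocs(NP), comm(COMM) {}
    void sync() { MPI_Barrier(comm); }   // member functions use the stored communicator
    int rank, nprocs;
private:
    MPI_Comm comm;                       // raw handle, as in ScaLBL_MRTModel after this change
};

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    ToyModel model(rank, nprocs, MPI_COMM_WORLD);
    model.sync();
    if (model.rank == 0) printf("model constructed on %d ranks\n", model.nprocs);
    MPI_Finalize();
    return 0;
}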

View File

@ -100,10 +100,11 @@ inline void WriteBlobStates(TwoPhase TCAT, double D, double porosity){
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
Utilities::setAbortBehavior( true, true, true );
Utilities::setErrorHandlers();
PROFILE_ENABLE(0);
@ -136,7 +137,7 @@ int main(int argc, char **argv)
domain >> Ly;
domain >> Lz;
}
comm.barrier();
MPI_Barrier(comm);
// Computational domain
MPI_Bcast(&nx,1,MPI_INT,0,comm);
MPI_Bcast(&ny,1,MPI_INT,0,comm);
@ -149,7 +150,7 @@ int main(int argc, char **argv)
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// Check that the number of processors >= the number of ranks
if ( rank==0 ) {
@ -208,7 +209,7 @@ int main(int argc, char **argv)
// WriteLocalSolidID(LocalRankFilename, id, N);
sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString);
ReadBinaryFile(LocalRankFilename, Averages.SDs.get(), N);
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//.......................................................................
//copies of data needed to perform checkpointing from cpu
@ -220,7 +221,7 @@ int main(int argc, char **argv)
if (rank==0) printf("Reading restart file! \n");
// Read in the restart file to CPU buffers
ReadCheckpoint(LocalRestartFile, Den, DistEven, DistOdd, N);
comm.barrier();
MPI_Barrier(comm);
//.........................................................................
// Populate the arrays needed to perform averaging
if (rank==0) printf("Populate arrays \n");
@ -328,14 +329,14 @@ int main(int argc, char **argv)
// BlobContainer Blobs;
DoubleArray RecvBuffer(dimx);
// MPI_Allreduce(&Averages.BlobAverages.get(),&Blobs.get(),1,MPI_DOUBLE,MPI_SUM,Dm.Comm);
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf("Number of components is %i \n",dimy);
for (int b=0; b<dimy; b++){
MPI_Allreduce(&Averages.BlobAverages(0,b),&RecvBuffer(0),dimx,MPI_DOUBLE,MPI_SUM,comm);
for (int idx=0; idx<dimx-1; idx++) Averages.BlobAverages(idx,b)=RecvBuffer(idx);
comm.barrier();
MPI_Barrier(comm);
if (Averages.BlobAverages(0,b) > 0.0){
double Vn,pn,awn,ans,Jwn,Kwn,lwns,cwns,trawn,trJwn;
@ -481,7 +482,7 @@ int main(int argc, char **argv)
fclose(BLOBS);*/
PROFILE_STOP("main");
PROFILE_SAVE("BlobIdentifyParallel",false);
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}
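The blob-averaging hunk above reduces one row of Averages.BlobAverages at a time with an array-valued MPI_Allreduce into RecvBuffer. A standalone sketch of that pattern, with dimx and the data chosen only for illustration:

#include <mpi.h>
#include <vector>
#include <cstdio>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank; MPI_Comm_rank(comm, &rank);
    const int dimx = 5;
    std::vector<double> local(dimx, double(rank));   // stand-in for one row of BlobAverages
    std::vector<double> recv(dimx, 0.0);             // stand-in for RecvBuffer
    MPI_Allreduce(local.data(), recv.data(), dimx, MPI_DOUBLE, MPI_SUM, comm);
    if (rank == 0) printf("recv[0] = %f\n", recv[0]);
    MPI_Finalize();
    return 0;
}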

View File

@ -47,10 +47,11 @@ void readRankData( int proc, int nx, int ny, int nz, DoubleArray& Phase, DoubleA
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
#ifdef PROFILE
PROFILE_ENABLE(0);
PROFILE_DISABLE_TRACE();
@ -128,7 +129,7 @@ int main(int argc, char **argv)
PROFILE_STOP("main");
PROFILE_SAVE("BlobIdentifyParallel",false);
#endif
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}

View File

@ -114,10 +114,11 @@ inline void ReadFromRank(char *FILENAME, DoubleArray &Phase, int nx, int ny, int
int main(int argc, char **argv)
{
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
printf("----------------------------------------------------------\n");
printf("Creating single Binary file from restart (8-bit integer)\n");
@ -275,7 +276,7 @@ int main(int argc, char **argv)
*/
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -119,10 +119,11 @@ inline void ReadFromRank(char *FILENAME, DoubleArray &Phase, DoubleArray &Pressu
int main(int argc, char **argv)
{
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
printf("----------------------------------------------------------\n");
printf("COMPUTING TCAT ANALYSIS FOR NON-WETTING PHASE FEATURES \n");
@ -432,7 +433,7 @@ int main(int argc, char **argv)
fclose(DISTANCE);
*/
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -9,7 +9,7 @@
//#include "common/pmmc.h"
#include "common/Domain.h"
#include "common/SpherePack.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
/*
@ -70,8 +70,8 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny
}
}
// totalGlobal is the number of nodes in the pore-space
totalGlobal = Dm.Comm.sumReduce( count );
maxdistGlobal = Dm.Comm.sumReduce( maxdist );
MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm.Comm);
MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm.Comm);
double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2);
double porosity=totalGlobal/volume;
if (rank==0) printf("Media Porosity: %f \n",porosity);
@ -148,6 +148,7 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny
double Rcrit_old=0.0;
double Rcrit_new=0.0;
double GlobalNumber = 1.f;
int imin,jmin,kmin,imax,jmax,kmax;
Rcrit_new = maxdistGlobal;
@ -214,41 +215,41 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny
PackID(Dm.sendList_YZ, Dm.sendCount_YZ ,sendID_YZ, id);
//......................................................................................
MPI_Sendrecv(sendID_x,Dm.sendCount_x,MPI_CHAR,Dm.rank_x(),sendtag,
recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_X,Dm.recvCount_X,MPI_CHAR,Dm.rank_X(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,Dm.sendCount_X,MPI_CHAR,Dm.rank_X(),sendtag,
recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_x,Dm.recvCount_x,MPI_CHAR,Dm.rank_x(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,Dm.sendCount_y,MPI_CHAR,Dm.rank_y(),sendtag,
recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Y,Dm.recvCount_Y,MPI_CHAR,Dm.rank_Y(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,Dm.sendCount_Y,MPI_CHAR,Dm.rank_Y(),sendtag,
recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_y,Dm.recvCount_y,MPI_CHAR,Dm.rank_y(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,Dm.sendCount_z,MPI_CHAR,Dm.rank_z(),sendtag,
recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Z,Dm.recvCount_Z,MPI_CHAR,Dm.rank_Z(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,Dm.sendCount_Z,MPI_CHAR,Dm.rank_Z(),sendtag,
recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_z,Dm.recvCount_z,MPI_CHAR,Dm.rank_z(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,Dm.sendCount_xy,MPI_CHAR,Dm.rank_xy(),sendtag,
recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_XY,Dm.recvCount_XY,MPI_CHAR,Dm.rank_XY(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,Dm.sendCount_XY,MPI_CHAR,Dm.rank_XY(),sendtag,
recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xy,Dm.recvCount_xy,MPI_CHAR,Dm.rank_xy(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,Dm.sendCount_Xy,MPI_CHAR,Dm.rank_Xy(),sendtag,
recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xY,Dm.recvCount_xY,MPI_CHAR,Dm.rank_xY(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,Dm.sendCount_xY,MPI_CHAR,Dm.rank_xY(),sendtag,
recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Xy,Dm.recvCount_Xy,MPI_CHAR,Dm.rank_Xy(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,Dm.sendCount_xz,MPI_CHAR,Dm.rank_xz(),sendtag,
recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_XZ,Dm.recvCount_XZ,MPI_CHAR,Dm.rank_XZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,Dm.sendCount_XZ,MPI_CHAR,Dm.rank_XZ(),sendtag,
recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xz,Dm.recvCount_xz,MPI_CHAR,Dm.rank_xz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,Dm.sendCount_Xz,MPI_CHAR,Dm.rank_Xz(),sendtag,
recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_xZ,Dm.recvCount_xZ,MPI_CHAR,Dm.rank_xZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,Dm.sendCount_xZ,MPI_CHAR,Dm.rank_xZ(),sendtag,
recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Xz,Dm.recvCount_Xz,MPI_CHAR,Dm.rank_Xz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,Dm.sendCount_yz,MPI_CHAR,Dm.rank_yz(),sendtag,
recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_YZ,Dm.recvCount_YZ,MPI_CHAR,Dm.rank_YZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,Dm.sendCount_YZ,MPI_CHAR,Dm.rank_YZ(),sendtag,
recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_yz,Dm.recvCount_yz,MPI_CHAR,Dm.rank_yz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,Dm.sendCount_Yz,MPI_CHAR,Dm.rank_Yz(),sendtag,
recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_yZ,Dm.recvCount_yZ,MPI_CHAR,Dm.rank_yZ(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,Dm.sendCount_yZ,MPI_CHAR,Dm.rank_yZ(),sendtag,
recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,Dm.Comm.getCommunicator(),MPI_STATUS_IGNORE);
recvID_Yz,Dm.recvCount_Yz,MPI_CHAR,Dm.rank_Yz(),recvtag,Dm.Comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(Dm.recvList_x, Dm.recvCount_x ,recvID_x, id);
UnpackID(Dm.recvList_X, Dm.recvCount_X ,recvID_X, id);
@ -270,7 +271,7 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny
UnpackID(Dm.recvList_YZ, Dm.recvCount_YZ ,recvID_YZ, id);
//......................................................................................
//double GlobalNumber = Dm.Comm.sumReduce( LocalNumber );
MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm.Comm);
count = 0.f;
for (int k=1; k<Nz-1; k++){
@ -283,7 +284,7 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny
}
}
}
countGlobal = Dm.Comm.sumReduce( count );
MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm.Comm);
sw_new = countGlobal/totalGlobal;
sw_diff_new = abs(sw_new-SW);
// for test only
@ -313,11 +314,15 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
@ -407,14 +412,14 @@ int main(int argc, char **argv)
//.......................................................................
if (rank == 0) printf("Reading the sphere packing \n");
if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad);
comm.barrier();
MPI_Barrier(comm);
// Broadcast the sphere packing to all processes
comm.bcast(cx,nspheres,0);
comm.bcast(cy,nspheres,0);
comm.bcast(cz,nspheres,0);
comm.bcast(rad,nspheres,0);
MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
if (rank == 0){
// Compute the Sauter mean diameter
@ -428,7 +433,7 @@ int main(int argc, char **argv)
D = 6.0*(Nx-2)*nprocx*totVol / totArea / Lx;
printf("Sauter Mean Diameter (computed from sphere packing) = %f \n",D);
}
comm.bcast(&D,1,0);
MPI_Bcast(&D,1,MPI_DOUBLE,0,comm);
//.......................................................................
SignedDistance(SignDist.data(),nspheres,cx,cy,cz,rad,Lx,Ly,Lz,Nx,Ny,Nz,
@ -460,7 +465,7 @@ int main(int argc, char **argv)
}
}
sum_local = 1.0*sum;
porosity = comm.sumReduce(sum_local);
MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm);
porosity = porosity*iVol_global;
if (rank==0) printf("Media porosity = %f \n",porosity);
@ -493,7 +498,7 @@ int main(int argc, char **argv)
//......................................................................
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}
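The long block above exchanges packed ID values with paired MPI_Sendrecv calls for all face and edge neighbours; the merge fix only changes Dm.Comm.getCommunicator() back to the raw Dm.Comm handle. A one-axis standalone sketch of the same send/receive pattern, with periodic left/right neighbours standing in for rank_x()/rank_X():

#include <mpi.h>
#include <vector>
#include <cstdio>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank, nprocs;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nprocs);
    const int count = 8;                       // stand-in for sendCount_x
    std::vector<char> sendID(count, char('a' + rank));
    std::vector<char> recvID(count, '?');
    int right = (rank + 1) % nprocs;           // periodic neighbours, like the domain decomposition
    int left  = (rank - 1 + nprocs) % nprocs;
    int sendtag = 3, recvtag = 3;
    MPI_Sendrecv(sendID.data(), count, MPI_CHAR, right, sendtag,
                 recvID.data(), count, MPI_CHAR, left,  recvtag, comm, MPI_STATUS_IGNORE);
    printf("rank %d received '%c' from rank %d\n", rank, recvID[0], left);
    MPI_Finalize();
    return 0;
}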

View File

@ -127,10 +127,11 @@ inline void WriteBlobStates(TwoPhase TCAT, double D, double porosity){
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{ // Limit scope so variables that contain communicators will free before MPI_Finalize
if ( rank==0 ) {
@ -188,7 +189,7 @@ int main(int argc, char **argv)
Lx=Ly=Lz=1;
}
}
comm.barrier();
MPI_Barrier(comm);
// Computational domain
MPI_Bcast(&nx,1,MPI_INT,0,comm);
MPI_Bcast(&ny,1,MPI_INT,0,comm);
@ -201,7 +202,7 @@ int main(int argc, char **argv)
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// Check that the number of processors >= the number of ranks
if ( rank==0 ) {
@ -253,14 +254,14 @@ int main(int argc, char **argv)
cz[0]=0.25*Lz; cx[1]=0.75*Lz; cx[2]=0.25*Lz; cx[3]=0.25*Lz;
rad[0]=rad[1]=rad[2]=rad[3]=0.1*Lx;
comm.barrier();
MPI_Barrier(comm);
// Broadcast the sphere packing to all processes
MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
//.......................................................................
SignedDistance(Averages.Phase.data(),nspheres,cx,cy,cz,rad,Lx,Ly,Lz,Nx,Ny,Nz,
Dm->iproc(),Dm->jproc(),Dm->kproc(),Dm->nprocx(),Dm->nprocy(),Dm->nprocz());
@ -316,7 +317,7 @@ int main(int argc, char **argv)
delete [] rad;
} // Limit scope so variables that contain communicators will free before MPI_Finalize
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}
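Here the sphere-packing arrays reach every rank through typed MPI_Bcast calls instead of the wrapper's comm.bcast. A minimal standalone sketch of broadcasting rank-0 data (nspheres and the radii are made-up values):

#include <mpi.h>
#include <vector>
#include <cstdio>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank; MPI_Comm_rank(comm, &rank);
    const int nspheres = 4;
    std::vector<double> rad(nspheres, 0.0);
    if (rank == 0)                       // only rank 0 reads/creates the packing
        for (int i = 0; i < nspheres; i++) rad[i] = 0.1 * (i + 1);
    MPI_Bcast(rad.data(), nspheres, MPI_DOUBLE, 0, comm);  // now every rank has it
    printf("rank %d: rad[0]=%f\n", rank, rad[0]);
    MPI_Finalize();
    return 0;
}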

View File

@ -23,19 +23,21 @@ inline double rand2()
// Test if all ranks agree on a value
bool allAgree( int x, const Utilities::MPI& comm ) {
bool allAgree( int x, MPI_Comm comm ) {
int x2 = x;
comm.bcast(&x2,1,0);
MPI_Bcast(&x2,1,MPI_INT,0,comm);
int diff = x==x2 ? 0:1;
int diff2 = comm.sumReduce( diff );
int diff2 = 0;
MPI_Allreduce(&diff,&diff2,1,MPI_INT,MPI_SUM,comm);
return diff2==0;
}
template<class T>
bool allAgree( const std::vector<T>& x, const Utilities::MPI& comm ) {
bool allAgree( const std::vector<T>& x, MPI_Comm comm ) {
std::vector<T> x2 = x;
comm.bcast(&x2[0],x.size()*sizeof(T)/sizeof(int),0);
MPI_Bcast(&x2[0],x.size()*sizeof(T)/sizeof(int),MPI_INT,0,comm);
int diff = x==x2 ? 0:1;
int diff2 = comm.sumReduce( diff );
int diff2 = 0;
MPI_Allreduce(&diff,&diff2,1,MPI_INT,MPI_SUM,comm);
return diff2==0;
}
@ -72,9 +74,9 @@ struct bubble_struct {
// Create a random set of bubbles
std::vector<bubble_struct> create_bubbles( int N_bubbles, double Lx, double Ly, double Lz, const Utilities::MPI& comm )
std::vector<bubble_struct> create_bubbles( int N_bubbles, double Lx, double Ly, double Lz, MPI_Comm comm )
{
int rank = comm.getRank();
int rank = comm_rank(comm);
std::vector<bubble_struct> bubbles(N_bubbles);
if ( rank == 0 ) {
double R0 = 0.2*Lx*Ly*Lz/pow((double)N_bubbles,0.333);
@ -89,7 +91,7 @@ std::vector<bubble_struct> create_bubbles( int N_bubbles, double Lx, double Ly,
}
}
size_t N_bytes = N_bubbles*sizeof(bubble_struct);
comm.bcast((char*)&bubbles[0],N_bytes,0);
MPI_Bcast((char*)&bubbles[0],N_bytes,MPI_CHAR,0,comm);
return bubbles;
}
@ -122,7 +124,7 @@ void fillBubbleData( const std::vector<bubble_struct>& bubbles, DoubleArray& Pha
// Shift all of the data by the given number of cells
void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct& rank_info, const Utilities::MPI& comm )
void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct& rank_info, MPI_Comm comm )
{
int nx = data.size(0)-2;
int ny = data.size(1)-2;
@ -152,10 +154,11 @@ void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
PROFILE_ENABLE(1);
PROFILE_DISABLE_TRACE();
PROFILE_SYNCHRONIZE();
@ -294,7 +297,7 @@ int main(int argc, char **argv)
velocity[i].z = bubbles[i].radius*(2*rand2()-1);
}
}
comm.bcast((char*)&velocity[0],bubbles.size()*sizeof(Point),0);
MPI_Bcast((char*)&velocity[0],bubbles.size()*sizeof(Point),MPI_CHAR,0,comm);
fillBubbleData( bubbles, Phase, SignDist, Lx, Ly, Lz, rank_info );
fillData.fill(Phase);
fillData.fill(SignDist);
@ -388,8 +391,8 @@ int main(int argc, char **argv)
printf("\n");
}
}
comm.bcast(&N1,1,0);
comm.bcast(&N2,1,0);
MPI_Bcast(&N1,1,MPI_INT,0,comm);
MPI_Bcast(&N2,1,MPI_INT,0,comm);
if ( N1!=nblobs || N2!=nblobs2 ) {
if ( rank==0 )
printf("Error, blob ids do not map in moving bubble test (%i,%i,%i,%i)\n",
@ -409,7 +412,7 @@ int main(int argc, char **argv)
// Finished
PROFILE_STOP("main");
PROFILE_SAVE("TestBlobIdentify",false);
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return N_errors;
}
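The allAgree() helpers above check that every rank holds the same value: broadcast rank 0's copy, compare locally, and sum the disagreements with an all-reduce. A standalone version of the scalar case, with a tiny driver to show how it behaves:

#include <mpi.h>
#include <cstdio>

static bool allAgree(int x, MPI_Comm comm) {
    int x2 = x;
    MPI_Bcast(&x2, 1, MPI_INT, 0, comm);      // everyone gets rank 0's value
    int diff  = (x == x2) ? 0 : 1;            // 1 if this rank disagrees
    int diff2 = 0;
    MPI_Allreduce(&diff, &diff2, 1, MPI_INT, MPI_SUM, comm);
    return diff2 == 0;                        // true only if nobody disagreed
}

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    bool ok  = allAgree(42, MPI_COMM_WORLD);   // same value everywhere
    bool bad = allAgree(rank, MPI_COMM_WORLD); // differs unless running on one rank
    if (rank == 0) printf("agree: %d, disagree case: %d\n", ok, bad);
    MPI_Finalize();
    return 0;
}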

View File

@ -18,9 +18,10 @@
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
/*if ( nprocs != 8 ) {
printf("This tests requires 8 processors\n");
return -1;

View File

@ -7,7 +7,7 @@
#include "analysis/pmmc.h"
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "IO/Mesh.h"
#include "IO/Writer.h"
@ -32,15 +32,14 @@ int main(int argc, char **argv)
// Initialize MPI
int provided_thread_support = -1;
MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support);
MPI_Comm comm;
MPI_Comm_dup(MPI_COMM_WORLD,&comm);
int rank = comm_rank(comm);
int nprocs = comm_size(comm);
if ( rank==0 && provided_thread_support<MPI_THREAD_MULTIPLE )
std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl;
{ // Limit scope so variables that contain communicators will free before MPI_Finalize
auto comm = Utilities::MPI( MPI_COMM_WORLD ).dup();
int rank = comm.getRank();
int nprocs = comm.getSize();
if ( rank==0 && provided_thread_support<MPI_THREAD_MULTIPLE )
std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl;
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
@ -120,7 +119,7 @@ int main(int argc, char **argv)
int jproc = rank_info.jy;
int kproc = rank_info.kz;
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
double Ps = -(das-dbs)/(das+dbs);
@ -163,7 +162,7 @@ int main(int argc, char **argv)
// Mask that excludes the solid phase
Domain Mask(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,pBC);
comm.barrier();
MPI_Barrier(comm);
Nx+=2; Ny+=2; Nz += 2;
@ -433,7 +432,7 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
//...........................................................................
@ -518,7 +517,7 @@ int main(int argc, char **argv)
ScaLBL_CopyToDevice(f_odd,cDistOdd,9*N*sizeof(double));
ScaLBL_CopyToDevice(Den,cDen,2*N*sizeof(double));
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
}
//*************************************************************************
@ -530,7 +529,7 @@ int main(int argc, char **argv)
ScaLBL_Comm.SendHalo(Phi);
ScaLBL_Comm.RecvHalo(Phi);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
//*************************************************************************
if (rank==0 && pBC){
@ -561,7 +560,7 @@ int main(int argc, char **argv)
ScaLBL_D3Q19_Pressure(ID,f_even,f_odd,Pressure,Nx,Ny,Nz);
ScaLBL_CopyToHost(Phase.data(),Phi,N*sizeof(double));
ScaLBL_CopyToHost(Press.data(),Pressure,N*sizeof(double));
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
int timestep = 0;
@ -592,7 +591,7 @@ int main(int argc, char **argv)
//*************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
//*************************************************************************
// Swap the distributions for momentum transport
//*************************************************************************
@ -600,7 +599,7 @@ int main(int argc, char **argv)
//*************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
//*************************************************************************
// Wait for communications to complete and unpack the distributions
ScaLBL_Comm.RecvD3Q19(f_even, f_odd);
@ -617,7 +616,7 @@ int main(int argc, char **argv)
ScaLBL_D3Q7_Swap(ID, B_even, B_odd, Nx, Ny, Nz);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
//*************************************************************************
// Wait for communication and unpack the D3Q7 distributions
@ -634,7 +633,7 @@ int main(int argc, char **argv)
//*************************************************************************
// ScaLBL_ComputePhaseField(ID, Phi, Copy, Den, N);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
ScaLBL_ComputePhaseField(ID, Phi, Den, N);
//*************************************************************************
@ -660,7 +659,7 @@ int main(int argc, char **argv)
//...................................................................................
comm.barrier();
MPI_Barrier(comm);
// Timestep completed!
timestep++;
@ -808,7 +807,7 @@ int main(int argc, char **argv)
//...........................................................................
}
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm);
@ -828,7 +827,7 @@ int main(int argc, char **argv)
MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,comm);
comm.barrier();
MPI_Barrier(comm);
//.........................................................................
// Compute the change in the total surface energy based on the defined interval
// See McClure, Prins and Miller (2013)
@ -951,7 +950,7 @@ int main(int argc, char **argv)
//************************************************************************/
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
@ -990,8 +989,9 @@ int main(int argc, char **argv)
PROFILE_SAVE("TestBubble");
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
} // Limit scope so variables that contain communicators will free before MPI_Finalize
MPI_Comm_free(&comm);
MPI_Finalize();
return 0;
}
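The test above initializes MPI with MPI_Init_thread requesting MPI_THREAD_MULTIPLE, duplicates MPI_COMM_WORLD for its own use, and frees the duplicate before MPI_Finalize. A minimal standalone sketch of that lifecycle:

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
    int provided = -1;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm comm;
    MPI_Comm_dup(MPI_COMM_WORLD, &comm);      // private copy for this application
    int rank;
    MPI_Comm_rank(comm, &rank);
    if (rank == 0 && provided < MPI_THREAD_MULTIPLE)
        printf("Warning: full thread support not available (got %d)\n", provided);
    // ... work that may post MPI calls from analysis threads ...
    MPI_Comm_free(&comm);                     // must happen before MPI_Finalize
    MPI_Finalize();
    return 0;
}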

View File

@ -9,7 +9,7 @@
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "analysis/runAnalysis.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
@ -29,9 +29,10 @@ int main(int argc, char **argv)
// Initialize MPI
int provided_thread_support = -1;
MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm;
MPI_Comm_dup(MPI_COMM_WORLD,&comm);
int rank = comm_rank(comm);
int nprocs = comm_size(comm);
int check=0;
{ // Limit scope so variables that contain communicators will free before MPI_Finalize
int i,j,k,n,Np;
@ -44,7 +45,7 @@ int main(int argc, char **argv)
int device=ScaLBL_SetDevice(rank);
printf("Using GPU ID %i for rank %i \n",device,rank);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
PROFILE_ENABLE(1);
//PROFILE_ENABLE_TRACE();
@ -71,7 +72,7 @@ int main(int argc, char **argv)
// Initialize compute device
// int device=ScaLBL_SetDevice(rank);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
Utilities::setErrorHandlers();
@ -117,7 +118,7 @@ int main(int argc, char **argv)
// Get the rank info
const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz);
comm.barrier();
MPI_Barrier(comm);
if (nprocs != nprocx*nprocy*nprocz){
printf("nprocx = %i \n",nprocx);
@ -166,7 +167,7 @@ int main(int argc, char **argv)
// Mask that excludes the solid phase
auto Mask = std::make_shared<Domain>(domain_db,comm);
comm.barrier();
MPI_Barrier(comm);
Nx+=2; Ny+=2; Nz += 2;
int N = Nx*Ny*Nz;
@ -249,7 +250,7 @@ int main(int argc, char **argv)
IntArray Map(Nx,Ny,Nz);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np);
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
@ -386,7 +387,7 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
@ -436,7 +437,7 @@ int main(int argc, char **argv)
}
ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->next, Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
// *************EVEN TIMESTEP*************
timestep++;
@ -472,9 +473,9 @@ int main(int argc, char **argv)
}
ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->next, Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************
comm.barrier();
MPI_Barrier(comm);
PROFILE_STOP("Update");
// Run the analysis
@ -486,7 +487,7 @@ int main(int argc, char **argv)
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
@ -546,8 +547,9 @@ int main(int argc, char **argv)
PROFILE_STOP("Main");
PROFILE_SAVE("lbpm_color_simulator",1);
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
} // Limit scope so variables that contain communicators will free before MPI_Finalize
MPI_Comm_free(&comm);
MPI_Finalize();
return check;
}
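Like the other drivers, this test brackets its timestep loop with a barrier and MPI_Wtime on both sides, then reports the walltime per timestep from rank 0. A standalone sketch of that timing pattern, where the empty loop stands in for the lattice Boltzmann update:

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank; MPI_Comm_rank(comm, &rank);
    int timestep = 0, timestepMax = 1000;
    MPI_Barrier(comm);
    double starttime = MPI_Wtime();
    while (timestep < timestepMax) timestep++;   // stand-in for the LB update
    MPI_Barrier(comm);
    double stoptime = MPI_Wtime();
    double cputime = (stoptime - starttime) / timestep;
    if (rank == 0) printf("CPU time = %f s per timestep\n", cputime);
    MPI_Finalize();
    return 0;
}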

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "models/ColorModel.h"
using namespace std;
@ -64,11 +64,15 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius)
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check=0;
{
if (rank == 0){
@ -93,7 +97,7 @@ int main(int argc, char **argv)
ColorModel.WriteDebug();
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -15,11 +15,15 @@ using namespace std;
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check;
{
// parallel domain size (# of sub-domains)
@ -112,7 +116,7 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
MPI_Bcast(&Nx,1,MPI_INT,0,comm);
MPI_Bcast(&Ny,1,MPI_INT,0,comm);
@ -125,7 +129,7 @@ int main(int argc, char **argv)
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
@ -142,7 +146,7 @@ int main(int argc, char **argv)
printf("********************************************************\n");
}
comm.barrier();
MPI_Barrier(comm);
double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz;
int BoundaryCondition=0;
@ -171,7 +175,7 @@ int main(int argc, char **argv)
}
}
Dm.CommInit();
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
if (rank==0) printf ("Create ScaLBL_Communicator \n");
@ -188,7 +192,7 @@ int main(int argc, char **argv)
neighborList= new int[18*Np];
ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np);
comm.barrier();
MPI_Barrier(comm);
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
@ -256,7 +260,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
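As in the other drivers, rank 0 reads the domain parameters and each scalar is then broadcast before the domain is constructed. A minimal standalone sketch of that block (the parameter values are made up):

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank; MPI_Comm_rank(comm, &rank);
    int Nx = 0, Ny = 0, Nz = 0;
    double Lx = 0.0;
    if (rank == 0) { Nx = Ny = Nz = 64; Lx = 1.0; }   // stand-in for parsing the input file
    MPI_Barrier(comm);
    MPI_Bcast(&Nx, 1, MPI_INT, 0, comm);
    MPI_Bcast(&Ny, 1, MPI_INT, 0, comm);
    MPI_Bcast(&Nz, 1, MPI_INT, 0, comm);
    MPI_Bcast(&Lx, 1, MPI_DOUBLE, 0, comm);
    MPI_Barrier(comm);
    printf("rank %d: %d x %d x %d, Lx=%f\n", rank, Nx, Ny, Nz, Lx);
    MPI_Finalize();
    return 0;
}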

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -25,11 +25,15 @@ std::shared_ptr<Database> loadInputs( int nprocs )
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check=0;
{
// parallel domain size (# of sub-domains)
@ -78,7 +82,7 @@ int main(int argc, char **argv)
}
}
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
if (rank==0) printf ("Create ScaLBL_Communicator \n");
@ -101,7 +105,7 @@ int main(int argc, char **argv)
IntArray Map(Nx,Ny,Nz);
neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np);
comm.barrier();
MPI_Barrier(comm);
//......................device distributions.................................
int neighborSize=18*Np*sizeof(int);
@ -207,7 +211,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -15,11 +15,15 @@ using namespace std;
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check=0;
{
// parallel domain size (# of sub-domains)
@ -38,7 +42,7 @@ int main(int argc, char **argv)
// Initialize compute device
// int device=ScaLBL_SetDevice(rank);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
Utilities::setErrorHandlers();
// Variables that specify the computational domain
@ -73,7 +77,7 @@ int main(int argc, char **argv)
// Get the rank info
const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz);
comm.barrier();
MPI_Barrier(comm);
if (nprocs != nprocx*nprocy*nprocz){
printf("nprocx = %i \n",nprocx);
@ -117,7 +121,7 @@ int main(int argc, char **argv)
std::shared_ptr<Domain> Dm(new Domain(domain_db,comm));
for (int i=0; i<Dm->Nx*Dm->Ny*Dm->Nz; i++) Dm->id[i] = 1;
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
Nx+=2; Ny+=2; Nz += 2;
int N = Nx*Ny*Nz;
@ -149,7 +153,7 @@ int main(int argc, char **argv)
}
}
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
if (rank==0) printf ("Create ScaLBL_Communicator \n");
@ -166,7 +170,7 @@ int main(int argc, char **argv)
Npad=Np+32;
neighborList= new int[18*Npad];
Np=ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np);
comm.barrier();
MPI_Barrier(comm);
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
@ -268,7 +272,7 @@ int main(int argc, char **argv)
ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
@ -328,7 +332,7 @@ int main(int argc, char **argv)
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
//************************************************************************
printf("Check after even time \n");
@ -411,7 +415,7 @@ int main(int argc, char **argv)
ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
@ -472,7 +476,7 @@ int main(int argc, char **argv)
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
//************************************************************************
printf("Check after even time \n");
@ -519,7 +523,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
return check;

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "models/ColorModel.h"
std::shared_ptr<Database> loadInputs( int nprocs )
@ -84,11 +84,15 @@ void InitializeSquareTube(ScaLBL_ColorModel &ColorModel){
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check=0;
{
if (rank == 0){
@ -109,7 +113,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************

View File

@ -6,7 +6,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -164,10 +164,11 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){
int main(int argc, char **argv)
{
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check;
{
@ -262,14 +263,14 @@ int main(int argc, char **argv)
}
}
}
sum = comm.sumReduce( sum_local );
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm);
double iVol_global=1.f/double((Nx-2)*(Ny-2)*(Nz-2)*nprocx*nprocy*nprocz);
porosity = 1.0-sum*iVol_global;
if (rank==0) printf("Media porosity = %f \n",porosity);
//.......................................................................
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//...........................................................................
@ -284,7 +285,7 @@ int main(int argc, char **argv)
IntArray Map(Nx,Ny,Nz);
Map.fill(-2);
Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np);
comm.barrier();
MPI_Barrier(comm);
int neighborSize=18*Np*sizeof(int);
//......................device distributions.................................
dist_mem_size = Np*sizeof(double);
@ -354,7 +355,7 @@ int main(int argc, char **argv)
GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2, iproc,jproc,kproc,nprocx,nprocy,nprocz);
ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
//*************************************************************************
// First timestep
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
@ -377,7 +378,7 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
@ -397,7 +398,7 @@ int main(int argc, char **argv)
//*********************************************
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
// Iteration completed!
timestep++;
//...................................................................
@ -426,7 +427,7 @@ int main(int argc, char **argv)
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
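The porosity calculation in this test sums the local voxel count with a single MPI_Allreduce and normalizes by the global interior volume, following the 1.0 - sum*iVol_global form above. A standalone sketch with made-up sizes:

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank, nprocs;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nprocs);
    const int nx = 16, ny = 16, nz = 16;          // interior cells per rank (illustrative)
    double sum_local = 0.25 * nx * ny * nz;       // stand-in for the local solid count
    double sum = 0.0;
    MPI_Allreduce(&sum_local, &sum, 1, MPI_DOUBLE, MPI_SUM, comm);
    double iVol_global = 1.0 / (double(nx) * ny * nz * nprocs);
    double porosity = 1.0 - sum * iVol_global;
    if (rank == 0) printf("Media porosity = %f \n", porosity);
    MPI_Finalize();
    return 0;
}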

View File

@ -9,7 +9,7 @@
#include "common/UnitTest.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Database.h"
#include "ProfilerApp.h"
@ -17,8 +17,11 @@
// Main
int main(int argc, char **argv)
{
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
Utilities::setAbortBehavior(true,2);
Utilities::setErrorHandlers();
UnitTest ut;
@ -66,7 +69,7 @@ int main(int argc, char **argv)
// Finished
PROFILE_SAVE("TestDatabase",true);
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return err;
}

View File

@ -1,5 +1,5 @@
#include <iostream>
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include "common/ScaLBL.h"
@ -18,9 +18,9 @@ std::shared_ptr<Database> loadInputs( int nprocs )
int main (int argc, char **argv)
{
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
int rank = MPI_WORLD_RANK();
int nprocs = MPI_WORLD_SIZE();
// set the error code
// Note: the error code should be consistent across all processors
@ -89,7 +89,7 @@ int main (int argc, char **argv)
neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np);
comm.barrier();
MPI_Barrier(comm);
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
@ -149,7 +149,7 @@ int main (int argc, char **argv)
double *VEL;
VEL= new double [3*Np];
int SIZE=3*Np*sizeof(double);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_CopyToHost(&VEL[0],&dvc_vel[0],SIZE);
double Q = 0.f;
@ -192,7 +192,7 @@ int main (int argc, char **argv)
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
@ -201,7 +201,7 @@ int main (int argc, char **argv)
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
//************************************************************************/
@ -265,7 +265,7 @@ int main (int argc, char **argv)
}
// Finished
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return error;
}

View File

@ -1,5 +1,5 @@
#include <iostream>
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include <math.h>
@ -443,9 +443,8 @@ inline void MRT_Transform(double *dist, int Np, double Fx, double Fy, double Fz)
int main (int argc, char **argv)
{
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
int rank = MPI_WORLD_RANK();
int nprocs = MPI_WORLD_SIZE();
for (int i=0; i<nprocs; i++) {
if ( rank==i )

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -46,11 +46,15 @@ std::shared_ptr<Database> loadInputs( int nprocs )
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check=0;
{
// parallel domain size (# of sub-domains)
@ -94,7 +98,7 @@ int main(int argc, char **argv)
printf("********************************************************\n");
}
comm.barrier();
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
@ -102,7 +106,7 @@ int main(int argc, char **argv)
if (rank == 0) {
printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc);
}
comm.barrier();
MPI_Barrier(comm);
if (rank == 1){
printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc);
printf("\n\n");
@ -139,7 +143,7 @@ int main(int argc, char **argv)
}
}
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
int Np=0; // number of local pore nodes
@ -184,7 +188,7 @@ int main(int argc, char **argv)
if (rank == 0) PrintNeighborList(neighborList,Np, rank);
comm.barrier();
MPI_Barrier(comm);
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
@ -209,13 +213,13 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
starttime = MPI_Wtime();
/************ MAIN ITERATION LOOP (timing communications)***************************************/
//ScaLBL_Comm->SendD3Q19(dist, &dist[10*Np]);
//ScaLBL_Comm->RecvD3Q19(dist, &dist[10*Np]);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
if (rank==0) printf("Beginning AA timesteps...\n");
if (rank==0) printf("********************************************************\n");
@ -227,14 +231,14 @@ int main(int argc, char **argv)
ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, ScaLBL_Comm->first_interior, ScaLBL_Comm->last_interior, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm->SendD3Q19AA(dist); //READ FROM NORMAL
ScaLBL_D3Q19_AAeven_MRT(dist, ScaLBL_Comm->first_interior, ScaLBL_Comm->last_interior, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_MRT(dist, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
//************************************************************************/
@ -327,7 +331,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************

View File

@ -2,7 +2,7 @@
#include <math.h>
#include "analysis/TwoPhase.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "IO/Mesh.h"
#include "IO/Writer.h"
@ -18,9 +18,13 @@
int main (int argc, char *argv[])
{
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int i,j,k;
// Load inputs
string FILENAME = argv[1];
@ -36,7 +40,7 @@ int main (int argc, char *argv[])
Nx+=2; Ny+=2; Nz+=2;
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1;
for (i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1;
Dm->CommInit();
@ -47,9 +51,9 @@ int main (int argc, char *argv[])
double dist1,dist2;
Cx = Cy = Cz = N*0.5;
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
for (k=0; k<Nz; k++){
for (j=0; j<Ny; j++){
for (i=0; i<Nx; i++){
dist2 = sqrt((i-Cx)*(i-Cx)+(j-Cy)*(j-Cy)+(k-Cz)*(k-Cz)) - CAPRAD;
dist2 = fabs(Cz-k)-HEIGHT;
@ -58,9 +62,9 @@ int main (int argc, char *argv[])
}
}
Cz += SPEED;
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
for (k=0; k<Nz; k++){
for (j=0; j<Ny; j++){
for (i=0; i<Nx; i++){
dist1 = sqrt((i-Cx)*(i-Cx)+(j-Cy)*(j-Cy)) - RADIUS;
dist2 = sqrt((i-Cx)*(i-Cx)+(j-Cy)*(j-Cy)+(k-Cz)*(k-Cz)) - CAPRAD;
@ -73,9 +77,9 @@ int main (int argc, char *argv[])
}
}
Cz += SPEED;
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
for (k=0; k<Nz; k++){
for (j=0; j<Ny; j++){
for (i=0; i<Nx; i++){
dist2 = sqrt((i-Cx)*(i-Cx)+(j-Cy)*(j-Cy)+(k-Cz)*(k-Cz)) - CAPRAD;
dist2 = fabs(Cz-k)-HEIGHT;
@ -147,7 +151,7 @@ int main (int argc, char *argv[])
return toReturn;
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
return 0;
MPI_Finalize();
// ****************************************************

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -488,11 +488,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check;
{
// parallel domain size (# of sub-domains)
@ -578,7 +582,7 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
MPI_Bcast(&Nx,1,MPI_INT,0,comm);
MPI_Bcast(&Ny,1,MPI_INT,0,comm);
@ -591,7 +595,7 @@ int main(int argc, char **argv)
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
@ -609,7 +613,7 @@ int main(int argc, char **argv)
printf("********************************************************\n");
}
comm.barrier();
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
@ -617,7 +621,7 @@ int main(int argc, char **argv)
if (rank == 0) {
printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc);
}
comm.barrier();
MPI_Barrier(comm);
if (rank == 1){
printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc);
printf("\n\n");
@ -646,7 +650,7 @@ int main(int argc, char **argv)
fread(Dm.id,1,N,IDFILE);
fclose(IDFILE);
comm.barrier();
MPI_Barrier(comm);
Dm.CommInit();
//.......................................................................
@ -667,12 +671,12 @@ int main(int argc, char **argv)
}
}
}
comm.barrier();
MPI_Barrier(comm);
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm);
porosity = sum*iVol_global;
if (rank==0) printf("Media porosity = %f \n",porosity);
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
if (rank==0) printf ("Create ScaLBL_Communicator \n");
@ -702,7 +706,7 @@ int main(int argc, char **argv)
neighborList= new int[18*Np];
ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np);
comm.barrier();
MPI_Barrier(comm);
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
@ -730,7 +734,7 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
starttime = MPI_Wtime();
while (timestep < timesteps) {
@ -739,14 +743,14 @@ int main(int argc, char **argv)
ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, ScaLBL_Comm.next, Np, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_MRT(dist, ScaLBL_Comm.next, Np, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_MRT(dist, 0, ScaLBL_Comm.next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
//************************************************************************/
@ -779,7 +783,7 @@ int main(int argc, char **argv)
VEL= new double [3*Np];
int SIZE=3*Np*sizeof(double);
ScaLBL_D3Q19_Momentum(dist,Velocity, Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_CopyToHost(&VEL[0],&Velocity[0],SIZE);
sum_local=0.f;
@ -801,7 +805,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
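The change repeated through these test drivers swaps between the project's Utilities::MPI wrapper (comm.getRank(), comm.getSize(), comm.barrier()) and direct MPI calls. As a minimal sketch of the raw-MPI skeleton that one side of the hunks above uses, with the wrapper equivalents noted in comments; the wrapper method names are taken from this diff, everything else is illustrative:

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank, nprocs;
    MPI_Comm_rank(comm, &rank);   // wrapper form in this diff: int rank = comm.getRank();
    MPI_Comm_size(comm, &nprocs); // wrapper form in this diff: int nprocs = comm.getSize();
    {
        // ... work that uses the communicator goes here ...
        if (rank == 0) printf("running on %i ranks\n", nprocs);
        MPI_Barrier(comm);        // wrapper form in this diff: comm.barrier();
    }
    MPI_Finalize();
    return 0;
}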

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -26,9 +26,15 @@ std::shared_ptr<Database> loadInputs( int nprocs )
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check=0;
{
@ -39,7 +45,6 @@ int main(int argc, char **argv)
{1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1},
{0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}};
int rank = comm.getRank();
if (rank == 0){
printf("********************************************************\n");
printf("Running unit test: TestMap \n");
@ -47,7 +52,7 @@ int main(int argc, char **argv)
}
// Load inputs
auto db = loadInputs( comm.getSize() );
auto db = loadInputs( nprocs );
int Nx = db->getVector<int>( "n" )[0];
int Ny = db->getVector<int>( "n" )[1];
int Nz = db->getVector<int>( "n" )[2];
@ -89,7 +94,7 @@ int main(int argc, char **argv)
neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np);
comm.barrier();
MPI_Barrier(comm);
// Check the neighborlist
printf("Check neighborlist: exterior %i, first interior %i last interior %i \n",ScaLBL_Comm->LastExterior(),ScaLBL_Comm->FirstInterior(),ScaLBL_Comm->LastInterior());
@ -192,7 +197,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************

View File

@ -8,7 +8,7 @@
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "models/ColorModel.h"
inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius){
@ -67,10 +67,11 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius)
int main(int argc, char **argv)
{
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains)
if (rank == 0){
@ -265,7 +266,7 @@ int main(int argc, char **argv)
}
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -1,6 +1,6 @@
// Test reading high-resolution files from the microct database
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/UnitTest.h"
#include "common/Database.h"
#include "common/Domain.h"
@ -13,14 +13,12 @@
void testReadMicroCT( const std::string& filename, UnitTest& ut )
{
Utilities::MPI comm( MPI_COMM_WORLD );
// Get the domain info
auto db = std::make_shared<Database>( filename );
auto domain_db = db->getDatabase( "Domain" );
// Test reading microCT files
auto data = readMicroCT( *domain_db, comm );
auto data = readMicroCT( *domain_db, MPI_COMM_WORLD );
// Check if we loaded the data correctly
if ( data.size() == domain_db->getVector<size_t>( "n" ) )
@ -32,7 +30,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut )
auto n = domain_db->getVector<int>( "n" );
auto nproc = domain_db->getVector<int>( "nproc" );
int N[3] = { n[0]*nproc[0], n[1]*nproc[1], n[2]*nproc[2] };
int rank = comm.getRank();
int rank = comm_rank(MPI_COMM_WORLD);
RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] );
std::vector<IO::MeshDataStruct> meshData( 1 );
auto Var = std::make_shared<IO::Variable>();
@ -43,7 +41,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut )
meshData[0].meshName = "grid";
meshData[0].mesh = std::make_shared<IO::DomainMesh>(rankInfo,n[0],n[1],n[2],N[0],N[1],N[2]);
meshData[0].vars.push_back(Var);
IO::writeData( 0, meshData, comm );
IO::writeData( 0, meshData, MPI_COMM_WORLD );
}

View File

@ -1,5 +1,5 @@
#include <iostream>
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include <math.h>
@ -463,14 +463,13 @@ inline void MRT_Transform(double *dist, int Np) {
int main (int argc, char **argv)
{
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
int rank = MPI_WORLD_RANK();
int nprocs = MPI_WORLD_SIZE();
for (int i=0; i<nprocs; i++) {
if ( rank==i )
printf("%i of %i: TestMoments\n",rank,nprocs);
comm.barrier();
MPI_Barrier(MPI_COMM_WORLD);
}
// Create a memory leak for valgrind to find

View File

@ -1,7 +1,7 @@
// Test reading/writing netcdf files
#include "IO/netcdf.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "common/UnitTest.h"
@ -13,8 +13,7 @@ void load( const std::string& );
void test_NETCDF( UnitTest& ut )
{
Utilities::MPI comm( MPI_COMM_WORLD );
const int rank = comm.getRank();
const int rank = comm_rank( MPI_COMM_WORLD );
int nprocx = 2;
int nprocy = 2;
int nprocz = 2;
@ -31,7 +30,7 @@ void test_NETCDF( UnitTest& ut )
auto dims = netcdf::defDim( fid, {"X", "Y", "Z"}, dim );
netcdf::write( fid, "tmp", dims, data, info );
netcdf::close( fid );
comm.barrier();
MPI_Barrier( MPI_COMM_WORLD );
// Read the contents of the file we created
fid = netcdf::open( filename, netcdf::READ );
Array<float> tmp = netcdf::getVar<float>( fid, "tmp" );
@ -96,8 +95,7 @@ int main(int argc, char **argv)
{
// Initialize MPI
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
const int rank = comm.getRank();
int rank = comm_rank(MPI_COMM_WORLD);
UnitTest ut;
PROFILE_START("Main");

View File

@ -7,7 +7,7 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "models/MRTModel.h"
void ParallelPlates(ScaLBL_MRTModel &MRT){
@ -47,11 +47,15 @@ void ParallelPlates(ScaLBL_MRTModel &MRT){
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check=0;
{
if (rank == 0){
@ -73,7 +77,7 @@ int main(int argc, char **argv)
int SIZE=MRT.Np*sizeof(double);
ScaLBL_D3Q19_Momentum(MRT.fq,MRT.Velocity, MRT.Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_CopyToHost(&Vz[0],&MRT.Velocity[0],3*SIZE);
if (rank == 0) printf("Force: %f,%f,%f \n",MRT.Fx,MRT.Fy,MRT.Fz);
@ -87,7 +91,7 @@ int main(int argc, char **argv)
j=Ny/2; k=Nz/2;
if (rank == 0) printf("Channel width=%f \n",W);
if (rank == 0) printf("ID flag vz analytical\n");
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) {
for (i=0;i<Nx;i++){
@ -126,7 +130,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************

View File

@ -7,16 +7,21 @@
#include <iostream>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
int check=0;
{
if (rank == 0){
@ -45,7 +50,7 @@ int main(int argc, char **argv)
printf("********************************************************\n");
}
comm.barrier();
MPI_Barrier(comm);
int kproc = rank/(nprocx*nprocy);
int jproc = (rank-nprocx*nprocy*kproc)/nprocx;
int iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
@ -53,7 +58,7 @@ int main(int argc, char **argv)
if (rank == 0) {
printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc);
}
comm.barrier();
MPI_Barrier(comm);
if (rank == 1){
printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc);
printf("\n\n");
@ -97,11 +102,11 @@ int main(int argc, char **argv)
}
}
}
sum = comm.sumReduce( sum_local );
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm);
porosity = sum*iVol_global;
if (rank==0) printf("Media porosity = %f \n",porosity);
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
if (rank==0) printf ("Create ScaLBL_Communicator \n");
@ -128,7 +133,7 @@ int main(int argc, char **argv)
IntArray Map(Nx,Ny,Nz);
neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np);
comm.barrier();
MPI_Barrier(comm);
//......................device distributions.................................
if (rank==0) printf ("Allocating distributions \n");
@ -189,7 +194,7 @@ int main(int argc, char **argv)
}
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
return check;

View File

@ -39,10 +39,11 @@ std::shared_ptr<Database> loadInputs( int nprocs )
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{
@ -97,7 +98,7 @@ int main(int argc, char **argv)
}
}
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf("Initialized! Converting to Signed Distance function \n");
double t1 = MPI_Wtime();
@ -115,7 +116,7 @@ int main(int argc, char **argv)
}
}
}
err = Dm.Comm.sumReduce( err );
err = sumReduce( Dm.Comm, err );
err = sqrt( err / (nx*ny*nz*nprocs) );
if (rank==0)
printf("Mean error %0.4f \n", err);
@ -141,7 +142,7 @@ int main(int argc, char **argv)
IO::writeData( "testSegDist", data, MPI_COMM_WORLD );
}
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
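Both reduction idioms in the hunk above compute the same quantity: sumReduce(Dm.Comm, err) / Dm.Comm.sumReduce(err) on one side, and an explicit MPI_Allreduce with MPI_SUM on the other (the same pattern used for the porosity sums elsewhere in this diff). A sketch of such a helper under that assumption; the name sumReduce matches the diff, but the body here is illustrative, not the project's implementation:

#include <mpi.h>
#include <cstdio>

// Illustrative stand-in for sumReduce(comm, x): an all-reduce with MPI_SUM
// that returns the global total on every rank.
static double sumReduce(MPI_Comm comm, double local)
{
    double global = 0.0;
    MPI_Allreduce(&local, &global, 1, MPI_DOUBLE, MPI_SUM, comm);
    return global;
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    double sum_local = 1.0;                            // stand-in for a locally accumulated sum
    double sum = sumReduce(MPI_COMM_WORLD, sum_local); // global total, as in the porosity/error hunks
    if (rank == 0) printf("global sum = %f\n", sum);
    MPI_Finalize();
    return 0;
}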

View File

@ -26,10 +26,11 @@ std::shared_ptr<Database> loadInputs( int nprocs )
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
if ( rank==0 ) {
@ -136,7 +137,7 @@ int main(int argc, char **argv)
// Averages->Reduce();
} // Limit scope so variables that contain communicators will free before MPI_Finialize
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}

View File

@ -26,10 +26,11 @@ std::shared_ptr<Database> loadInputs( int nprocs )
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
if ( rank==0 ) {
@ -225,7 +226,7 @@ int main(int argc, char **argv)
IO::writeData( timestep, visData, comm );
} // Limit scope so variables that contain communicators will free before MPI_Finialize
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}

View File

@ -26,10 +26,11 @@ std::shared_ptr<Database> loadInputs( int nprocs )
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
if ( rank==0 ) {
@ -164,7 +165,7 @@ int main(int argc, char **argv)
// Averages->Reduce();
} // Limit scope so variables that contain communicators will free before MPI_Finialize
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}

View File

@ -26,10 +26,11 @@ std::shared_ptr<Database> loadInputs( int nprocs )
int main(int argc, char **argv)
{
// Initialize MPI
int rank, nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
if ( rank==0 ) {
@ -156,7 +157,7 @@ int main(int argc, char **argv)
}
} // Limit scope so variables that contain communicators will free before MPI_Finialize
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}

View File

@ -8,7 +8,7 @@
#include <fstream>
#include "analysis/TwoPhase.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "IO/Mesh.h"
#include "IO/Writer.h"
@ -17,10 +17,11 @@
int main(int argc, char **argv)
{
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{ // Limit scope so Domain can free it's communicator
printf("Running two-phase averaging test on %i processors \n",nprocs);
@ -109,7 +110,7 @@ int main(int argc, char **argv)
fclose(PHASE);
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
} // Limit scope so Domain will free it's communicator
MPI_Finalize();
return 0;

View File

@ -8,7 +8,7 @@
#include "common/UnitTest.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"
@ -34,9 +34,11 @@ inline double distance( const Point& p )
// Test writing and reading the given format
void testWriter( const std::string& format, std::vector<IO::MeshDataStruct>& meshData, UnitTest& ut )
{
Utilities::MPI comm( MPI_COMM_WORLD );
int nprocs = comm.getSize();
comm.barrier();
int rank, nprocs;
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
MPI_Barrier(comm);
// Get the format
std::string format2 = format;
@ -61,7 +63,7 @@ void testWriter( const std::string& format, std::vector<IO::MeshDataStruct>& mes
IO::initialize( "test_"+format, format2, false );
IO::writeData( 0, meshData, comm );
IO::writeData( 3, meshData, comm );
comm.barrier();
MPI_Barrier(comm);
PROFILE_STOP(format+"-write");
// Get the summary name for reading
@ -226,10 +228,11 @@ void testWriter( const std::string& format, std::vector<IO::MeshDataStruct>& mes
// Main
int main(int argc, char **argv)
{
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
Utilities::setAbortBehavior(true,2);
Utilities::setErrorHandlers();
UnitTest ut;
@ -386,7 +389,7 @@ int main(int argc, char **argv)
ut.report();
PROFILE_SAVE("TestWriter",true);
int N_errors = ut.NumFailGlobal();
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return N_errors;
}

View File

@ -5,7 +5,7 @@
#include <stdexcept>
#include <fstream>
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "common/Utilities.h"
#include "IO/Mesh.h"
@ -17,10 +17,11 @@
int main(int argc, char **argv)
{
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
Utilities::setErrorHandlers();
PROFILE_ENABLE(2);
PROFILE_ENABLE_TRACE();
@ -69,20 +70,20 @@ int main(int argc, char **argv)
i++;
}
comm.barrier();
MPI_Barrier(comm);
PROFILE_STOP("Read");
// Save the mesh data to a new file
PROFILE_START("Write");
IO::writeData( timestep, meshData, MPI_COMM_WORLD );
comm.barrier();
MPI_Barrier(comm);
PROFILE_STOP("Write");
}
} // Limit scope
PROFILE_STOP("Main");
PROFILE_SAVE("convertData",true);
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}

View File

@ -1,19 +1,18 @@
#include <iostream>
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
int main (int argc, char **argv)
{
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
int rank = MPI_WORLD_RANK();
int nprocs = MPI_WORLD_SIZE();
for (int i=0; i<nprocs; i++) {
if ( rank==i )
printf("%i of %i: Hello world\n",rank,nprocs);
comm.barrier();
MPI_Barrier(MPI_COMM_WORLD);
}
// Create a memory leak for valgrind to find
@ -27,7 +26,7 @@ int main (int argc, char **argv)
int error = 0;
// Finished
comm.barrier();
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
return error;
}
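The hello-world and TestMoments drivers above serialize their output with a rank loop plus a barrier. A small self-contained sketch of that idiom (output interleaving is ultimately up to the MPI runtime, but this is the pattern the tests rely on):

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    // One rank prints per iteration; the barrier keeps the other ranks
    // from racing ahead, which is how these tests order their output.
    for (int i = 0; i < nprocs; i++) {
        if (rank == i)
            printf("%i of %i: Hello world\n", rank, nprocs);
        MPI_Barrier(MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}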

View File

@ -11,7 +11,7 @@
#include "D3Q19.h"
#include "D3Q7.h"
#include "Color.h"
//#include "common/MPI.h"
//#include "common/MPI_Helpers.h"
//#include "Communication.h"
//#define CBUB

View File

@ -12,7 +12,7 @@
#include "D3Q19.h"
#include "D3Q7.h"
#include "Color.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "Communication.h"
#define WRITE_SURFACES
@ -96,11 +96,15 @@ inline void ZeroHalo(double *Data, int Nx, int Ny, int Nz)
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
int iproc,jproc,kproc;
@ -205,7 +209,7 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm);
@ -238,7 +242,7 @@ int main(int argc, char **argv)
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
RESTART_INTERVAL=interval;
// **************************************************************
@ -280,7 +284,7 @@ int main(int argc, char **argv)
rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz,
rank_yz, rank_YZ, rank_yZ, rank_Yz );
comm.barrier();
MPI_Barrier(comm);
Nz += 2;
Nx = Ny = Nz; // Cubic domain
@ -397,14 +401,14 @@ int main(int argc, char **argv)
//.......................................................................
if (rank == 0) printf("Reading the sphere packing \n");
if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad);
comm.barrier();
MPI_Barrier(comm);
// Broadcast the sphere packing to all processes
MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
if (rank == 0){
// Compute the Sauter mean diameter
@ -592,7 +596,7 @@ int main(int argc, char **argv)
}
}
}
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("SendLists are ready on host\n");
//......................................................................................
// Use MPI to fill in the recvCounts form the associated processes
@ -779,7 +783,7 @@ int main(int argc, char **argv)
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory
//......................................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("Prepare to copy send/recv Lists to device \n");
ScaLBL_CopyToDevice(dvcSendList_x,sendList_x,sendCount_x*sizeof(int));
ScaLBL_CopyToDevice(dvcSendList_X,sendList_X,sendCount_X*sizeof(int));
@ -989,7 +993,7 @@ int main(int argc, char **argv)
recvMeshData_YZ = new double [recvCount_YZ];
recvMeshData_XZ = new double [recvCount_XZ];
if (rank==0) printf ("Devices are ready to communicate. \n");
comm.barrier();
MPI_Barrier(comm);
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
@ -1216,7 +1220,7 @@ int main(int argc, char **argv)
ScaLBL_CopyToDevice(f_odd,cDistOdd,9*N*sizeof(double));
ScaLBL_CopyToDevice(Den,cDen,2*N*sizeof(double));
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
}
// Set up the cube list (very regular in this case due to lack of blob-ID)
// Set up kstart, kfinish so that the reservoirs are excluded from averaging
@ -1483,7 +1487,7 @@ int main(int argc, char **argv)
ScaLBL_CopyToHost(Vel_x.data,&Velocity[0],N*sizeof(double));
ScaLBL_CopyToHost(Vel_y.data,&Velocity[N],N*sizeof(double));
ScaLBL_CopyToHost(Vel_z.data,&Velocity[2*N],N*sizeof(double));
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
int timestep = 0;
@ -1496,7 +1500,7 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
@ -1911,7 +1915,7 @@ int main(int argc, char **argv)
//...................................................................................
comm.barrier();
MPI_Barrier(comm);
// Timestep completed!
timestep++;
@ -1936,7 +1940,7 @@ int main(int argc, char **argv)
ScaLBL_CopyToHost(Vel_x.data,&Velocity[0],N*sizeof(double));
ScaLBL_CopyToHost(Vel_y.data,&Velocity[N],N*sizeof(double));
ScaLBL_CopyToHost(Vel_z.data,&Velocity[2*N],N*sizeof(double));
comm.barrier();
MPI_Barrier(comm);
}
if (timestep%1000 == 5){
//...........................................................................
@ -2441,7 +2445,7 @@ int main(int argc, char **argv)
}
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm);
@ -2464,7 +2468,7 @@ int main(int argc, char **argv)
MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&trawn,&trawn_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&trJwn,&trJwn_global,1,MPI_DOUBLE,MPI_SUM,comm);
comm.barrier();
MPI_Barrier(comm);
//.........................................................................
// Compute the change in the total surface energy based on the defined interval
// See McClure, Prins and Miller (2014)
@ -2543,7 +2547,7 @@ int main(int argc, char **argv)
if (rank==0){
mkdir(tmpstr,0777);
}
comm.barrier();
MPI_Barrier(comm);
FILE *WN_TRIS;
sprintf(LocalRankFilename,"%s/%s%s",tmpstr,"wn-tris.",LocalRankString);
@ -2688,7 +2692,7 @@ int main(int argc, char **argv)
}
//************************************************************************/
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
@ -2812,7 +2816,7 @@ int main(int argc, char **argv)
*/ //************************************************************************/
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}
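The sphere-packing driver above reads its inputs on rank 0 and then broadcasts both the scalar parameters (tau, alpha, ...) and the packing arrays (cx, cy, cz, rad) to every process. A minimal sketch of that broadcast idiom; the variable names follow the diff, the values are stand-ins:

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank;
    MPI_Comm_rank(comm, &rank);

    // Rank 0 reads the input; every other rank receives the values by broadcast.
    double tau = 0.0, alpha = 0.0;
    int nspheres = 0;
    if (rank == 0) { tau = 1.0; alpha = 0.005; nspheres = 4; } // stand-in values
    MPI_Barrier(comm);
    MPI_Bcast(&tau, 1, MPI_DOUBLE, 0, comm);
    MPI_Bcast(&alpha, 1, MPI_DOUBLE, 0, comm);
    MPI_Bcast(&nspheres, 1, MPI_INT, 0, comm);

    // Arrays work the same way once every rank knows the size and has allocated storage.
    double *rad = new double[nspheres];
    if (rank == 0)
        for (int i = 0; i < nspheres; i++) rad[i] = 0.1 * (i + 1); // stand-in radii
    MPI_Bcast(rad, nspheres, MPI_DOUBLE, 0, comm);

    delete[] rad;
    MPI_Finalize();
    return 0;
}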

View File

@ -9,7 +9,7 @@
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
//#define WRITE_SURFACES
@ -23,12 +23,15 @@ using namespace std;
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
@ -95,7 +98,7 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
//MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
@ -120,7 +123,7 @@ int main(int argc, char **argv)
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
RESTART_INTERVAL=interval;
// **************************************************************
@ -155,7 +158,7 @@ int main(int argc, char **argv)
// Mask that excludes the solid phase
Domain Mask(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BC);
comm.barrier();
MPI_Barrier(comm);
Nx += 2; Ny += 2; Nz += 2;
int N = Nx*Ny*Nz;
@ -191,7 +194,7 @@ int main(int argc, char **argv)
sprintf(LocalRankString,"%05d",rank);
sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString);
ReadBinaryFile(LocalRankFilename, Averages.SDs.data(), N);
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//.......................................................................
@ -258,7 +261,7 @@ int main(int argc, char **argv)
id[0] = id[Nx-1] = id[(Ny-1)*Nx] = id[(Ny-1)*Nx + Nx-1] = 0;
id[(Nz-1)*Nx*Ny] = id[(Nz-1)*Nx*Ny+Nx-1] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx + Nx-1] = 0;
//.........................................................
comm.barrier();
MPI_Barrier(comm);
// Initialize communication structures in averaging domain
for (i=0; i<Mask.Nx*Mask.Ny*Mask.Nz; i++) Mask.id[i] = id[i];
@ -274,7 +277,7 @@ int main(int argc, char **argv)
IntArray Map(Nx,Ny,Nz);
neighborList= new int[18*Npad];
Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Mask.id,Np);
comm.barrier();
MPI_Barrier(comm);
// LBM variables
if (rank==0) printf ("Allocating distributions \n");
@ -330,7 +333,7 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
@ -345,14 +348,14 @@ int main(int argc, char **argv)
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_BGK(dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_BGK(dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************/
if (timestep%500 == 0){
@ -409,7 +412,7 @@ int main(int argc, char **argv)
}
//************************************************************************/
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
@ -427,7 +430,7 @@ int main(int argc, char **argv)
NULL_USE(RESTART_INTERVAL);
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}
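The simulation loops above bracket their timestepping with a device barrier, an MPI barrier, and MPI_Wtime, then report wall time per completed timestep. A sketch of just that timing scaffold; ScaLBL_DeviceBarrier and the LBM kernels are the project's own and are represented only by a placeholder comment:

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank;
    MPI_Comm_rank(comm, &rank);

    int timestep = 0, timesteps = 1000;
    MPI_Barrier(comm);                 // in the tests: ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
    double starttime = MPI_Wtime();
    while (timestep < timesteps) {
        // ... collide/stream kernels and halo exchange go here ...
        timestep++;
    }
    MPI_Barrier(comm);
    double stoptime = MPI_Wtime();

    // Same report as the drivers above: seconds of wall time per timestep.
    double cputime = (stoptime - starttime) / timestep;
    if (rank == 0) printf("CPU time = %f s per timestep\n", cputime);
    MPI_Finalize();
    return 0;
}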

View File

@ -9,7 +9,7 @@
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
std::shared_ptr<Database> loadInputs( )
@ -24,11 +24,15 @@ std::shared_ptr<Database> loadInputs( )
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
{
//*****************************************
// MPI ranks for all 18 neighbors
@ -92,7 +96,7 @@ int main(int argc, char **argv)
rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz,
rank_yz, rank_YZ, rank_yZ, rank_Yz );
comm.barrier();
MPI_Barrier(comm);
Nz += 2;
Nx = Ny = Nz; // Cubic domain
@ -181,7 +185,7 @@ int main(int argc, char **argv)
}
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -9,7 +9,7 @@
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "analysis/runAnalysis.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
@ -30,9 +30,10 @@ int main(int argc, char **argv)
// Initialize MPI
int provided_thread_support = -1;
MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm;
MPI_Comm_dup(MPI_COMM_WORLD,&comm);
int rank = comm_rank(comm);
int nprocs = comm_size(comm);
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
// parallel domain size (# of sub-domains)
@ -51,7 +52,7 @@ int main(int argc, char **argv)
// int device=ScaLBL_SetDevice(rank);
//printf("Using GPU ID %i for rank %i \n",device,rank);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
PROFILE_ENABLE(1);
//PROFILE_ENABLE_TRACE();
@ -170,7 +171,7 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
MPI_Bcast(&tauA,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&tauB,1,MPI_DOUBLE,0,comm);
@ -206,7 +207,7 @@ int main(int argc, char **argv)
// Get the rank info
const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz);
comm.barrier();
MPI_Barrier(comm);
if (nprocs != nprocx*nprocy*nprocz){
printf("nprocx = %i \n",nprocx);
@ -261,7 +262,7 @@ int main(int argc, char **argv)
// Mask that excludes the solid phase
Domain Mask(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition);
comm.barrier();
MPI_Barrier(comm);
Nx+=2; Ny+=2; Nz += 2;
int N = Nx*Ny*Nz;
@ -296,7 +297,7 @@ int main(int argc, char **argv)
sprintf(LocalRankString,"%05d",rank);
sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString);
ReadBinaryFile(LocalRankFilename, Averages->SDs.data(), N);
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
@ -340,7 +341,7 @@ int main(int argc, char **argv)
delete [] cDen;
delete [] cfq;
*/
comm.barrier();
MPI_Barrier(comm);
}
fflush(stdout);
@ -415,7 +416,7 @@ int main(int argc, char **argv)
neighborList= new int[18*Npad];
Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Mask.id,Np);
if (rank==0) printf ("Set up memory efficient layout Npad=%i, Np=%i \n",Npad,Np);
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
@ -536,7 +537,7 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
@ -588,7 +589,7 @@ int main(int argc, char **argv)
}
ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm.next, Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
// *************EVEN TIMESTEP*************
timestep++;
@ -621,10 +622,10 @@ int main(int argc, char **argv)
}
ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm.next, Np);
ScaLBL_DeviceBarrier(); comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************
comm.barrier();
MPI_Barrier(comm);
PROFILE_STOP("Update");
// Run the analysis
@ -636,7 +637,7 @@ int main(int argc, char **argv)
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
@ -656,8 +657,9 @@ int main(int argc, char **argv)
PROFILE_STOP("Main");
PROFILE_SAVE("lbpm_color_simulator",1);
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
} // Limit scope so variables that contain communicators will free before MPI_Finialize
MPI_Comm_free(&comm);
MPI_Finalize();
}
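The simulator mains above either duplicate MPI_COMM_WORLD explicitly (MPI_Comm_dup / MPI_Comm_free) or take comm.dup() from the wrapper, and they keep every object that holds a communicator inside an inner scope so it is destroyed before MPI_Finalize. A sketch of the raw-MPI version of that structure, with the model construction elided:

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm comm;
    MPI_Comm_dup(MPI_COMM_WORLD, &comm); // private copy, like comm.dup() on the wrapper side
    int rank, nprocs;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nprocs);
    { // Limit scope so objects that hold the communicator are destroyed first
        // ... build the model, run timesteps, write output ...
        MPI_Barrier(comm);
    }
    MPI_Comm_free(&comm);  // release the duplicate before finalizing
    MPI_Finalize();
    return 0;
}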

View File

@ -28,9 +28,10 @@ int main(int argc, char **argv)
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm;
MPI_Comm_dup(MPI_COMM_WORLD,&comm);
int rank = comm_rank(comm);
int nprocs = comm_size(comm);
if (rank == 0){
printf("********************************************************\n");
@ -40,7 +41,7 @@ int main(int argc, char **argv)
// Initialize compute device
ScaLBL_SetDevice(rank);
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
PROFILE_ENABLE(1);
//PROFILE_ENABLE_TRACE();
@ -50,7 +51,7 @@ int main(int argc, char **argv)
Utilities::setErrorHandlers();
auto filename = argv[1];
ScaLBL_ColorModel ColorModel(rank,nprocs,comm.dup());
ScaLBL_ColorModel ColorModel(rank,nprocs,comm);
ColorModel.ReadParams(filename);
ColorModel.SetDomain();
ColorModel.ReadInput();
@ -63,7 +64,8 @@ int main(int argc, char **argv)
PROFILE_SAVE("lbpm_color_simulator",1);
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Comm_free(&comm);
} // Limit scope so variables that contain communicators will free before MPI_Finialize

Some files were not shown because too many files have changed in this diff.