partial merge

This commit is contained in:
James McClure 2021-01-04 19:33:27 -05:00
commit 20800ed7eb
173 changed files with 27358 additions and 7516 deletions

View File

@ -1,8 +1,7 @@
#include "IO/MeshDatabase.h"
#include "IO/Mesh.h"
#include "IO/PackData.h"
#include "IO/IOHelpers.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include <vector>
@ -14,6 +13,8 @@
/****************************************************
****************************************************/
// MeshType
template<>
size_t packsize<IO::MeshType>( const IO::MeshType& rhs )
@ -246,76 +247,80 @@ void DatabaseEntry::read( const std::string& line )
// Gather the mesh databases from all processors
// Convert a byte count N into a count of doubles: (N+7)/sizeof(double).
// With 8-byte doubles this rounds up to the next whole double.
inline int tod( int N )
{
    return static_cast<int>( ( N + 7 ) / sizeof( double ) );
}
std::vector<MeshDatabase> gatherAll( const std::vector<MeshDatabase>& meshes, const Utilities::MPI& comm )
std::vector<MeshDatabase> gatherAll( const std::vector<MeshDatabase>& meshes, MPI_Comm comm )
{
if ( comm.getSize() == 1 )
return meshes;
PROFILE_START("gatherAll");
PROFILE_START("gatherAll-pack",2);
int size = comm.getSize();
// First pack the mesh data to local buffers
int localsize = 0;
for (size_t i=0; i<meshes.size(); i++)
localsize += tod(packsize(meshes[i]));
auto localbuf = new double[localsize];
int pos = 0;
for (size_t i=0; i<meshes.size(); i++) {
pack( meshes[i], (char*) &localbuf[pos] );
pos += tod(packsize(meshes[i]));
}
PROFILE_STOP("gatherAll-pack",2);
// Get the number of doubles each processor will be sending/receiving
PROFILE_START("gatherAll-send1",2);
auto recvsize = comm.allGather( localsize );
int globalsize = recvsize[0];
auto disp = new int[size];
disp[0] = 0;
for (int i=1; i<size; i++) {
disp[i] = disp[i-1] + recvsize[i];
globalsize += recvsize[i];
}
PROFILE_STOP("gatherAll-send1",2);
// Send/recv the global data
PROFILE_START("gatherAll-send2",2);
auto globalbuf = new double[globalsize];
comm.allGather(localbuf,localsize,globalbuf,recvsize.data(),disp,true);
PROFILE_STOP("gatherAll-send2",2);
// Unpack the data
PROFILE_START("gatherAll-unpack",2);
std::map<std::string,MeshDatabase> data;
pos = 0;
while ( pos < globalsize ) {
MeshDatabase tmp;
unpack(tmp,(char*)&globalbuf[pos]);
pos += tod(packsize(tmp));
std::map<std::string,MeshDatabase>::iterator it = data.find(tmp.name);
if ( it==data.end() ) {
data[tmp.name] = tmp;
} else {
for (size_t i=0; i<tmp.domains.size(); i++)
it->second.domains.push_back(tmp.domains[i]);
for (size_t i=0; i<tmp.variables.size(); i++)
it->second.variables.push_back(tmp.variables[i]);
it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end());
#ifdef USE_MPI
PROFILE_START("gatherAll");
PROFILE_START("gatherAll-pack",2);
int size = MPI_WORLD_SIZE();
// First pack the mesh data to local buffers
int localsize = 0;
for (size_t i=0; i<meshes.size(); i++)
localsize += tod(packsize(meshes[i]));
auto localbuf = new double[localsize];
int pos = 0;
for (size_t i=0; i<meshes.size(); i++) {
pack( meshes[i], (char*) &localbuf[pos] );
pos += tod(packsize(meshes[i]));
}
}
for (auto it=data.begin(); it!=data.end(); ++it) {
// Get the unique variables
std::set<VariableDatabase> data2(it->second.variables.begin(),it->second.variables.end());
it->second.variables = std::vector<VariableDatabase>(data2.begin(),data2.end());
}
// Free temporary memory
delete [] localbuf;
delete [] disp;
delete [] globalbuf;
// Return the results
std::vector<MeshDatabase> data2(data.size());
size_t i=0;
for (std::map<std::string,MeshDatabase>::iterator it=data.begin(); it!=data.end(); ++it, ++i)
data2[i] = it->second;
PROFILE_STOP("gatherAll-unpack",2);
PROFILE_STOP("gatherAll");
return data2;
PROFILE_STOP("gatherAll-pack",2);
// Get the number of doubles each processor will be sending/receiving
PROFILE_START("gatherAll-send1",2);
auto recvsize = new int[size];
MPI_Allgather(&localsize,1,MPI_INT,recvsize,1,MPI_INT,comm);
int globalsize = recvsize[0];
auto disp = new int[size];
disp[0] = 0;
for (int i=1; i<size; i++) {
disp[i] = disp[i-1] + recvsize[i];
globalsize += recvsize[i];
}
PROFILE_STOP("gatherAll-send1",2);
// Send/recv the global data
PROFILE_START("gatherAll-send2",2);
auto globalbuf = new double[globalsize];
MPI_Allgatherv(localbuf,localsize,MPI_DOUBLE,globalbuf,recvsize,disp,MPI_DOUBLE,comm);
PROFILE_STOP("gatherAll-send2",2);
// Unpack the data
PROFILE_START("gatherAll-unpack",2);
std::map<std::string,MeshDatabase> data;
pos = 0;
while ( pos < globalsize ) {
MeshDatabase tmp;
unpack(tmp,(char*)&globalbuf[pos]);
pos += tod(packsize(tmp));
std::map<std::string,MeshDatabase>::iterator it = data.find(tmp.name);
if ( it==data.end() ) {
data[tmp.name] = tmp;
} else {
for (size_t i=0; i<tmp.domains.size(); i++)
it->second.domains.push_back(tmp.domains[i]);
for (size_t i=0; i<tmp.variables.size(); i++)
it->second.variables.push_back(tmp.variables[i]);
it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end());
}
}
for (std::map<std::string,MeshDatabase>::iterator it=data.begin(); it!=data.end(); ++it) {
// Get the unique variables
std::set<VariableDatabase> data2(it->second.variables.begin(),it->second.variables.end());
it->second.variables = std::vector<VariableDatabase>(data2.begin(),data2.end());
}
// Free temporary memory
delete [] localbuf;
delete [] recvsize;
delete [] disp;
delete [] globalbuf;
// Return the results
std::vector<MeshDatabase> data2(data.size());
size_t i=0;
for (std::map<std::string,MeshDatabase>::iterator it=data.begin(); it!=data.end(); ++it, ++i)
data2[i] = it->second;
PROFILE_STOP("gatherAll-unpack",2);
PROFILE_STOP("gatherAll");
return data2;
#else
return meshes;
#endif
}

View File

@ -2,7 +2,7 @@
#define MeshDatabase_INC
#include "IO/Mesh.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include <iostream>
#include <memory>
@ -70,7 +70,7 @@ public:
//! Gather the mesh databases from all processors
std::vector<MeshDatabase> gatherAll( const std::vector<MeshDatabase>& meshes, const Utilities::MPI& comm );
std::vector<MeshDatabase> gatherAll( const std::vector<MeshDatabase>& meshes, MPI_Comm comm );
//! Write the mesh databases to a file

View File

@ -1,6 +1,6 @@
#include "IO/PIO.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include <fstream>
#include <string>
@ -36,7 +36,10 @@ static void shutdownFilestream( )
}
void Utilities::logOnlyNodeZero( const std::string &filename )
{
int rank = ::Utilities::MPI( MPI_COMM_WORLD ).getRank();
int rank = 0;
#ifdef USE_MPI
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
#endif
if ( rank == 0 )
logAllNodes(filename,true);
}
@ -51,7 +54,10 @@ void Utilities::logAllNodes( const std::string &filename, bool singleStream )
// Open the log stream and redirect output
std::string full_filename = filename;
if ( !singleStream ) {
int rank = ::Utilities::MPI( MPI_COMM_WORLD ).getRank();
int rank = 0;
#ifdef USE_MPI
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
#endif
char tmp[100];
sprintf(tmp,".%04i",rank);
full_filename += std::string(tmp);

View File

@ -1,105 +0,0 @@
#include "IO/PackData.h"
#include <string.h>
/********************************************************
* Concrete implementations for packing/unpacking *
********************************************************/
// unsigned char: stored as its raw object representation
template<>
size_t packsize<unsigned char>( const unsigned char& rhs )
{
    return sizeof( rhs );
}
template<>
void pack<unsigned char>( const unsigned char& rhs, char *buffer )
{
    // buffer must provide at least packsize(rhs) bytes
    memcpy( buffer, &rhs, sizeof( rhs ) );
}
template<>
void unpack<unsigned char>( unsigned char& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// char: stored as its raw object representation
template<>
size_t packsize<char>( const char& rhs )
{
    return sizeof( rhs );
}
template<>
void pack<char>( const char& rhs, char *buffer )
{
    // buffer must provide at least packsize(rhs) bytes
    memcpy( buffer, &rhs, sizeof( rhs ) );
}
template<>
void unpack<char>( char& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// int: stored as its raw object representation (host byte order)
template<>
size_t packsize<int>( const int& rhs )
{
    return sizeof( rhs );
}
template<>
void pack<int>( const int& rhs, char *buffer )
{
    // memcpy is used so that buffer does not need to be aligned for int
    memcpy( buffer, &rhs, sizeof( rhs ) );
}
template<>
void unpack<int>( int& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// unsigned int: stored as its raw object representation (host byte order).
// All three functions derive the byte count from the same type so that the
// size reported by packsize() always matches what pack()/unpack() copy.
template<>
size_t packsize<unsigned int>( const unsigned int& rhs )
{
    return sizeof( rhs );
}
template<>
void pack<unsigned int>( const unsigned int& rhs, char *buffer )
{
    // memcpy is used so that buffer does not need to be aligned for unsigned int
    memcpy( buffer, &rhs, sizeof( rhs ) );
}
template<>
void unpack<unsigned int>( unsigned int& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// size_t: stored as its raw object representation (host byte order)
template<>
size_t packsize<size_t>( const size_t& rhs )
{
    return sizeof( rhs );
}
template<>
void pack<size_t>( const size_t& rhs, char *buffer )
{
    // memcpy is used so that buffer does not need to be aligned for size_t
    memcpy( buffer, &rhs, sizeof( rhs ) );
}
template<>
void unpack<size_t>( size_t& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// std::string: stored as its characters followed by a terminating NUL
template<>
size_t packsize<std::string>( const std::string& rhs )
{
    const size_t bytesWithTerminator = rhs.size() + 1;
    return bytesWithTerminator;
}
template<>
void pack<std::string>( const std::string& rhs, char *buffer )
{
    const size_t bytesWithTerminator = rhs.size() + 1;
    memcpy( buffer, rhs.c_str(), bytesWithTerminator );
}
template<>
void unpack<std::string>( std::string& data, const char *buffer )
{
    // Reads up to the first NUL in buffer, so a string containing an
    // embedded NUL does not round-trip through pack()/unpack().
    data.assign( buffer );
}

View File

@ -1,78 +0,0 @@
// This file contains functions to pack/unpack data structures
// to/from a caller-provided char buffer. Only declarations appear here;
// the generic definitions are pulled in from IO/PackData.hpp below.
#ifndef included_PackData
#define included_PackData
#include <vector>
#include <set>
#include <map>
//! Template function to return the buffer size required to pack a class
template<class TYPE>
size_t packsize( const TYPE& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const TYPE& rhs, char *buffer );
//! Template function to unpack a class from a buffer
template<class TYPE>
void unpack( TYPE& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::vector
template<class TYPE>
size_t packsize( const std::vector<TYPE>& rhs );
//! Template function to pack a std::vector to a buffer
template<class TYPE>
void pack( const std::vector<TYPE>& rhs, char *buffer );
//! Template function to unpack a std::vector from a buffer
template<class TYPE>
void unpack( std::vector<TYPE>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::pair
template<class TYPE1, class TYPE2>
size_t packsize( const std::pair<TYPE1,TYPE2>& rhs );
//! Template function to pack a std::pair to a buffer
template<class TYPE1, class TYPE2>
void pack( const std::pair<TYPE1,TYPE2>& rhs, char *buffer );
//! Template function to unpack a std::pair from a buffer
template<class TYPE1, class TYPE2>
void unpack( std::pair<TYPE1,TYPE2>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::map
template<class TYPE1, class TYPE2>
size_t packsize( const std::map<TYPE1,TYPE2>& rhs );
//! Template function to pack a std::map to a buffer
template<class TYPE1, class TYPE2>
void pack( const std::map<TYPE1,TYPE2>& rhs, char *buffer );
//! Template function to unpack a std::map from a buffer
template<class TYPE1, class TYPE2>
void unpack( std::map<TYPE1,TYPE2>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::set
template<class TYPE>
size_t packsize( const std::set<TYPE>& rhs );
//! Template function to pack a std::set to a buffer
template<class TYPE>
void pack( const std::set<TYPE>& rhs, char *buffer );
//! Template function to unpack a std::set from a buffer
template<class TYPE>
void unpack( std::set<TYPE>& data, const char *buffer );
#include "IO/PackData.hpp"
#endif

View File

@ -2,7 +2,7 @@
#include "IO/MeshDatabase.h"
#include "IO/IOHelpers.h"
#include "IO/silo.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include <sys/stat.h>
@ -36,7 +36,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap
global_IO_format = Format::SILO;
else
ERROR("Unknown format");
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = comm_rank(MPI_COMM_WORLD);
if ( !append && rank==0 ) {
mkdir(path.c_str(),S_IRWXU|S_IRGRP);
std::string filename;
@ -55,7 +55,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap
// Write the mesh data in the original format
static std::vector<IO::MeshDatabase> writeMeshesOrigFormat( const std::vector<IO::MeshDataStruct>& meshData, const std::string& path )
{
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
std::vector<IO::MeshDatabase> meshes_written;
for (size_t i=0; i<meshData.size(); i++) {
char domainname[100], filename[100], fullpath[200];
@ -120,7 +120,7 @@ static std::vector<IO::MeshDatabase> writeMeshesOrigFormat( const std::vector<IO
// Create the database entry for the mesh data
static IO::MeshDatabase getDatabase( const std::string& filename, const IO::MeshDataStruct& mesh, int format )
{
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
char domainname[100];
sprintf(domainname,"%s_%05i",mesh.meshName.c_str(),rank);
// Create the MeshDatabase
@ -161,7 +161,7 @@ static IO::MeshDatabase write_domain( FILE *fid, const std::string& filename,
const IO::MeshDataStruct& mesh, int format )
{
const int level = 0;
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
// Create the MeshDatabase
IO::MeshDatabase database = getDatabase( filename, mesh, format );
// Write the mesh
@ -399,7 +399,7 @@ void writeSiloSummary( const std::vector<IO::MeshDatabase>& meshes_written, cons
static std::vector<IO::MeshDatabase> writeMeshesNewFormat(
const std::vector<IO::MeshDataStruct>& meshData, const std::string& path, int format )
{
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
std::vector<IO::MeshDatabase> meshes_written;
char filename[100], fullpath[200];
sprintf(filename,"%05i",rank);
@ -419,7 +419,7 @@ static std::vector<IO::MeshDatabase> writeMeshesSilo(
const std::vector<IO::MeshDataStruct>& meshData, const std::string& path, int format )
{
#ifdef USE_SILO
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = MPI_WORLD_RANK();
std::vector<IO::MeshDatabase> meshes_written;
char filename[100], fullpath[200];
sprintf(filename,"%05i.silo",rank);
@ -441,12 +441,12 @@ static std::vector<IO::MeshDatabase> writeMeshesSilo(
/****************************************************
* Write the mesh data *
****************************************************/
void IO::writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>& meshData, const Utilities::MPI& comm )
void IO::writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>& meshData, MPI_Comm comm )
{
if ( global_IO_path.empty() )
IO::initialize( );
PROFILE_START("writeData");
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = comm_rank(comm);
// Check the meshData before writing
for ( const auto& data : meshData ) {
if ( !data.check() )
@ -457,7 +457,7 @@ void IO::writeData( const std::string& subdir, const std::vector<IO::MeshDataStr
if ( rank == 0 ) {
mkdir(path.c_str(),S_IRWXU|S_IRGRP);
}
comm.barrier();
MPI_Barrier(comm);
// Write the mesh files
std::vector<IO::MeshDatabase> meshes_written;
if ( global_IO_format == Format::OLD ) {

View File

@ -34,7 +34,7 @@ void initialize( const std::string& path="", const std::string& format="silo", b
* @param[in] meshData The data to write
* @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof)
*/
void writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>& meshData, const Utilities::MPI& comm );
void writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>& meshData, MPI_Comm comm );
/*!
@ -44,7 +44,7 @@ void writeData( const std::string& subdir, const std::vector<IO::MeshDataStruct>
* @param[in] meshData The data to write
* @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof)
*/
inline void writeData( int timestep, const std::vector<IO::MeshDataStruct>& meshData, const Utilities::MPI& comm )
inline void writeData( int timestep, const std::vector<IO::MeshDataStruct>& meshData, MPI_Comm comm )
{
char subdir[100];
sprintf(subdir,"vis%03i",timestep);

View File

@ -1,6 +1,6 @@
#include "IO/netcdf.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
@ -116,10 +116,10 @@ std::string VariableTypeName( VariableType type )
/****************************************************
* Open/close a file *
****************************************************/
int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm )
int open( const std::string& filename, FileMode mode, MPI_Comm comm )
{
int fid = 0;
if ( comm.isNull() ) {
if ( comm == MPI_COMM_NULL ) {
if ( mode == READ ) {
int err = nc_open( filename.c_str(), NC_NOWRITE, &fid );
CHECK_NC_ERR( err );
@ -134,13 +134,13 @@ int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm
}
} else {
if ( mode == READ ) {
int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid );
int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm, MPI_INFO_NULL, &fid );
CHECK_NC_ERR( err );
} else if ( mode == WRITE ) {
int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid );
int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm, MPI_INFO_NULL, &fid );
CHECK_NC_ERR( err );
} else if ( mode == CREATE ) {
int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm.getCommunicator(), MPI_INFO_NULL, &fid );
int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm, MPI_INFO_NULL, &fid );
CHECK_NC_ERR( err );
} else {
ERROR("Unknown file mode");
@ -375,7 +375,7 @@ Array<TYPE> getVar( int fid, const std::string& var, const std::vector<int>& sta
std::vector<size_t> var_size = getVarDim( fid, var );
for (int d=0; d<(int)var_size.size(); d++) {
if ( start[d]<0 || start[d]+stride[d]*(count[d]-1)>(int)var_size[d] ) {
int rank = Utilities::MPI(MPI_COMM_WORLD).getRank();
int rank = comm_rank(MPI_COMM_WORLD);
char tmp[1000];
sprintf(tmp,"%i: Range exceeded array dimension:\n"
" start[%i]=%i, count[%i]=%i, stride[%i]=%i, var_size[%i]=%i",

View File

@ -5,7 +5,7 @@
#include <vector>
#include "common/Array.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
@ -32,7 +32,7 @@ std::string VariableTypeName( VariableType type );
* @param mode Open the file for reading or writing
* @param comm MPI communicator to use (MPI_COMM_NULL, the default: don't use parallel netcdf)
*/
int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm=MPI_COMM_NULL );
int open( const std::string& filename, FileMode mode, MPI_Comm comm=MPI_COMM_NULL );
/*!

View File

@ -1,6 +1,6 @@
#include "IO/silo.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"

View File

@ -6,7 +6,7 @@
#include <array>
#include "common/Array.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"

View File

@ -3,7 +3,7 @@
#include "IO/silo.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"

View File

@ -6,7 +6,7 @@
#include <functional>
#include "common/MPI.h"
#include "mpi.h"
namespace StackTrace

View File

@ -14,7 +14,7 @@
#include <typeinfo>
#ifdef USE_MPI
#include "common/MPI.h"
#include "mpi.h"
#endif
#ifdef USE_TIMER

View File

@ -0,0 +1,228 @@
#include "analysis/ElectroChemistry.h"
/*
 * Build the analyzer for the sub-domain described by dm.
 *  - duplicates the domain communicator into comm
 *  - allocates and zeroes the Nx x Ny x Nz work arrays
 *  - on rank 0, opens electrokinetic.csv for appending and writes a header
 *    line if the file did not exist beforehand
 */
ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr <Domain> dm):
    Dm(dm),
    fillData(dm->Comm,dm->rank_info,{dm->Nx-2,dm->Ny-2,dm->Nz-2},{1,1,1},0,1)
{
    MPI_Comm_dup(dm->Comm,&comm);
    Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz;
    // Promote to double before multiplying: the product of the interior
    // extents and the processor grid can exceed the range of int.
    Volume=1.0*(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz();

    ChemicalPotential.resize(Nx,Ny,Nz);   ChemicalPotential.fill(0);
    ElectricalPotential.resize(Nx,Ny,Nz); ElectricalPotential.fill(0);
    Pressure.resize(Nx,Ny,Nz);            Pressure.fill(0);
    Rho.resize(Nx,Ny,Nz);                 Rho.fill(0);
    Vel_x.resize(Nx,Ny,Nz);               Vel_x.fill(0);   // velocity components
    Vel_y.resize(Nx,Ny,Nz);               Vel_y.fill(0);
    Vel_z.resize(Nx,Ny,Nz);               Vel_z.fill(0);
    SDs.resize(Nx,Ny,Nz);                 SDs.fill(0);

    // Give the handle a defined value on every rank; only rank 0 opens the log
    TIMELOG = NULL;
    if (Dm->rank()==0){
        // Probe for an existing file so the header is written only once
        FILE *existing = fopen("electrokinetic.csv","r");
        const bool WriteHeader = (existing == NULL);
        if (existing != NULL)
            fclose(existing);

        TIMELOG = fopen("electrokinetic.csv","a+");
        if (TIMELOG == NULL){
            fprintf(stderr,"ElectroChemistryAnalyzer: unable to open electrokinetic.csv\n");
        }
        else if (WriteHeader){
            // New file: write a short header to list the averages
            fprintf(TIMELOG,"TBD TBD\n");
        }
    }
}
/*
 * Close the log file. The constructor opens it on rank 0 only, and that
 * open can fail, so the handle is checked before it is passed to fclose().
 * NOTE(review): the communicator duplicated in the constructor is not freed
 * here; confirm whether MPI_Comm_free() is safe at this point in shutdown.
 */
ElectroChemistryAnalyzer::~ElectroChemistryAnalyzer(){
    if (Dm->rank()==0 && TIMELOG != NULL){
        fclose(TIMELOG);
        TIMELOG = NULL;
    }
}
// Placeholder: currently takes no arguments and sets nothing.
void ElectroChemistryAnalyzer::SetParams()
{
}
/*
 * Compute per-ion global averages and append one record to the log.
 *
 * For every ion species the concentration field is fetched into Rho and the
 * following are accumulated locally and summed over Dm->Comm:
 *   rho_avg             sum of Rho
 *   rho_mu_avg          sum of Rho*Rho,                 divided by rho_avg
 *   rho_psi_avg         sum of Rho*ElectricalPotential, divided by rho_avg
 *   rho_mu_fluctuation  sum of (Rho*Rho - rho_mu_avg)
 *   rho_psi_fluctuation sum of (Rho*ElectricalPotential - rho_psi_avg)
 * Rank 0 writes "timestep v0 v1 ..." as one line of the log file.
 *
 * Ion      source of the ion concentrations and the species count
 * Poisson  source of the electric potential
 * Stokes   not used by this routine
 * timestep value written at the start of the log record
 */
void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep){

    const int nIons = Ion.number_ion_species;
    Poisson.getElectricPotential(ElectricalPotential);

    /* global averages (std::vector instead of non-standard variable-length arrays) */
    std::vector<double> rho_avg_global(nIons,0.0);
    std::vector<double> rho_mu_avg_global(nIons,0.0);
    std::vector<double> rho_mu_fluctuation_global(nIons,0.0);
    std::vector<double> rho_psi_avg_global(nIons,0.0);
    std::vector<double> rho_psi_fluctuation_global(nIons,0.0);

    for (int ion=0; ion<nIons; ion++){
        // Both passes below must see THIS ion's concentration, so the averages
        // and the fluctuations are computed while Rho still holds it.
        Ion.getIonConcentration(Rho,ion);

        /* local sub-domain averages */
        double rho_avg_local = 0.0;
        double rho_mu_avg_local = 0.0;
        double rho_psi_avg_local = 0.0;
        // NOTE(review): the upper bounds include index N-1, while Volume and
        // the visualization copy use N-2 points per direction -- confirm.
        for (int k=1; k<Nz; k++){
            for (int j=1; j<Ny; j++){
                for (int i=1; i<Nx; i++){
                    const double rho = Rho(i,j,k);
                    rho_avg_local += rho;
                    rho_mu_avg_local += rho*rho;
                    rho_psi_avg_local += rho*ElectricalPotential(i,j,k);
                }
            }
        }
        rho_avg_global[ion]=sumReduce( Dm->Comm, rho_avg_local);
        rho_mu_avg_global[ion]=sumReduce( Dm->Comm, rho_mu_avg_local);
        rho_psi_avg_global[ion]=sumReduce( Dm->Comm, rho_psi_avg_local);

        rho_mu_avg_global[ion] /= rho_avg_global[ion];
        rho_psi_avg_global[ion] /= rho_avg_global[ion];

        /* deviations from the global averages */
        double rho_mu_fluctuation_local = 0.0;
        double rho_psi_fluctuation_local = 0.0;
        for (int k=1; k<Nz; k++){
            for (int j=1; j<Ny; j++){
                for (int i=1; i<Nx; i++){
                    const double rho = Rho(i,j,k);
                    rho_mu_fluctuation_local += (rho*rho - rho_mu_avg_global[ion]);
                    rho_psi_fluctuation_local += (rho*ElectricalPotential(i,j,k) - rho_psi_avg_global[ion]);
                }
            }
        }
        rho_mu_fluctuation_global[ion]=sumReduce( Dm->Comm, rho_mu_fluctuation_local);
        rho_psi_fluctuation_global[ion]=sumReduce( Dm->Comm, rho_psi_fluctuation_local);
    }

    // Only rank 0 holds the log file, and opening it may have failed
    if (Dm->rank()==0 && TIMELOG != NULL){
        fprintf(TIMELOG,"%i ",timestep);
        for (int ion=0; ion<nIons; ion++){
            fprintf(TIMELOG,"%.8g ",rho_avg_global[ion]);
            fprintf(TIMELOG,"%.8g ",rho_mu_avg_global[ion]);
            fprintf(TIMELOG,"%.8g ",rho_psi_avg_global[ion]);
            fprintf(TIMELOG,"%.8g ",rho_mu_fluctuation_global[ion]);
            fprintf(TIMELOG,"%.8g ",rho_psi_fluctuation_global[ion]);
        }
        // Terminate the record so each timestep gets its own line
        fprintf(TIMELOG,"\n");
        fflush(TIMELOG);
    }
}
/*
 * Assemble the visualization data for this timestep and hand it to the writer.
 *
 * The "Visualization" sub-database of input_db selects what is exported:
 *   save_electric_potential (default true)  -> "ElectricPotential"
 *   save_concentration      (default true)  -> "IonConcentration_<n>", n = 1..number of ions
 *   save_velocity           (default false) -> "Velocity_x", "Velocity_y", "Velocity_z"
 *   write_silo              (default true)  -> call IO::writeData
 * Each exported variable has size (Nx-2, Ny-2, Nz-2) and is filled through fillData.copy().
 */
void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, std::shared_ptr<Database> input_db, int timestep){

    auto vis_db = input_db->getDatabase( "Visualization" );
    const bool savePotential     = vis_db->getWithDefault<bool>( "save_electric_potential", true );
    const bool saveConcentration = vis_db->getWithDefault<bool>( "save_concentration", true );
    const bool saveVelocity      = vis_db->getWithDefault<bool>( "save_velocity", false );

    // The third argument is the bool "append" flag. This call used to pass the
    // string literal "false", which converts to true (non-null pointer); the
    // effective value is kept and written explicitly.
    IO::initialize("","silo",true);

    // Start from an empty MeshDataStruct on every call so that variables from
    // a previous call are not kept and written again.
    visData.clear();
    visData.resize(1);
    visData[0].meshName = "domain";
    visData[0].mesh = std::make_shared<IO::DomainMesh>( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz );

    // Create one volume variable, fill it from a work array and register it.
    // Filling through the new object itself (instead of an index into vars)
    // keeps every option independent of which other options are enabled.
    auto addVolumeVariable = [this]( const char *name, DoubleArray &field ) {
        auto var = std::make_shared<IO::Variable>();
        var->name = name;
        var->type = IO::VariableType::VolumeVariable;
        var->dim = 1;
        var->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
        fillData.copy(field,var->data);
        visData[0].vars.push_back(var);
    };

    if (savePotential){
        Poisson.getElectricPotential(ElectricalPotential);
        addVolumeVariable("ElectricPotential",ElectricalPotential);
    }

    if (saveConcentration){
        char VisName[40];
        for (int ion=0; ion<Ion.number_ion_species; ion++){
            snprintf(VisName,sizeof(VisName),"IonConcentration_%i",ion+1);
            Ion.getIonConcentration(Rho,ion);
            addVolumeVariable(VisName,Rho);
        }
    }

    if (saveVelocity){
        Stokes.getVelocity(Vel_x,Vel_y,Vel_z);
        addVolumeVariable("Velocity_x",Vel_x);
        addVolumeVariable("Velocity_y",Vel_y);
        addVolumeVariable("Velocity_z",Vel_z);
    }

    if (vis_db->getWithDefault<bool>( "write_silo", true ))
        IO::writeData( timestep, visData, comm );
}

View File

@ -0,0 +1,59 @@
/*
* averaging tools for electrochemistry
*/
#ifndef ElectroChem_INC
#define ElectroChem_INC
#include <vector>
#include "common/Domain.h"
#include "common/Communication.h"
#include "analysis/analysis.h"
#include "analysis/distance.h"
#include "analysis/Minkowski.h"
#include "common/Utilities.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"
#include "models/IonModel.h"
#include "models/PoissonSolver.h"
#include "models/StokesModel.h"
/*
 * Averaging and visualization helper for the ion / Poisson / Stokes models.
 * Scalar members carry default initializers so that none of them is read
 * while indeterminate; the constructor overwrites the ones it computes.
 */
class ElectroChemistryAnalyzer{
public:
    MPI_Comm comm;                    // set by MPI_Comm_dup() in the constructor
    int tag = 0;
    std::shared_ptr <Domain> Dm;
    double Volume = 0.0;
    // input variables
    double rho_n = 0.0, rho_w = 0.0;
    double nu_n = 0.0, nu_w = 0.0;
    double gamma_wn = 0.0, beta = 0.0;
    double Fx = 0.0, Fy = 0.0, Fz = 0.0;

    //...........................................................................
    int Nx = 0, Ny = 0, Nz = 0;
    DoubleArray Rho;                  // ion concentration (filled by Ion.getIonConcentration)
    DoubleArray ChemicalPotential;    // chemical potential work array
    DoubleArray ElectricalPotential;  // electric potential (filled by Poisson.getElectricPotential)
    DoubleArray Pressure;             // pressure field
    DoubleArray Vel_x;                // velocity field (filled by Stokes.getVelocity)
    DoubleArray Vel_y;
    DoubleArray Vel_z;
    DoubleArray SDs;

    ElectroChemistryAnalyzer(std::shared_ptr <Domain> Dm);
    ~ElectroChemistryAnalyzer();

    void SetParams();
    void Basic( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep);
    void WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, std::shared_ptr<Database> input_db, int timestep);

private:
    std::vector<IO::MeshDataStruct> visData;
    fillHalo<double> fillData;
    FILE *TIMELOG = NULL;             // log file; opened by the constructor on rank 0 only
};
#endif

259
analysis/GreyPhase.cpp Normal file
View File

@ -0,0 +1,259 @@
#include "analysis/GreyPhase.h"
// Constructor
//  - allocates and zeroes the Nx x Ny x Nz work arrays
//  - on rank 0, opens timelog.csv for appending and writes a header line if
//    the file did not exist beforehand
GreyPhaseAnalysis::GreyPhaseAnalysis(std::shared_ptr <Domain> dm):
    Dm(dm)
{
    Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz;
    // Promote to double before multiplying: the product of the interior
    // extents and the processor grid can exceed the range of int.
    Volume=1.0*(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz();

    // Global arrays
    SDs.resize(Nx,Ny,Nz);       SDs.fill(0);
    Porosity.resize(Nx,Ny,Nz);  Porosity.fill(0);
    //PhaseID.resize(Nx,Ny,Nz);       PhaseID.fill(0);
    Rho_n.resize(Nx,Ny,Nz);     Rho_n.fill(0);
    Rho_w.resize(Nx,Ny,Nz);     Rho_w.fill(0);
    Pressure.resize(Nx,Ny,Nz);  Pressure.fill(0);
    //Phi.resize(Nx,Ny,Nz);           Phi.fill(0);
    //DelPhi.resize(Nx,Ny,Nz);        DelPhi.fill(0);
    Vel_x.resize(Nx,Ny,Nz);     Vel_x.fill(0);   // velocity components
    Vel_y.resize(Nx,Ny,Nz);     Vel_y.fill(0);
    Vel_z.resize(Nx,Ny,Nz);     Vel_z.fill(0);
    //.........................................

    // Give the handle a defined value on every rank; only rank 0 opens the log
    TIMELOG = NULL;
    if (Dm->rank()==0){
        // Probe for an existing file so the header is written only once
        FILE *existing = fopen("timelog.csv","r");
        const bool WriteHeader = (existing == NULL);
        if (existing != NULL)
            fclose(existing);

        TIMELOG = fopen("timelog.csv","a+");
        if (TIMELOG == NULL){
            fprintf(stderr,"GreyPhaseAnalysis: unable to open timelog.csv\n");
        }
        else if (WriteHeader){
            // New file: write a short header to list the averages
            fprintf(TIMELOG,"sw krw krn vw vn pw pn\n");
        }
    }
}
// Destructor
// Closes the log file that the constructor opens on rank 0. Other ranks never
// open it, so the handle is only touched on rank 0, and only when the open
// succeeded.
// NOTE(review): assumes no other code path closes TIMELOG -- confirm.
GreyPhaseAnalysis::~GreyPhaseAnalysis()
{
    if (Dm->rank()==0 && TIMELOG != NULL){
        fclose(TIMELOG);
        TIMELOG = NULL;
    }
}
// Placeholder: the timestep argument is accepted but nothing is written.
void GreyPhaseAnalysis::Write(int timestep) {}
// Store the model parameters used by the averaging routines.
//   rhoA, rhoB          -> rho_n, rho_w
//   tauA, tauB          -> nu_n, nu_w   via (tau - 0.5)/3
//   force_x/y/z         -> Fx, Fy, Fz
//   alpha               -> gamma_wn = 6*alpha
//   B                   -> beta
//   GreyPorosity        -> grey_porosity
void GreyPhaseAnalysis::SetParams(double rhoA, double rhoB, double tauA, double tauB, double force_x, double force_y, double force_z, double alpha, double B, double GreyPorosity)
{
    // phase "n" takes the A parameters, phase "w" the B parameters
    rho_n = rhoA;
    nu_n = (tauA - 0.5) / 3.0;
    rho_w = rhoB;
    nu_w = (tauB - 0.5) / 3.0;

    gamma_wn = alpha * 6.0;
    beta = B;
    grey_porosity = GreyPorosity;

    // external body force
    Fx = force_x;
    Fy = force_y;
    Fz = force_z;
}
/*
 * Compute the basic phase averages and append one record to TIMELOG.
 *
 * Steps:
 *   1. accumulate mass, momentum and pressure sums for each fluid over the
 *      local interior nodes with Dm->id[n] > 0 (inlet / outlet layers in z
 *      are excluded on the first / last process layer),
 *   2. sum the local results over Dm->Comm with sumReduce,
 *   3. flag NaN in any reduced quantity,
 *   4. on rank 0, derive saturation, flow rates and relative permeabilities
 *      and write them to TIMELOG,
 *   5. print a message and trip ASSERT if a NaN was detected.
 */
void GreyPhaseAnalysis::Basic(){
    // Averaging window: skip the outermost layer of nodes in every direction
    const int imin = 1;
    const int jmin = 1;
    int kmin = 1;
    int kmax = Nz-1;
    // If external boundary conditions are set, do not average over the inlet / outlet
    if (Dm->inlet_layers_z > 0 && Dm->kproc() == 0)
        kmin += Dm->inlet_layers_z;
    if (Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1)
        kmax -= Dm->outlet_layers_z;

    Water_local.reset();
    Oil_local.reset();
    double count_w = 0.0;   // number of nodes contributing to the water pressure
    double count_n = 0.0;   // number of nodes contributing to the oil pressure

    for (int k=kmin; k<kmax; k++){
        for (int j=jmin; j<Ny-1; j++){
            for (int i=imin; i<Nx-1; i++){
                const int n = k*Nx*Ny + j*Nx + i;
                // only nodes with a positive id contribute to the averages
                if ( !(Dm->id[n] > 0) )
                    continue;

                const double nA = Rho_n(n);
                const double nB = Rho_w(n);
                const double phi = (nA-nB)/(nA+nB);
                const double porosity = Porosity(n);

                // water: mass and momentum
                Water_local.M += rho_w*nB*porosity;
                Water_local.Px += porosity*rho_w*nB*Vel_x(n);
                Water_local.Py += porosity*rho_w*nB*Vel_y(n);
                Water_local.Pz += porosity*rho_w*nB*Vel_z(n);

                // oil: mass and momentum
                Oil_local.M += rho_n*nA*porosity;
                Oil_local.Px += porosity*rho_n*nA*Vel_x(n);
                Oil_local.Py += porosity*rho_n*nA*Vel_y(n);
                Oil_local.Pz += porosity*rho_n*nA*Vel_z(n);

                // pressure is sampled only where one fluid clearly dominates
                if ( phi > 0.99 ){
                    Oil_local.p += Pressure(n);
                    count_n += 1.0;
                }
                else if ( phi < -0.99 ){
                    Water_local.p += Pressure(n);
                    count_w += 1.0;
                }
            }
        }
    }

    // Global sums
    Oil.M=sumReduce( Dm->Comm, Oil_local.M);
    Oil.Px=sumReduce( Dm->Comm, Oil_local.Px);
    Oil.Py=sumReduce( Dm->Comm, Oil_local.Py);
    Oil.Pz=sumReduce( Dm->Comm, Oil_local.Pz);

    Water.M=sumReduce( Dm->Comm, Water_local.M);
    Water.Px=sumReduce( Dm->Comm, Water_local.Px);
    Water.Py=sumReduce( Dm->Comm, Water_local.Py);
    Water.Pz=sumReduce( Dm->Comm, Water_local.Pz);

    count_w=sumReduce( Dm->Comm, count_w);
    count_n=sumReduce( Dm->Comm, count_n);

    // Average pressure per fluid; the counts were reduced above, so the same
    // branch is evaluated with the same count on every rank
    Water.p = (count_w > 0.0) ? sumReduce( Dm->Comm, Water_local.p) / count_w : 0.0;
    Oil.p = (count_n > 0.0) ? sumReduce( Dm->Comm, Oil_local.p) / count_n : 0.0;

    // check for NaN (a NaN is the only value that compares unequal to itself)
    const bool err =
           (Water.M != Water.M) || (Water.p != Water.p)
        || (Water.Px != Water.Px) || (Water.Py != Water.Py) || (Water.Pz != Water.Pz)
        || (Oil.M != Oil.M) || (Oil.p != Oil.p)
        || (Oil.Px != Oil.Px) || (Oil.Py != Oil.Py) || (Oil.Pz != Oil.Pz);

    if (Dm->rank() == 0){
        double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz);

        // flow direction: along the body force, defaulting to z without one
        double dir_x = 0.0;
        double dir_y = 0.0;
        double dir_z = 1.0;
        if (force_mag > 0.0){
            dir_x = Fx/force_mag;
            dir_y = Fy/force_mag;
            dir_z = Fz/force_mag;
        }

        const bool external_bc =
               Dm->BoundaryCondition == 1 || Dm->BoundaryCondition == 2
            || Dm->BoundaryCondition == 3 || Dm->BoundaryCondition == 4;
        if (external_bc){
            // fold the pressure drop across the domain into the driving force
            const double delta_p = (Pressure(Nx*Ny + Nx + 1) - 1.0) / 3.0;
            const double span = ((Nz-2)*Dm->nprocz());
            force_mag -= delta_p/span;
        }

        if (force_mag == 0.0){
            // no net driving force: default to z direction with unit magnitude
            dir_x = 0.0;
            dir_y = 0.0;
            dir_z = 1.0;
            force_mag = 1.0;
        }

        // NOTE(review): the divisions below are unguarded; when a fluid has
        // zero total mass they appear to evaluate 0/0 -- confirm whether
        // single-phase states can reach this point.
        saturation=Water.M/(Water.M + Oil.M); // assume constant density
        water_flow_rate=grey_porosity*saturation*(Water.Px*dir_x + Water.Py*dir_y + Water.Pz*dir_z)/Water.M;
        oil_flow_rate =grey_porosity*(1.0-saturation)*(Oil.Px*dir_x + Oil.Py*dir_y + Oil.Pz*dir_z)/Oil.M;

        const double h = Dm->voxel_length;
        //TODO check if need greyporosity or domain porosity ? - compare to analytical solution
        const double krw = h*h*nu_w*water_flow_rate / force_mag;
        const double krn = h*h*nu_n*oil_flow_rate / force_mag ;

        fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g %.5g\n",saturation,krw,krn,h*water_flow_rate,h*oil_flow_rate, Water.p, Oil.p);
        fflush(TIMELOG);
    }

    if (err){
        // the simulation produced NaN: report it before the assertion fires
        printf("GreyPhaseAnalysis.cpp: NaN encountered, may need to check simulation parameters \n");
    }
    ASSERT(err==false);
}
/*
inline void InterfaceTransportMeasures( double beta, double rA, double rB, double nA, double nB,
double nx, double ny, double nz, double ux, double uy, double uz, interface &I){
double A1,A2,A3,A4,A5,A6;
double B1,B2,B3,B4,B5,B6;
double nAB,delta;
// Instantiate mass transport distributions
// Stationary value - distribution 0
nAB = 1.0/(nA+nB);
//...............................................
// q = 0,2,4
// Cq = {1,0,0}, {0,1,0}, {0,0,1}
delta = beta*nA*nB*nAB*0.1111111111111111*nx;
if (!(nA*nB*nAB>0)) delta=0;
A1 = nA*(0.1111111111111111*(1+4.5*ux))+delta;
B1 = nB*(0.1111111111111111*(1+4.5*ux))-delta;
A2 = nA*(0.1111111111111111*(1-4.5*ux))-delta;
B2 = nB*(0.1111111111111111*(1-4.5*ux))+delta;
//...............................................
// Cq = {0,1,0}
delta = beta*nA*nB*nAB*0.1111111111111111*ny;
if (!(nA*nB*nAB>0)) delta=0;
A3 = nA*(0.1111111111111111*(1+4.5*uy))+delta;
B3 = nB*(0.1111111111111111*(1+4.5*uy))-delta;
A4 = nA*(0.1111111111111111*(1-4.5*uy))-delta;
B4 = nB*(0.1111111111111111*(1-4.5*uy))+delta;
//...............................................
// q = 4
// Cq = {0,0,1}
delta = beta*nA*nB*nAB*0.1111111111111111*nz;
if (!(nA*nB*nAB>0)) delta=0;
A5 = nA*(0.1111111111111111*(1+4.5*uz))+delta;
B5 = nB*(0.1111111111111111*(1+4.5*uz))-delta;
A6 = nA*(0.1111111111111111*(1-4.5*uz))-delta;
B6 = nB*(0.1111111111111111*(1-4.5*uz))+delta;
double unx = (A1-A2);
double uny = (A3-A4);
double unz = (A5-A6);
double uwx = (B1-B2);
double uwy = (B3-B4);
double uwz = (B5-B6);
I.Mn += rA*nA;
I.Mw += rB*nB;
I.Pnx += rA*nA*unx;
I.Pny += rA*nA*uny;
I.Pnz += rA*nA*unz;
I.Pwx += rB*nB*uwx;
I.Pwy += rB*nB*uwy;
I.Pwz += rB*nB*uwz;
I.Kn += rA*nA*(unx*unx + uny*uny + unz*unz);
I.Kw += rB*nB*(uwx*uwx + uwy*uwy + uwz*uwz);
}
*/

71
analysis/GreyPhase.h Normal file
View File

@ -0,0 +1,71 @@
/*
 * Grey-phase averaging tools
 */
#ifndef GreyPhase_INC
#define GreyPhase_INC
#include <vector>
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "analysis/analysis.h"
#include "common/Utilities.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"
/*
 * Per-fluid accumulator used by GreyPhaseAnalysis.
 *
 * All fields start at zero, so an instance is safe to read before the first
 * call to reset(); previously the values were indeterminate until reset().
 */
class GreyPhase{
public:
    double p = 0.0;     // pressure (sum while accumulating, average after reduction)
    double M = 0.0;     // mass
    double Px = 0.0;    // momentum, x component
    double Py = 0.0;    // momentum, y component
    double Pz = 0.0;    // momentum, z component

    // Zero every accumulated quantity.
    void reset(){
        p=M=Px=Py=Pz=0.0;
    }
};
/*
 * Phase-averaging analysis for the grey-scale two-fluid model.
 *
 * Scalar members and the log handle carry default initializers so that no
 * member is left indeterminate when the constructor does not assign it
 * (for example, TIMELOG stays nullptr unless a log file is opened).
 */
class GreyPhaseAnalysis{
public:
    std::shared_ptr <Domain> Dm;
    double Volume = 0.0;
    // input variables (assigned by SetParams)
    double rho_n = 0.0, rho_w = 0.0;
    double nu_n = 0.0, nu_w = 0.0;
    double gamma_wn = 0.0, beta = 0.0;
    double Fx = 0.0, Fy = 0.0, Fz = 0.0;
    double grey_porosity = 0.0;
    // outputs (computed by Basic on rank 0)
    double saturation = 0.0, water_flow_rate = 0.0, oil_flow_rate = 0.0;

    //simulation outputs (averaged values)
    GreyPhase Water, Oil;               // globally reduced values
    GreyPhase Water_local, Oil_local;   // per-process partial sums
    //...........................................................................
    int Nx = 0, Ny = 0, Nz = 0;
    //IntArray PhaseID;     // Phase ID array
    DoubleArray SDs;        // NOTE(review): original comment said "contains porosity map"; the name suggests a signed distance -- confirm
    DoubleArray Porosity;   // contains porosity map
    DoubleArray Rho_n;      // density field
    DoubleArray Rho_w;      // density field
    //DoubleArray Phi;      // phase indicator field
    //DoubleArray DelPhi;   // Magnitude of Gradient of the phase indicator field
    DoubleArray Pressure;   // pressure field
    DoubleArray Vel_x;      // velocity field
    DoubleArray Vel_y;
    DoubleArray Vel_z;

    GreyPhaseAnalysis(std::shared_ptr <Domain> Dm);
    ~GreyPhaseAnalysis();

    // Store the physical parameters used by Basic().
    void SetParams(double rhoA, double rhoB, double tauA, double tauB, double force_x, double force_y, double force_z, double alpha, double beta, double GreyPorosity);
    // Compute the phase averages and append one record to the time log.
    void Basic();
    // Currently an empty stub.
    void Write(int time);

private:
    FILE *TIMELOG = nullptr;    // log stream; nullptr until a file is opened
};
#endif

View File

@ -4,7 +4,7 @@
#include "common/Domain.h"
#include "common/Communication.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"
@ -91,7 +91,21 @@ void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
Xi -= 0.5;
}
// Euler characteristic -- each vertex shared by four cubes
Xi += 0.25*double(object.VertexCount);
//Xi += 0.25*double(object.VertexCount);
// check if vertices are at corners
for (int idx=0; idx<object.VertexCount; idx++){
/*auto P1 = object.vertex.coords(idx);
if ( remainder(P1.x,1.0)==0.0 && remainder(P1.y,1.0)==0.0 && remainder(P1.z,1.0)==0.0 ){
Xi += 0.125;
}
else
*/
Xi += 0.25;
}
/*double nside_extern = double(npts);
double nside_intern = double(npts)-3.0;
EulerChar=0.0;
if (npts > 0) EulerChar = (0.25*nvert - nside_intern - 0.5*nside_extern + nface); */
}
}
}
@ -109,13 +123,13 @@ void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
// convert X for 2D manifold to 3D object
Xi *= 0.5;
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
// Phase averages
Vi_global = Dm->Comm.sumReduce( Vi );
Xi_global = Dm->Comm.sumReduce( Xi );
Ai_global = Dm->Comm.sumReduce( Ai );
Ji_global = Dm->Comm.sumReduce( Ji );
Dm->Comm.barrier();
MPI_Allreduce(&Vi,&Vi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Xi,&Xi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Ai,&Ai_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Ji,&Ji_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Barrier(Dm->Comm);
PROFILE_STOP("ComputeScalar");
}
@ -128,7 +142,7 @@ void Minkowski::MeasureObject(){
* 0 - labels the object
* 1 - labels the rest of the
*/
//DoubleArray smooth_distance(Nx,Ny,Nz);
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
@ -137,6 +151,44 @@ void Minkowski::MeasureObject(){
}
}
CalcDist(distance,id,*Dm);
//Mean3D(distance,smooth_distance);
//Eikonal(distance, id, *Dm, 20, {true, true, true});
ComputeScalar(distance,0.0);
}
void Minkowski::MeasureObject(double factor, const DoubleArray &Phi){
    /*
     * Measure the object using a distance field refined from Phi.
     *
     * THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
     *    0 - labels the object
     *    1 - labels everything else
     *
     * The distance is first seeded from id() and computed with CalcDist.
     * Within 2.5 voxels of the zero level it is then replaced by
     * factor*log((1+Phi)/(1-Phi)), with the sign of the original distance kept.
     */

    // seed: -1 where id()==0, +1 where id()==1
    for (int k=0; k<Nz; k++){
        for (int j=0; j<Ny; j++){
            for (int i=0; i<Nx; i++){
                distance(i,j,k) = 2.0*double(id(i,j,k)) - 1.0;
            }
        }
    }
    CalcDist(distance,id,*Dm);

    // NOTE(review): the log term is not finite when |Phi| >= 1 -- confirm that
    // Phi stays strictly inside (-1,1) near the interface.
    for (int k=0; k<Nz; k++){
        for (int j=0; j<Ny; j++){
            for (int i=0; i<Nx; i++){
                const double phase = Phi(i,j,k);
                const double current = distance(i,j,k);
                const bool near_interface = (current < 2.5 && current > -2.5);
                if (!near_interface)
                    continue;
                double refined = factor*log((1.0+phase)/(1.0-phase));
                // keep the sign convention of the computed distance
                if (current*refined < 0.0)
                    refined = (-1.0)*refined;
                distance(i,j,k) = refined;
            }
        }
    }

    ComputeScalar(distance,0.0);
}
@ -168,7 +220,7 @@ int Minkowski::MeasureConnectedPathway(){
double vF=0.0;
n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm);
// int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm )
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
@ -186,6 +238,50 @@ int Minkowski::MeasureConnectedPathway(){
return n_connected_components;
}
int Minkowski::MeasureConnectedPathway(double factor, const DoubleArray &Phi){
    /*
     * Measure only part of the object marked by LABEL in the id field:
     *   1. build an indicator field (+1 where id()==LABEL, -1 elsewhere),
     *   2. label it with ComputeGlobalBlobIDs,
     *   3. rewrite id() so that voxels with label 0 become 0 and all others 1,
     *   4. measure the result with MeasureObject(factor,Phi).
     *
     * THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
     *
     * Returns the component count reported by ComputeGlobalBlobIDs.
     */
    const char LABEL = 0;

    // indicator field for the labelled phase
    for (int k=0; k<Nz; k++){
        for (int j=0; j<Ny; j++){
            for (int i=0; i<Nx; i++){
                distance(i,j,k) = (id(i,j,k) == LABEL) ? 1.0 : -1.0;
            }
        }
    }

    // Extract only the connected part of NWP
    double vF = 0.0;
    n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm);
    MPI_Barrier(Dm->Comm);

    // keep the voxels carrying label 0 as the object, mark the rest as 1
    for (int k=0; k<Nz; k++){
        for (int j=0; j<Ny; j++){
            for (int i=0; i<Nx; i++){
                id(i,j,k) = (label(i,j,k) == 0) ? 0 : 1;
            }
        }
    }

    MeasureObject(factor,Phi);
    return n_connected_components;
}
void Minkowski::PrintAll()
{

View File

@ -10,9 +10,10 @@
#include "common/Communication.h"
#include "analysis/analysis.h"
#include "analysis/distance.h"
#include "analysis/filters.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"
@ -62,7 +63,9 @@ public:
Minkowski(std::shared_ptr <Domain> Dm);
~Minkowski();
void MeasureObject();
void MeasureObject(double factor, const DoubleArray &Phi);
int MeasureConnectedPathway();
int MeasureConnectedPathway(double factor, const DoubleArray &Phi);
void ComputeScalar(const DoubleArray& Field, const double isovalue);
void PrintAll();

View File

@ -161,12 +161,12 @@ void SubPhase::Basic(){
// If external boundary conditions are set, do not average over the inlet
kmin=1; kmax=Nz-1;
imin=jmin=1;
// If inlet/outlet layers exist use these as default
/*// If inlet/outlet layers exist use these as default
if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x;
if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y;
if (Dm->inlet_layers_z > 0 && Dm->kproc() == 0) kmin += Dm->inlet_layers_z;
if (Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1) kmax -= Dm->outlet_layers_z;
*/
nb.reset(); wb.reset();
double count_w = 0.0;
@ -229,25 +229,25 @@ void SubPhase::Basic(){
}
}
}
gwb.V = Dm->Comm.sumReduce( wb.V);
gnb.V = Dm->Comm.sumReduce( nb.V);
gwb.M = Dm->Comm.sumReduce( wb.M);
gnb.M = Dm->Comm.sumReduce( nb.M);
gwb.Px = Dm->Comm.sumReduce( wb.Px);
gwb.Py = Dm->Comm.sumReduce( wb.Py);
gwb.Pz = Dm->Comm.sumReduce( wb.Pz);
gnb.Px = Dm->Comm.sumReduce( nb.Px);
gnb.Py = Dm->Comm.sumReduce( nb.Py);
gnb.Pz = Dm->Comm.sumReduce( nb.Pz);
gwb.V=sumReduce( Dm->Comm, wb.V);
gnb.V=sumReduce( Dm->Comm, nb.V);
gwb.M=sumReduce( Dm->Comm, wb.M);
gnb.M=sumReduce( Dm->Comm, nb.M);
gwb.Px=sumReduce( Dm->Comm, wb.Px);
gwb.Py=sumReduce( Dm->Comm, wb.Py);
gwb.Pz=sumReduce( Dm->Comm, wb.Pz);
gnb.Px=sumReduce( Dm->Comm, nb.Px);
gnb.Py=sumReduce( Dm->Comm, nb.Py);
gnb.Pz=sumReduce( Dm->Comm, nb.Pz);
count_w = Dm->Comm.sumReduce( count_w);
count_n = Dm->Comm.sumReduce( count_n);
count_w=sumReduce( Dm->Comm, count_w);
count_n=sumReduce( Dm->Comm, count_n);
if (count_w > 0.0)
gwb.p = Dm->Comm.sumReduce(wb.p) / count_w;
gwb.p=sumReduce( Dm->Comm, wb.p) / count_w;
else
gwb.p = 0.0;
if (count_n > 0.0)
gnb.p = Dm->Comm.sumReduce( nb.p) / count_n;
gnb.p=sumReduce( Dm->Comm, nb.p) / count_n;
else
gnb.p = 0.0;
@ -280,7 +280,7 @@ void SubPhase::Basic(){
dir_y = 0.0;
dir_z = 1.0;
}
if (Dm->BoundaryCondition > 0 ){
if (Dm->BoundaryCondition == 1 || Dm->BoundaryCondition == 2 || Dm->BoundaryCondition == 3 || Dm->BoundaryCondition == 4 ){
// compute the pressure drop
double pressure_drop = (Pressure(Nx*Ny + Nx + 1) - 1.0) / 3.0;
double length = ((Nz-2)*Dm->nprocz());
@ -376,16 +376,17 @@ void SubPhase::Full(){
// If external boundary conditions are set, do not average over the inlet
kmin=1; kmax=Nz-1;
if (Dm->BoundaryCondition > 0 && Dm->kproc() == 0) kmin=4;
if (Dm->BoundaryCondition > 0 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4;
/*if (Dm->BoundaryCondition > 0 && Dm->BoundaryCondition != 5 && Dm->kproc() == 0) kmin=4;
if (Dm->BoundaryCondition > 0 && Dm->BoundaryCondition != 5 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4;
*/
imin=jmin=1;
// If inlet layers exist use these as default
/*// If inlet layers exist use these as default
* NOTE -- excluding inlet / outlet will screw up topological averages!!!
if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x;
if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y;
if (Dm->inlet_layers_z > 0 && Dm->kproc() == 0) kmin += Dm->inlet_layers_z;
if (Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1) kmax -= Dm->outlet_layers_z;
*/
nd.reset(); nc.reset(); wd.reset(); wc.reset(); iwn.reset(); iwnc.reset();
Dm->CommunicateMeshHalo(Phi);
@ -426,13 +427,13 @@ void SubPhase::Full(){
}
}
// measure the whole object
morph_n->MeasureObject();
morph_n->MeasureObject();//0.5/beta,Phi);
nd.V = morph_n->V();
nd.A = morph_n->A();
nd.H = morph_n->H();
nd.X = morph_n->X();
// measure only the connected part
nd.Nc = morph_n->MeasureConnectedPathway();
nd.Nc = morph_n->MeasureConnectedPathway();//0.5/beta,Phi);
nc.V = morph_n->V();
nc.A = morph_n->A();
nc.H = morph_n->H();
@ -444,14 +445,14 @@ void SubPhase::Full(){
nd.X -= nc.X;
// compute global entities
gnc.V = Dm->Comm.sumReduce( nc.V );
gnc.A = Dm->Comm.sumReduce( nc.A );
gnc.H = Dm->Comm.sumReduce( nc.H );
gnc.X = Dm->Comm.sumReduce( nc.X );
gnd.V = Dm->Comm.sumReduce( nd.V );
gnd.A = Dm->Comm.sumReduce( nd.A );
gnd.H = Dm->Comm.sumReduce( nd.H );
gnd.X = Dm->Comm.sumReduce( nd.X );
gnc.V=sumReduce( Dm->Comm, nc.V);
gnc.A=sumReduce( Dm->Comm, nc.A);
gnc.H=sumReduce( Dm->Comm, nc.H);
gnc.X=sumReduce( Dm->Comm, nc.X);
gnd.V=sumReduce( Dm->Comm, nd.V);
gnd.A=sumReduce( Dm->Comm, nd.A);
gnd.H=sumReduce( Dm->Comm, nd.H);
gnd.X=sumReduce( Dm->Comm, nd.X);
gnd.Nc = nd.Nc;
// wetting
for (k=0; k<Nz; k++){
@ -474,13 +475,13 @@ void SubPhase::Full(){
}
}
}
morph_w->MeasureObject();
morph_w->MeasureObject();//-0.5/beta,Phi);
wd.V = morph_w->V();
wd.A = morph_w->A();
wd.H = morph_w->H();
wd.X = morph_w->X();
// measure only the connected part
wd.Nc = morph_w->MeasureConnectedPathway();
wd.Nc = morph_w->MeasureConnectedPathway();//-0.5/beta,Phi);
wc.V = morph_w->V();
wc.A = morph_w->A();
wc.H = morph_w->H();
@ -491,14 +492,14 @@ void SubPhase::Full(){
wd.H -= wc.H;
wd.X -= wc.X;
// compute global entities
gwc.V = Dm->Comm.sumReduce( wc.V );
gwc.A = Dm->Comm.sumReduce( wc.A );
gwc.H = Dm->Comm.sumReduce( wc.H );
gwc.X = Dm->Comm.sumReduce( wc.X );
gwd.V = Dm->Comm.sumReduce( wd.V );
gwd.A = Dm->Comm.sumReduce( wd.A );
gwd.H = Dm->Comm.sumReduce( wd.H );
gwd.X = Dm->Comm.sumReduce( wd.X );
gwc.V=sumReduce( Dm->Comm, wc.V);
gwc.A=sumReduce( Dm->Comm, wc.A);
gwc.H=sumReduce( Dm->Comm, wc.H);
gwc.X=sumReduce( Dm->Comm, wc.X);
gwd.V=sumReduce( Dm->Comm, wd.V);
gwd.A=sumReduce( Dm->Comm, wd.A);
gwd.H=sumReduce( Dm->Comm, wd.H);
gwd.X=sumReduce( Dm->Comm, wd.X);
gwd.Nc = wd.Nc;
/* Set up geometric analysis of interface region */
@ -526,20 +527,20 @@ void SubPhase::Full(){
iwn.A = morph_i->A();
iwn.H = morph_i->H();
iwn.X = morph_i->X();
giwn.V = Dm->Comm.sumReduce( iwn.V );
giwn.A = Dm->Comm.sumReduce( iwn.A );
giwn.H = Dm->Comm.sumReduce( iwn.H );
giwn.X = Dm->Comm.sumReduce( iwn.X );
giwn.V=sumReduce( Dm->Comm, iwn.V);
giwn.A=sumReduce( Dm->Comm, iwn.A);
giwn.H=sumReduce( Dm->Comm, iwn.H);
giwn.X=sumReduce( Dm->Comm, iwn.X);
// measure only the connected part
iwnc.Nc = morph_i->MeasureConnectedPathway();
iwnc.V = morph_i->V();
iwnc.A = morph_i->A();
iwnc.H = morph_i->H();
iwnc.X = morph_i->X();
giwnc.V = Dm->Comm.sumReduce( iwnc.V );
giwnc.A = Dm->Comm.sumReduce( iwnc.A );
giwnc.H = Dm->Comm.sumReduce( iwnc.H );
giwnc.X = Dm->Comm.sumReduce( iwnc.X );
giwnc.V=sumReduce( Dm->Comm, iwnc.V);
giwnc.A=sumReduce( Dm->Comm, iwnc.A);
giwnc.H=sumReduce( Dm->Comm, iwnc.H);
giwnc.X=sumReduce( Dm->Comm, iwnc.X);
giwnc.Nc = iwnc.Nc;
double vol_nc_bulk = 0.0;
@ -630,46 +631,46 @@ void SubPhase::Full(){
}
}
gnd.M = Dm->Comm.sumReduce( nd.M );
gnd.Px = Dm->Comm.sumReduce( nd.Px );
gnd.Py = Dm->Comm.sumReduce( nd.Py );
gnd.Pz = Dm->Comm.sumReduce( nd.Pz );
gnd.K = Dm->Comm.sumReduce( nd.K );
gnd.M=sumReduce( Dm->Comm, nd.M);
gnd.Px=sumReduce( Dm->Comm, nd.Px);
gnd.Py=sumReduce( Dm->Comm, nd.Py);
gnd.Pz=sumReduce( Dm->Comm, nd.Pz);
gnd.K=sumReduce( Dm->Comm, nd.K);
gwd.M = Dm->Comm.sumReduce( wd.M );
gwd.Px = Dm->Comm.sumReduce( wd.Px );
gwd.Py = Dm->Comm.sumReduce( wd.Py );
gwd.Pz = Dm->Comm.sumReduce( wd.Pz );
gwd.K = Dm->Comm.sumReduce( wd.K );
gwd.M=sumReduce( Dm->Comm, wd.M);
gwd.Px=sumReduce( Dm->Comm, wd.Px);
gwd.Py=sumReduce( Dm->Comm, wd.Py);
gwd.Pz=sumReduce( Dm->Comm, wd.Pz);
gwd.K=sumReduce( Dm->Comm, wd.K);
gnc.M = Dm->Comm.sumReduce( nc.M );
gnc.Px = Dm->Comm.sumReduce( nc.Px );
gnc.Py = Dm->Comm.sumReduce( nc.Py );
gnc.Pz = Dm->Comm.sumReduce( nc.Pz );
gnc.K = Dm->Comm.sumReduce( nc.K );
gnc.M=sumReduce( Dm->Comm, nc.M);
gnc.Px=sumReduce( Dm->Comm, nc.Px);
gnc.Py=sumReduce( Dm->Comm, nc.Py);
gnc.Pz=sumReduce( Dm->Comm, nc.Pz);
gnc.K=sumReduce( Dm->Comm, nc.K);
gwc.M = Dm->Comm.sumReduce( wc.M );
gwc.Px = Dm->Comm.sumReduce( wc.Px );
gwc.Py = Dm->Comm.sumReduce( wc.Py );
gwc.Pz = Dm->Comm.sumReduce( wc.Pz );
gwc.K = Dm->Comm.sumReduce( wc.K );
gwc.M=sumReduce( Dm->Comm, wc.M);
gwc.Px=sumReduce( Dm->Comm, wc.Px);
gwc.Py=sumReduce( Dm->Comm, wc.Py);
gwc.Pz=sumReduce( Dm->Comm, wc.Pz);
gwc.K=sumReduce( Dm->Comm, wc.K);
giwn.Mn = Dm->Comm.sumReduce( iwn.Mn );
giwn.Pnx = Dm->Comm.sumReduce( iwn.Pnx );
giwn.Pny = Dm->Comm.sumReduce( iwn.Pny );
giwn.Pnz = Dm->Comm.sumReduce( iwn.Pnz );
giwn.Kn = Dm->Comm.sumReduce( iwn.Kn );
giwn.Mw = Dm->Comm.sumReduce( iwn.Mw );
giwn.Pwx = Dm->Comm.sumReduce( iwn.Pwx );
giwn.Pwy = Dm->Comm.sumReduce( iwn.Pwy );
giwn.Pwz = Dm->Comm.sumReduce( iwn.Pwz );
giwn.Kw = Dm->Comm.sumReduce( iwn.Kw );
giwn.Mn=sumReduce( Dm->Comm, iwn.Mn);
giwn.Pnx=sumReduce( Dm->Comm, iwn.Pnx);
giwn.Pny=sumReduce( Dm->Comm, iwn.Pny);
giwn.Pnz=sumReduce( Dm->Comm, iwn.Pnz);
giwn.Kn=sumReduce( Dm->Comm, iwn.Kn);
giwn.Mw=sumReduce( Dm->Comm, iwn.Mw);
giwn.Pwx=sumReduce( Dm->Comm, iwn.Pwx);
giwn.Pwy=sumReduce( Dm->Comm, iwn.Pwy);
giwn.Pwz=sumReduce( Dm->Comm, iwn.Pwz);
giwn.Kw=sumReduce( Dm->Comm, iwn.Kw);
// pressure averaging
gnc.p = Dm->Comm.sumReduce( nc.p );
gnd.p = Dm->Comm.sumReduce( nd.p );
gwc.p = Dm->Comm.sumReduce( wc.p );
gwd.p = Dm->Comm.sumReduce( wd.p );
gnc.p=sumReduce( Dm->Comm, nc.p);
gnd.p=sumReduce( Dm->Comm, nd.p);
gwc.p=sumReduce( Dm->Comm, wc.p);
gwd.p=sumReduce( Dm->Comm, wd.p);
if (vol_wc_bulk > 0.0)
wc.p = wc.p /vol_wc_bulk;
@ -680,10 +681,10 @@ void SubPhase::Full(){
if (vol_nd_bulk > 0.0)
nd.p = nd.p /vol_nd_bulk;
vol_wc_bulk = Dm->Comm.sumReduce( vol_wc_bulk );
vol_wd_bulk = Dm->Comm.sumReduce( vol_wd_bulk );
vol_nc_bulk = Dm->Comm.sumReduce( vol_nc_bulk );
vol_nd_bulk = Dm->Comm.sumReduce( vol_nd_bulk );
vol_wc_bulk=sumReduce( Dm->Comm, vol_wc_bulk);
vol_wd_bulk=sumReduce( Dm->Comm, vol_wd_bulk);
vol_nc_bulk=sumReduce( Dm->Comm, vol_nc_bulk);
vol_nd_bulk=sumReduce( Dm->Comm, vol_nd_bulk);
if (vol_wc_bulk > 0.0)
gwc.p = gwc.p /vol_wc_bulk;
@ -719,7 +720,7 @@ void SubPhase::AggregateLabels( const std::string& filename )
}
}
}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
Dm->AggregateLabels( filename );

View File

@ -12,7 +12,7 @@
#include "analysis/distance.h"
#include "analysis/Minkowski.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"

View File

@ -5,7 +5,7 @@
#include "common/Domain.h"
#include "common/Communication.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"
@ -882,7 +882,7 @@ void TwoPhase::ComponentAverages()
}
}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
if (Dm->rank()==0){
printf("Component averages computed locally -- reducing result... \n");
}
@ -890,14 +890,14 @@ void TwoPhase::ComponentAverages()
RecvBuffer.resize(BLOB_AVG_COUNT,NumberComponents_NWP);
/* for (int b=0; b<NumberComponents_NWP; b++){
Dm->Comm.barrier();
Dm->Comm.sumReduce(&ComponentAverages_NWP(0,b),&RecvBuffer(0),BLOB_AVG_COUNT);
MPI_Barrier(Dm->Comm);
MPI_Allreduce(&ComponentAverages_NWP(0,b),&RecvBuffer(0),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm);
for (int idx=0; idx<BLOB_AVG_COUNT; idx++) ComponentAverages_NWP(idx,b)=RecvBuffer(idx);
}
*/
Dm->Comm.barrier();
Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP);
// Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT);
MPI_Barrier(Dm->Comm);
MPI_Allreduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP, MPI_DOUBLE,MPI_SUM,Dm->Comm);
// MPI_Reduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm);
if (Dm->rank()==0){
printf("rescaling... \n");
@ -993,8 +993,9 @@ void TwoPhase::ComponentAverages()
// reduce the wetting phase averages
for (int b=0; b<NumberComponents_WP; b++){
Dm->Comm.barrier();
Dm->Comm.sumReduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT);
MPI_Barrier(Dm->Comm);
// MPI_Allreduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Reduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm);
for (int idx=0; idx<BLOB_AVG_COUNT; idx++) ComponentAverages_WP(idx,b)=RecvBuffer(idx);
}
@ -1077,48 +1078,43 @@ void TwoPhase::Reduce()
int i;
double iVol_global=1.0/Volume;
//...........................................................................
Dm->Comm.barrier();
nwp_volume_global = Dm->Comm.sumReduce( nwp_volume );
wp_volume_global = Dm->Comm.sumReduce( wp_volume );
awn_global = Dm->Comm.sumReduce( awn );
ans_global = Dm->Comm.sumReduce( ans );
aws_global = Dm->Comm.sumReduce( aws );
lwns_global = Dm->Comm.sumReduce( lwns );
As_global = Dm->Comm.sumReduce( As );
Jwn_global = Dm->Comm.sumReduce( Jwn );
Kwn_global = Dm->Comm.sumReduce( Kwn );
KGwns_global = Dm->Comm.sumReduce( KGwns );
KNwns_global = Dm->Comm.sumReduce( KNwns );
efawns_global = Dm->Comm.sumReduce( efawns );
wwndnw_global = Dm->Comm.sumReduce( wwndnw );
wwnsdnwn_global = Dm->Comm.sumReduce( wwnsdnwn );
Jwnwwndnw_global = Dm->Comm.sumReduce( Jwnwwndnw );
MPI_Barrier(Dm->Comm);
MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&wp_volume,&wp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Kwn,&Kwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&KGwns,&KGwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&KNwns,&KNwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&wwndnw,&wwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&wwnsdnwn,&wwnsdnwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Jwnwwndnw,&Jwnwwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
// Phase averages
vol_w_global = Dm->Comm.sumReduce( vol_w );
vol_n_global = Dm->Comm.sumReduce( vol_n );
paw_global = Dm->Comm.sumReduce( paw );
pan_global = Dm->Comm.sumReduce( pan );
for (int idx=0; idx<3; idx++)
vaw_global(idx) = Dm->Comm.sumReduce( vaw(idx) );
for (int idx=0; idx<3; idx++)
van_global(idx) = Dm->Comm.sumReduce( van(idx));
for (int idx=0; idx<3; idx++)
vawn_global(idx) = Dm->Comm.sumReduce( vawn(idx) );
for (int idx=0; idx<3; idx++)
vawns_global(idx) = Dm->Comm.sumReduce( vawns(idx) );
for (int idx=0; idx<6; idx++){
Gwn_global(idx) = Dm->Comm.sumReduce( Gwn(idx) );
Gns_global(idx) = Dm->Comm.sumReduce( Gns(idx) );
Gws_global(idx) = Dm->Comm.sumReduce( Gws(idx) );
}
trawn_global = Dm->Comm.sumReduce( trawn );
trJwn_global = Dm->Comm.sumReduce( trJwn );
trRwn_global = Dm->Comm.sumReduce( trRwn );
euler_global = Dm->Comm.sumReduce( euler );
An_global = Dm->Comm.sumReduce( An );
Jn_global = Dm->Comm.sumReduce( Jn );
Kn_global = Dm->Comm.sumReduce( Kn );
Dm->Comm.barrier();
MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&vawns(0),&vawns_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&trawn,&trawn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&trJwn,&trJwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&trRwn,&trRwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&euler,&euler_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&An,&An_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Jn,&Jn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&Kn,&Kn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Barrier(Dm->Comm);
// Normalize the phase averages
// (density of both components = 1.0)

View File

@ -12,7 +12,7 @@
#include "common/Domain.h"
#include "common/Communication.h"
#include "common/Utilities.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "IO/MeshDatabase.h"
#include "IO/Reader.h"
#include "IO/Writer.h"

View File

@ -188,7 +188,7 @@ int ComputeLocalPhaseComponent(const IntArray &PhaseID, int &VALUE, BlobIDArray
/******************************************************************
* Reorder the global blob ids *
******************************************************************/
static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, const Utilities::MPI& comm )
static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, MPI_Comm comm )
{
if ( N_blobs==0 )
return 0;
@ -212,7 +212,7 @@ static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int
}
}
ASSERT(max_id<N_blobs);
comm.sumReduce(local_size,global_size,N_blobs);
MPI_Allreduce(local_size,global_size,N_blobs,MPI_DOUBLE,MPI_SUM,comm);
std::vector<std::pair<double,int> > map1(N_blobs);
int N_blobs2 = 0;
for (int i=0; i<N_blobs; i++) {
@ -235,12 +235,12 @@ static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int
PROFILE_STOP("ReorderBlobIDs2",1);
return N_blobs2;
}
void ReorderBlobIDs( BlobIDArray& ID, const Utilities::MPI& comm )
void ReorderBlobIDs( BlobIDArray& ID, MPI_Comm comm )
{
PROFILE_START("ReorderBlobIDs");
int tmp = ID.max()+1;
int N_blobs = 0;
N_blobs = comm.maxReduce( tmp );
MPI_Allreduce(&tmp,&N_blobs,1,MPI_INT,MPI_MAX,comm);
ReorderBlobIDs2(ID,N_blobs,1,1,1,comm);
PROFILE_STOP("ReorderBlobIDs");
}
@ -260,29 +260,30 @@ static void updateRemoteIds(
int N_send, const std::vector<int>& N_recv,
int64_t *send_buf, std::vector<int64_t*>& recv_buf,
std::map<int64_t,int64_t>& remote_map,
const Utilities::MPI& comm )
MPI_Comm comm )
{
std::vector<MPI_Request> send_req(neighbors.size());
std::vector<MPI_Request> recv_req(neighbors.size());
auto it = map.begin();
std::vector<MPI_Status> status(neighbors.size());
std::map<int64_t,global_id_info_struct>::const_iterator it = map.begin();
ASSERT(N_send==(int)map.size());
for (size_t i=0; i<map.size(); i++, ++it) {
send_buf[2*i+0] = it->first;
send_buf[2*i+1] = it->second.new_id;
}
for (size_t i=0; i<neighbors.size(); i++) {
send_req[i] = comm.Isend( send_buf, 2*N_send, neighbors[i], 0 );
recv_req[i] = comm.Irecv( recv_buf[i], 2*N_recv[i], neighbors[i], 0 );
MPI_Isend( send_buf, 2*N_send, MPI_LONG_LONG, neighbors[i], 0, comm, &send_req[i] );
MPI_Irecv( recv_buf[i], 2*N_recv[i], MPI_LONG_LONG, neighbors[i], 0, comm, &recv_req[i] );
}
for (it=map.begin(); it!=map.end(); ++it) {
remote_map[it->first] = it->second.new_id;
}
for (size_t i=0; i<neighbors.size(); i++) {
comm.wait( recv_req[i] );
MPI_Wait(&recv_req[i],&status[i]);
for (int j=0; j<N_recv[i]; j++)
remote_map[recv_buf[i][2*j+0]] = recv_buf[i][2*j+1];
}
comm.waitAll(neighbors.size(),getPtr(send_req));
MPI_Waitall(neighbors.size(),getPtr(send_req),getPtr(status));
}
// Compute a new local id for each local id
static bool updateLocalIds( const std::map<int64_t,int64_t>& remote_map,
@ -303,18 +304,18 @@ static bool updateLocalIds( const std::map<int64_t,int64_t>& remote_map,
return changed;
}
static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info,
int nblobs, BlobIDArray& IDs, const Utilities::MPI& comm )
int nblobs, BlobIDArray& IDs, MPI_Comm comm )
{
PROFILE_START("LocalToGlobalIDs",1);
const int rank = rank_info.rank[1][1][1];
int nprocs = comm.getSize();
int nprocs = comm_size(comm);
const int ngx = (IDs.size(0)-nx)/2;
const int ngy = (IDs.size(1)-ny)/2;
const int ngz = (IDs.size(2)-nz)/2;
// Get the number of blobs for each rank
std::vector<int> N_blobs(nprocs,0);
PROFILE_START("LocalToGlobalIDs-Allgather",1);
comm.allGather(nblobs,getPtr(N_blobs));
MPI_Allgather(&nblobs,1,MPI_INT,getPtr(N_blobs),1,MPI_INT,comm);
PROFILE_STOP("LocalToGlobalIDs-Allgather",1);
int64_t N_blobs_tot = 0;
int offset = 0;
@ -362,12 +363,13 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_
std::vector<int> N_recv(neighbors.size(),0);
std::vector<MPI_Request> send_req(neighbors.size());
std::vector<MPI_Request> recv_req(neighbors.size());
std::vector<MPI_Status> status(neighbors.size());
for (size_t i=0; i<neighbors.size(); i++) {
send_req[i] = comm.Isend( &N_send, 1, neighbors[i], 0 );
recv_req[i] = comm.Irecv( &N_recv[i], 1, neighbors[i], 0 );
MPI_Isend( &N_send, 1, MPI_INT, neighbors[i], 0, comm, &send_req[i] );
MPI_Irecv( &N_recv[i], 1, MPI_INT, neighbors[i], 0, comm, &recv_req[i] );
}
comm.waitAll(neighbors.size(),getPtr(send_req));
comm.waitAll(neighbors.size(),getPtr(recv_req));
MPI_Waitall(neighbors.size(),getPtr(send_req),getPtr(status));
MPI_Waitall(neighbors.size(),getPtr(recv_req),getPtr(status));
// Allocate memory for communication
int64_t *send_buf = new int64_t[2*N_send];
std::vector<int64_t*> recv_buf(neighbors.size());
@ -396,7 +398,8 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_
bool changed = updateLocalIds( remote_map, map );
// Check if we are finished
int test = changed ? 1:0;
int result = comm.sumReduce( test );
int result = 0;
MPI_Allreduce(&test,&result,1,MPI_INT,MPI_SUM,comm);
if ( result==0 )
break;
}
@ -432,7 +435,7 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_
}
int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS,
BlobIDArray& GlobalBlobID, const Utilities::MPI& comm )
BlobIDArray& GlobalBlobID, MPI_Comm comm )
{
PROFILE_START("ComputeGlobalBlobIDs");
// First compute the local ids
@ -443,7 +446,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf
return nglobal;
}
int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm )
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm )
{
PROFILE_START("ComputeGlobalPhaseComponent");
// First compute the local ids
@ -459,27 +462,37 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r
* Compute the mapping of blob ids between timesteps *
******************************************************************/
typedef std::map<BlobIDType,std::map<BlobIDType,int64_t> > map_type;
/*!
 * Map a C++ integer type to the MPI datatype used to communicate it.
 * Only the primary template is declared here; the specializations below
 * are the only definitions provided.
 */
template<class TYPE> inline MPI_Datatype getMPIType();
template<> inline MPI_Datatype getMPIType<int32_t>() {
    static_assert( sizeof(int32_t)==sizeof(int), "MPI_INT is used for int32_t, so int must be 32 bits" );
    return MPI_INT;
}
template<> inline MPI_Datatype getMPIType<int64_t>() {
    // int64_t must be sent as an integer type of the same width; every path returns a value
    static_assert( sizeof(int64_t)==sizeof(long int) || sizeof(int64_t)==sizeof(long long int),
        "int64_t must match long or long long to be sent as an MPI integer type" );
    if ( sizeof(int64_t)==sizeof(long int) )
        return MPI_LONG;
    // long is narrower than 64 bits on this platform: use long long instead of
    // reinterpreting the integers as MPI_DOUBLE
    return MPI_LONG_LONG;
}
// Gather the elements of `set` from every rank of `comm` and insert them into the
// local set, so the set holds the union of all ranks' contents on return.
// NOTE(review): this span carries two forms of the signature, of the `nprocs`
// definition and of each communication call -- one using Utilities::MPI member
// functions, one using raw MPI_* calls on an MPI_Comm. Only one form of each pair
// can be kept for the function to compile.
template<class TYPE>
void gatherSet( std::set<TYPE>& set, const Utilities::MPI& comm )
void gatherSet( std::set<TYPE>& set, MPI_Comm comm )
{
int nprocs = comm.getSize();
int nprocs = comm_size(comm);
MPI_Datatype type = getMPIType<TYPE>();
// Flatten the local set into a contiguous send buffer
std::vector<TYPE> send_data(set.begin(),set.end());
int send_count = send_data.size();
std::vector<int> recv_count(nprocs,0), recv_disp(nprocs,0);
// Exchange the number of elements each rank contributes
comm.allGather( send_count, getPtr(recv_count) );
MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm);
// Displacement of rank i is the sum of the counts of ranks 0..i-1
for (int i=1; i<nprocs; i++)
recv_disp[i] = recv_disp[i-1] + recv_count[i-1];
// Total receive size = last displacement + last count
std::vector<TYPE> recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]);
comm.allGather( getPtr(send_data), send_count, getPtr(recv_data),
getPtr(recv_count), getPtr(recv_disp), true );
MPI_Allgatherv(getPtr(send_data),send_count,type,
getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm);
// Merge everything received into the local set
for (size_t i=0; i<recv_data.size(); i++)
set.insert(recv_data[i]);
}
void gatherSrcIDMap( map_type& src_map, const Utilities::MPI& comm )
void gatherSrcIDMap( map_type& src_map, MPI_Comm comm )
{
int nprocs = comm.getSize();
int nprocs = comm_size(comm);
MPI_Datatype type = getMPIType<int64_t>();
std::vector<int64_t> send_data;
for (auto it=src_map.begin(); it!=src_map.end(); ++it) {
for (map_type::const_iterator it=src_map.begin(); it!=src_map.end(); ++it) {
int id = it->first;
const std::map<BlobIDType,int64_t>& src_ids = it->second;
send_data.push_back(id);
@ -492,21 +505,21 @@ void gatherSrcIDMap( map_type& src_map, const Utilities::MPI& comm )
}
int send_count = send_data.size();
std::vector<int> recv_count(nprocs,0), recv_disp(nprocs,0);
comm.allGather(send_count,getPtr(recv_count));
MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm);
for (int i=1; i<nprocs; i++)
recv_disp[i] = recv_disp[i-1] + recv_count[i-1];
std::vector<int64_t> recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]);
comm.allGather(getPtr(send_data),send_count,
getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),true);
MPI_Allgatherv(getPtr(send_data),send_count,type,
getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm);
size_t i=0;
src_map.clear();
while ( i < recv_data.size() ) {
BlobIDType id = recv_data[i];
size_t count = recv_data[i+1];
i += 2;
auto& src_ids = src_map[id];
std::map<BlobIDType,int64_t>& src_ids = src_map[id];
for (size_t j=0; j<count; j++,i+=2) {
auto it = src_ids.find(recv_data[i]);
std::map<BlobIDType,int64_t>::iterator it = src_ids.find(recv_data[i]);
if ( it == src_ids.end() )
src_ids.insert(std::pair<BlobIDType,int64_t>(recv_data[i],recv_data[i+1]));
else
@ -525,7 +538,7 @@ void addSrcDstIDs( BlobIDType src_id, map_type& src_map, map_type& dst_map,
}
}
ID_map_struct computeIDMap( int nx, int ny, int nz,
const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm )
const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm )
{
ASSERT(ID1.size()==ID2.size());
PROFILE_START("computeIDMap");
@ -767,7 +780,7 @@ void renumberIDs( const std::vector<BlobIDType>& new_ids, BlobIDArray& IDs )
******************************************************************/
void writeIDMap( const ID_map_struct& map, long long int timestep, const std::string& filename )
{
int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank();
int rank = MPI_WORLD_RANK();
if ( rank!=0 )
return;
bool empty = map.created.empty() && map.destroyed.empty() &&

View File

@ -58,7 +58,7 @@ int ComputeLocalPhaseComponent( const IntArray &PhaseID, int &VALUE, IntArray &C
*/
int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS,
BlobIDArray& GlobalBlobID, const Utilities::MPI& comm );
BlobIDArray& GlobalBlobID, MPI_Comm comm );
/*!
@ -75,7 +75,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf
* @return Return the number of components in the specified phase
*/
int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm );
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm );
/*!
@ -87,7 +87,7 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r
* @param[in] nz Number of elements in the z-direction
* @param[in,out] ID The ids of the blobs
*/
void ReorderBlobIDs( BlobIDArray& ID, const Utilities::MPI& comm );
void ReorderBlobIDs( BlobIDArray& ID, MPI_Comm comm );
typedef std::pair<BlobIDType,std::vector<BlobIDType> > BlobIDSplitStruct;
@ -120,7 +120,7 @@ struct ID_map_struct {
* @param[in] ID1 The blob ids at the first timestep
* @param[in] ID2 The blob ids at the second timestep
*/
ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm );
ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm );
/*!

View File

@ -1,7 +1,5 @@
#include "analysis/dcel.h"
// Default constructor: no explicit member initialization is performed here.
DECL::DECL() = default;
@ -15,6 +13,25 @@ int DECL::Face(int index){
return FaceData[index];
}
void DECL::Write(){
int e1,e2,e3;
FILE *TRIANGLES;
TRIANGLES = fopen("triangles.stl","w");
fprintf(TRIANGLES,"solid \n");
for (int idx=0; idx<TriangleCount; idx++){
e1 = Face(idx);
e2 = halfedge.next(e1);
e3 = halfedge.next(e2);
auto P1 = vertex.coords(halfedge.v1(e1));
auto P2 = vertex.coords(halfedge.v1(e2));
auto P3 = vertex.coords(halfedge.v1(e3));
fprintf(TRIANGLES,"vertex %f %f %f\n",P1.x,P1.y,P1.z);
fprintf(TRIANGLES,"vertex %f %f %f\n",P2.x,P2.y,P2.z);
fprintf(TRIANGLES,"vertex %f %f %f\n",P3.x,P3.y,P3.z);
}
fclose(TRIANGLES);
}
void DECL::LocalIsosurface(const DoubleArray& A, double value, const int i, const int j, const int k){
Point P,Q;
Point PlaceHolder;
@ -350,243 +367,43 @@ double DECL::EdgeAngle(int edge)
return angle;
}
// NOTE(review): two function headers and two bodies appear back to back in this span.
//  - Isosurface(DoubleArray&, const double&): sweeps the interior cells, classifies the
//    eight cube corners against 0.0, looks up edgeTable/triTable (looks like a
//    marching-cubes lookup -- confirm) and builds a per-cell half-edge table in local
//    variables only. This is the first body, up to the end of the first triple loop.
//  - iso_surface(const Array<double>&, double): builds each cell with
//    DECL::LocalIsosurface and writes its triangles to "isosurface.stl" as ASCII STL.
//    This is the second body.
// As written, Nx/Ny/Nz are declared in both bodies and the first body refers to A and v,
// which only the first header provides; only one header/body pair can be kept.
void Isosurface(DoubleArray &A, const double &v)
void iso_surface(const Array<double>&Field, const double isovalue)
{
// The isovalue argument v is not used by the first body; corners are compared against 0.0
NULL_USE( v );
Point P,Q;
Point PlaceHolder;
Point C0,C1,C2,C3,C4,C5,C6,C7;
int TriangleCount;
int VertexCount;
int CubeIndex;
Point VertexList[12];
Point NewVertexList[12];
int LocalRemap[12];
Point cellvertices[20];
std::array<std::array<int,3>,20> Triangles;
Triangles.fill( { 0 } );
// Values from array 'A' at the cube corners
double CubeValues[8];
int Nx = A.size(0);
int Ny = A.size(1);
int Nz = A.size(2);
// Points corresponding to cube corners
C0.x = 0.0; C0.y = 0.0; C0.z = 0.0;
C1.x = 1.0; C1.y = 0.0; C1.z = 0.0;
C2.x = 1.0; C2.y = 1.0; C2.z = 0.0;
C3.x = 0.0; C3.y = 1.0; C3.z = 0.0;
C4.x = 0.0; C4.y = 0.0; C4.z = 1.0;
C5.x = 1.0; C5.y = 0.0; C5.z = 1.0;
C6.x = 1.0; C6.y = 1.0; C6.z = 1.0;
C7.x = 0.0; C7.y = 1.0; C7.z = 1.0;
std::vector<std::array<int,6>> HalfEdge;
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
// Set the corner values for this cube
CubeValues[0] = A(i,j,k);
CubeValues[1] = A(i+1,j,k);
CubeValues[2] = A(i+1,j+1,k);
CubeValues[3] = A(i,j+1,k);
CubeValues[4] = A(i,j,k+1);
CubeValues[5] = A(i+1,j,k+1);
CubeValues[6] = A(i+1,j+1,k+1);
CubeValues[7] = A(i,j+1,k+1);
//Determine the index into the edge table which
//tells us which vertices are inside of the surface
// One bit per corner, set when the corner value is negative
CubeIndex = 0;
if (CubeValues[0] < 0.0f) CubeIndex |= 1;
if (CubeValues[1] < 0.0f) CubeIndex |= 2;
if (CubeValues[2] < 0.0f) CubeIndex |= 4;
if (CubeValues[3] < 0.0f) CubeIndex |= 8;
if (CubeValues[4] < 0.0f) CubeIndex |= 16;
if (CubeValues[5] < 0.0f) CubeIndex |= 32;
if (CubeValues[6] < 0.0f) CubeIndex |= 64;
if (CubeValues[7] < 0.0f) CubeIndex |= 128;
//Find the vertices where the surface intersects the cube
// Each set bit of edgeTable[CubeIndex] selects one of the 12 cube edges to interpolate on
if (edgeTable[CubeIndex] & 1){
P = VertexInterp(C0,C1,CubeValues[0],CubeValues[1]);
VertexList[0] = P;
Q = C0;
}
if (edgeTable[CubeIndex] & 2){
P = VertexInterp(C1,C2,CubeValues[1],CubeValues[2]);
VertexList[1] = P;
Q = C1;
}
if (edgeTable[CubeIndex] & 4){
P = VertexInterp(C2,C3,CubeValues[2],CubeValues[3]);
VertexList[2] = P;
Q = C2;
}
if (edgeTable[CubeIndex] & 8){
P = VertexInterp(C3,C0,CubeValues[3],CubeValues[0]);
VertexList[3] = P;
Q = C3;
}
if (edgeTable[CubeIndex] & 16){
P = VertexInterp(C4,C5,CubeValues[4],CubeValues[5]);
VertexList[4] = P;
Q = C4;
}
if (edgeTable[CubeIndex] & 32){
P = VertexInterp(C5,C6,CubeValues[5],CubeValues[6]);
VertexList[5] = P;
Q = C5;
}
if (edgeTable[CubeIndex] & 64){
P = VertexInterp(C6,C7,CubeValues[6],CubeValues[7]);
VertexList[6] = P;
Q = C6;
}
if (edgeTable[CubeIndex] & 128){
P = VertexInterp(C7,C4,CubeValues[7],CubeValues[4]);
VertexList[7] = P;
Q = C7;
}
if (edgeTable[CubeIndex] & 256){
P = VertexInterp(C0,C4,CubeValues[0],CubeValues[4]);
VertexList[8] = P;
Q = C0;
}
if (edgeTable[CubeIndex] & 512){
P = VertexInterp(C1,C5,CubeValues[1],CubeValues[5]);
VertexList[9] = P;
Q = C1;
}
if (edgeTable[CubeIndex] & 1024){
P = VertexInterp(C2,C6,CubeValues[2],CubeValues[6]);
VertexList[10] = P;
Q = C2;
}
if (edgeTable[CubeIndex] & 2048){
P = VertexInterp(C3,C7,CubeValues[3],CubeValues[7]);
VertexList[11] = P;
Q = C3;
}
// Compact the vertices referenced by triTable into NewVertexList;
// LocalRemap maps a cube-edge index to its compacted vertex index (-1 = unused)
VertexCount=0;
for (int idx=0;idx<12;idx++)
LocalRemap[idx] = -1;
for (int idx=0;triTable[CubeIndex][idx]!=-1;idx++)
{
if(LocalRemap[triTable[CubeIndex][idx]] == -1)
{
NewVertexList[VertexCount] = VertexList[triTable[CubeIndex][idx]];
LocalRemap[triTable[CubeIndex][idx]] = VertexCount;
VertexCount++;
}
}
for (int idx=0;idx<VertexCount;idx++) {
P = NewVertexList[idx];
//P.x += i;
//P.y += j;
//P.z += k;
cellvertices[idx] = P;
}
// Consecutive triples of triTable entries form the triangles of this cell
TriangleCount = 0;
for (int idx=0;triTable[CubeIndex][idx]!=-1;idx+=3) {
Triangles[TriangleCount][0] = LocalRemap[triTable[CubeIndex][idx+0]];
Triangles[TriangleCount][1] = LocalRemap[triTable[CubeIndex][idx+1]];
Triangles[TriangleCount][2] = LocalRemap[triTable[CubeIndex][idx+2]];
TriangleCount++;
}
int nTris = TriangleCount;
// Now add the local values to the DECL data structure
// Three half-edges per triangle; each entry is {v1, v2, triangle, twin, previous, next}
HalfEdge.resize(nTris*3);
int idx_edge=0;
for (int idx=0; idx<TriangleCount; idx++){
int V1 = Triangles[idx][0];
int V2 = Triangles[idx][1];
int V3 = Triangles[idx][2];
// first edge: V1->V2
HalfEdge[idx_edge][0] = V1; // first vertex
HalfEdge[idx_edge][1] = V2; // second vertex
HalfEdge[idx_edge][2] = idx; // triangle
HalfEdge[idx_edge][3] = -1; // twin
HalfEdge[idx_edge][4] = idx_edge+2; // previous edge
HalfEdge[idx_edge][5] = idx_edge+1; // next edge
idx_edge++;
// second edge: V2->V3
HalfEdge[idx_edge][0] = V2; // first vertex
HalfEdge[idx_edge][1] = V3; // second vertex
HalfEdge[idx_edge][2] = idx; // triangle
HalfEdge[idx_edge][3] = -1; // twin
HalfEdge[idx_edge][4] = idx_edge-1; // previous edge
HalfEdge[idx_edge][5] = idx_edge+1; // next edge
idx_edge++;
// third edge: V3->V1
HalfEdge[idx_edge][0] = V3; // first vertex
HalfEdge[idx_edge][1] = V1; // second vertex
HalfEdge[idx_edge][2] = idx; // triangle
HalfEdge[idx_edge][3] = -1; // twin
HalfEdge[idx_edge][4] = idx_edge-1; // previous edge
HalfEdge[idx_edge][5] = idx_edge-2; // next edge
idx_edge++;
}
int EdgeCount=idx_edge;
for (int idx=0; idx<EdgeCount; idx++){
int V1=HalfEdge[idx][0];
int V2=HalfEdge[idx][1];
// Find all the twins within the cube
// NOTE(review): the loop condition below tests idx, not jdx, so nothing in it bounds
// jdx -- looks unintended (jdx<EdgeCount?); confirm
for (int jdx=0; idx<EdgeCount; jdx++){
if (HalfEdge[jdx][1] == V1 && HalfEdge[jdx][0] == V2){
// this is the pair
HalfEdge[idx][3] = jdx;
HalfEdge[jdx][3] = idx;
}
if (HalfEdge[jdx][1] == V2 && HalfEdge[jdx][0] == V1 && !(idx==jdx)){
std::printf("WARNING: half edges with identical orientation! \n");
}
}
// Use "ghost" twins if edge is on a cube face
// NOTE(review): the assignments below index HalfEdge with idx_edge, which equals
// EdgeCount (the size HalfEdge was resized to) at this point, rather than with idx --
// looks like a write one past the end; confirm
P = cellvertices[V1];
Q = cellvertices[V2];
if (P.x == 0.0 && Q.x == 0.0) HalfEdge[idx_edge][3] = -1; // ghost twin for x=0 face
if (P.x == 1.0 && Q.x == 1.0) HalfEdge[idx_edge][3] = -2; // ghost twin for x=1 face
if (P.y == 0.0 && Q.y == 0.0) HalfEdge[idx_edge][3] = -3; // ghost twin for y=0 face
if (P.y == 1.0 && Q.y == 1.0) HalfEdge[idx_edge][3] = -4; // ghost twin for y=1 face
if (P.z == 0.0 && Q.z == 0.0) HalfEdge[idx_edge][3] = -5; // ghost twin for z=0 face
if (P.z == 1.0 && Q.z == 1.0) HalfEdge[idx_edge][3] = -6; // ghost twin for z=1 face
}
// Find all the angles
/*for (int idx=0; idx<EdgeCount; idx++){
int V1=HalfEdge[idx][0];
int V2=HalfEdge[idx][1];
int T1= HalfEdge[idx_edge][2];
int twin=HalfEdge[idx_edge][3];
if (twin == -1){
}
}*/
// Map vertices to global coordinates
for (int idx=0;idx<VertexCount;idx++) {
P = cellvertices[idx];
P.x += i;
P.y += j;
P.z += k;
cellvertices[idx] = P;
}
}
}
}
// Second body starts here: one DECL object is reused for every interior cell and the
// triangles of each cell are appended to "isosurface.stl" as ASCII STL facets
DECL object;
int e1,e2,e3;
FILE *TRIANGLES;
// NOTE(review): the fopen result is used without a null check
TRIANGLES = fopen("isosurface.stl","w");
fprintf(TRIANGLES,"solid isosurface\n");
int Nx = Field.size(0);
int Ny = Field.size(1);
int Nz = Field.size(2);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
object.LocalIsosurface(Field,isovalue,i,j,k);
for (int idx=0; idx<object.TriangleCount; idx++){
// Walk the three half-edges of face idx; the first vertex of each is a triangle corner
e1 = object.Face(idx);
e2 = object.halfedge.next(e1);
e3 = object.halfedge.next(e2);
auto P1 = object.vertex.coords(object.halfedge.v1(e1));
auto P2 = object.vertex.coords(object.halfedge.v1(e2));
auto P3 = object.vertex.coords(object.halfedge.v1(e3));
auto Normal = object.TriNormal(e1);
// P1.x += 1.0*i; P1.y += 1.0*j; P1.z +=1.0*k;
//P2.x += 1.0*i; P2.y += 1.0*j; P2.z +=1.0*k;
//P3.x += 1.0*i; P3.y += 1.0*j; P3.z +=1.0*k;
fprintf(TRIANGLES,"facet normal %f %f %f\n",Normal.x,Normal.y,Normal.z);
fprintf(TRIANGLES," outer loop\n");
fprintf(TRIANGLES," vertex %f %f %f\n",P1.x,P1.y,P1.z);
fprintf(TRIANGLES," vertex %f %f %f\n",P2.x,P2.y,P2.z);
fprintf(TRIANGLES," vertex %f %f %f\n",P3.x,P3.y,P3.z);
fprintf(TRIANGLES," endloop\n");
fprintf(TRIANGLES,"endfacet\n");
}
}
}
}
fprintf(TRIANGLES,"endsolid isosurface\n");
fclose(TRIANGLES);
}

View File

@ -1,3 +1,6 @@
#ifndef DCEL_INC
#define DCEL_INC
#include <vector>
#include "analysis/pmmc.h"
@ -67,6 +70,7 @@ public:
Vertex vertex;
Halfedge halfedge;
void LocalIsosurface(const DoubleArray& A, double value, int i, int j, int k);
void Write();
int Face(int index);
double origin(int edge);
@ -78,3 +82,7 @@ public:
private:
std::vector<int> FaceData;
};
void iso_surface(const Array<double>&Field, const double isovalue);
#endif

View File

@ -176,12 +176,154 @@ void CalcVecDist( Array<Vec> &d, const Array<int> &ID0, const Domain &Dm,
// Update distance
double err = calcVecUpdateInterior( d, dx[0], dx[1], dx[2] );
// Check if we are finished
err = Dm.Comm.maxReduce( err );
err = maxReduce( Dm.Comm, err );
if ( err < tol )
break;
}
}
double Eikonal(DoubleArray &Distance, const Array<char> &ID, Domain &Dm, int timesteps, const std::array<bool,3>& periodic){
    /*
     * This routine converts the data in the Distance array to a signed distance
     * by solving the equation df/dt = sign(1-|grad f|), where Distance provides
     * the values of f on the mesh associated with domain Dm
     * It has been tested with segmented data initialized to values [-1,1]
     * and will converge toward the signed distance to the surface bounding the associated phases
     *
     * Distance is updated in place. At most `timesteps` iterations are run; iteration
     * stops early once the largest per-voxel update over all ranks is below 1e-5.
     * The return value is the sum over all ranks of the per-voxel updates applied in the
     * last executed iteration, divided by Dm.Volume (0 if no iteration was executed).
     *
     * Reference:
     * Min C (2010) On reinitializing level set functions, Journal of Computational Physics 229
     */
    const double dt = 0.1;   // pseudo-time step

    // Initialized so that a call with timesteps <= 0 returns a defined value
    double GlobalVar = 0.0;
    double GlobalMax = 0.0;

    const int xdim = Dm.Nx-2;
    const int ydim = Dm.Ny-2;
    const int zdim = Dm.Nz-2;
    //fillHalo<double> fillData(Dm.Comm, Dm.rank_info,xdim,ydim,zdim,1,1,1,0,1);
    fillHalo<double> fillData( Dm.Comm, Dm.rank_info, {xdim, ydim, zdim}, {1,1,1}, 50, 1, {true,true,true}, periodic );

    // Arrays to store the second derivatives
    DoubleArray Dxx(Dm.Nx,Dm.Ny,Dm.Nz);
    DoubleArray Dyy(Dm.Nx,Dm.Ny,Dm.Nz);
    DoubleArray Dzz(Dm.Nx,Dm.Ny,Dm.Nz);

    int count = 0;
    while (count < timesteps){

        // Communicate the halo of values
        fillData.fill(Distance);

        // Compute second order derivatives
        for (int k=1;k<Dm.Nz-1;k++){
            for (int j=1;j<Dm.Ny-1;j++){
                for (int i=1;i<Dm.Nx-1;i++){
                    Dxx(i,j,k) = Distance(i+1,j,k) + Distance(i-1,j,k) - 2*Distance(i,j,k);
                    Dyy(i,j,k) = Distance(i,j+1,k) + Distance(i,j-1,k) - 2*Distance(i,j,k);
                    Dzz(i,j,k) = Distance(i,j,k+1) + Distance(i,j,k-1) - 2*Distance(i,j,k);
                }
            }
        }
        fillData.fill(Dxx);
        fillData.fill(Dyy);
        fillData.fill(Dzz);

        double LocalVar = 0.0;   // sum of the updates applied on this rank
        double LocalMax = 0.0;   // largest update magnitude on this rank

        // Execute the next timestep (Distance is updated in place during the sweep)
        for (int k=1;k<Dm.Nz-1;k++){
            for (int j=1;j<Dm.Ny-1;j++){
                for (int i=1;i<Dm.Nx-1;i++){

                    // +1 where ID is 1, -1 everywhere else
                    const double sign = (ID(i,j,k) == 1) ? 1.0 : -1.0;

                    // local second derivative terms
                    const double Dxxp = minmod(Dxx(i,j,k),Dxx(i+1,j,k));
                    const double Dyyp = minmod(Dyy(i,j,k),Dyy(i,j+1,k));
                    const double Dzzp = minmod(Dzz(i,j,k),Dzz(i,j,k+1));
                    const double Dxxm = minmod(Dxx(i,j,k),Dxx(i-1,j,k));
                    const double Dyym = minmod(Dyy(i,j,k),Dyy(i,j-1,k));
                    const double Dzzm = minmod(Dzz(i,j,k),Dzz(i,j,k-1));

                    // One-sided differences corrected by the limited second derivative
                    const double Dxp = Distance(i+1,j,k)- Distance(i,j,k) - 0.5*Dxxp;
                    const double Dyp = Distance(i,j+1,k)- Distance(i,j,k) - 0.5*Dyyp;
                    const double Dzp = Distance(i,j,k+1)- Distance(i,j,k) - 0.5*Dzzp;

                    const double Dxm = Distance(i,j,k) - Distance(i-1,j,k) + 0.5*Dxxm;
                    const double Dym = Distance(i,j,k) - Distance(i,j-1,k) + 0.5*Dyym;
                    const double Dzm = Distance(i,j,k) - Distance(i,j,k-1) + 0.5*Dzzm;

                    // Compute upwind derivatives for Godunov Hamiltonian
                    double Dx,Dy,Dz;
                    if (sign < 0.0){
                        Dx = (Dxp + Dxm > 0.f) ? Dxp*Dxp : Dxm*Dxm;
                        Dy = (Dyp + Dym > 0.f) ? Dyp*Dyp : Dym*Dym;
                        Dz = (Dzp + Dzm > 0.f) ? Dzp*Dzp : Dzm*Dzm;
                    }
                    else{
                        Dx = (Dxp + Dxm < 0.f) ? Dxp*Dxp : Dxm*Dxm;
                        Dy = (Dyp + Dym < 0.f) ? Dyp*Dyp : Dym*Dym;
                        Dz = (Dzp + Dzm < 0.f) ? Dzp*Dzp : Dzm*Dzm;
                    }

                    // Gradient magnitude, capped at 1 so the update never changes sign
                    double norm = sqrt(Dx + Dy + Dz);
                    if (norm > 1.0) norm=1.0;

                    const double delta = dt*sign*(1.0 - norm);
                    Distance(i,j,k) += delta;
                    LocalVar += delta;

                    if (fabs(delta) > LocalMax)
                        LocalMax = fabs(delta);
                }
            }
        }

        MPI_Allreduce(&LocalVar,&GlobalVar,1,MPI_DOUBLE,MPI_SUM,Dm.Comm);
        MPI_Allreduce(&LocalMax,&GlobalMax,1,MPI_DOUBLE,MPI_MAX,Dm.Comm);
        GlobalVar /= Dm.Volume;
        count++;

        if (count%50 == 0 && Dm.rank()==0 )
            printf("Time=%i, Max variation=%f, Global variation=%f \n",count,GlobalMax,GlobalVar);

        // Converged: no voxel moved by more than the tolerance in this step
        if (fabs(GlobalMax) < 1e-5){
            if (Dm.rank()==0) printf("Exiting with max tolerance of 1e-5 \n");
            break;
        }
    }
    return GlobalVar;
}
// Explicit instantiations
template void CalcDist<float>( Array<float>&, const Array<char>&, const Domain&, const std::array<bool,3>&, const std::array<double,3>& );

View File

@ -16,6 +16,16 @@ struct Vec {
};
// Order two vectors by their squared Euclidean length
inline bool operator<(const Vec& l, const Vec& r){
    const auto lhs_sq = l.x*l.x+l.y*l.y+l.z*l.z;
    const auto rhs_sq = r.x*r.x+r.y*r.y+r.z*r.z;
    return lhs_sq < rhs_sq;
}
/*!
 * @brief minmod limiter
 * @details Returns 0 when a and b have opposite signs (a*b < 0); otherwise returns
 *    whichever of the two has the smaller magnitude (a when the magnitudes are equal).
 * @param[in] a     First value
 * @param[in] b     Second value
 * @return          The limited value
 */
inline double minmod(const double &a, const double &b){
    // Arguments are read-only, so const references also accept const values and temporaries
    if ( a*b < 0.0 )
        return 0.0;
    return ( fabs(a) > fabs(b) ) ? b : a;
}
/*!
* @brief Calculate the distance using a simple method
@ -40,4 +50,16 @@ void CalcDist( Array<TYPE> &Distance, const Array<char> &ID, const Domain &Dm,
void CalcVecDist( Array<Vec> &Distance, const Array<int> &ID, const Domain &Dm,
const std::array<bool,3>& periodic = {true,true,true}, const std::array<double,3>& dx = {1,1,1} );
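/*!
* @brief Calculate the distance based on solution of Eikonal equation
* @details This routine calculates the signed distance to the nearest domain surface.
* The values in Distance are used as the starting field and are updated in place.
* @param[in,out] Distance Initial field on input, distance function on output
* @param[in] ID Domain id
* @param[in] Dm Domain information
* @param[in] timesteps maximum number of timesteps to run for Eikonal solver
* @param[in] periodic Directions that are periodic
* @return Sum of the updates applied in the last timestep, divided by the domain volume
*/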
double Eikonal(DoubleArray &Distance, const Array<char> &ID, Domain &Dm, int timesteps, const std::array<bool,3>& periodic);
#endif

View File

@ -2,6 +2,33 @@
#include "math.h"
#include "ProfilerApp.h"
void Mean3D( const Array<double> &Input, Array<double> &Output )
{
    PROFILE_START("Mean3D");
    // 3D mean filter: every interior voxel of Output receives the average of the
    // 3x3x3 neighborhood of Input centered on it. Voxels on the outer layer of
    // Output are not written.
    const int Nx = static_cast<int>(Input.size(0));
    const int Ny = static_cast<int>(Input.size(1));
    const int Nz = static_cast<int>(Input.size(2));
    for (int z=1; z<Nz-1; z++){
        for (int y=1; y<Ny-1; y++){
            for (int x=1; x<Nx-1; x++){
                // Partial sums are grouped and accumulated in a fixed order
                // six face neighbors
                const double faces = Input(x+1,y,z)+Input(x,y+1,z)+Input(x,y,z+1)+Input(x-1,y,z)+Input(x,y-1,z)+Input(x,y,z-1);
                // twelve edge neighbors, four per coordinate plane
                const double edgesXY = Input(x+1,y+1,z)+Input(x-1,y+1,z)+Input(x+1,y-1,z)+Input(x-1,y-1,z);
                const double edgesXZ = Input(x+1,y,z+1)+Input(x-1,y,z+1)+Input(x+1,y,z-1)+Input(x-1,y,z-1);
                const double edgesYZ = Input(x,y+1,z+1)+Input(x,y-1,z+1)+Input(x,y+1,z-1)+Input(x,y-1,z-1);
                // eight corner neighbors, upper layer then lower layer
                const double cornersUp = Input(x+1,y+1,z+1)+Input(x-1,y+1,z+1)+Input(x+1,y-1,z+1)+Input(x-1,y-1,z+1);
                const double cornersDown = Input(x+1,y+1,z-1)+Input(x-1,y+1,z-1)+Input(x+1,y-1,z-1)+Input(x-1,y-1,z-1);

                double total = Input(x,y,z);
                total += faces;
                total += edgesXY;
                total += edgesXZ;
                total += edgesYZ;
                total += cornersUp;
                total += cornersDown;
                Output(x,y,z) = total/27.0;
            }
        }
    }
    PROFILE_STOP("Mean3D");
}
void Med3D( const Array<float> &Input, Array<float> &Output )
{

View File

@ -4,6 +4,13 @@
#include "common/Array.h"
/*!
* @brief Filter image
* @details This routine performs a mean filter
* @param[in] Input Input image
* @param[out] Output Output image
*/
void Mean3D( const Array<double> &Input, Array<double> &Output );
/*!
* @brief Filter image
@ -13,7 +20,6 @@
*/
void Med3D( const Array<float> &Input, Array<float> &Output );
/*!
* @brief Filter image
* @details This routine performs a non-linear local means filter

View File

@ -58,11 +58,11 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
}
}
}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
// total Global is the number of nodes in the pore-space
totalGlobal = Dm->Comm.sumReduce( count );
maxdistGlobal = Dm->Comm.sumReduce( maxdist );
MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm);
double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2);
double volume_fraction=totalGlobal/volume;
if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction);
@ -131,8 +131,9 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
// Increase the critical radius until the target saturation is met
double deltaR=0.05; // amount to change the radius in voxel units
double Rcrit_old;
double Rcrit_old=0.0;
double GlobalNumber = 1.f;
int imin,jmin,kmin,imax,jmax,kmax;
if (ErodeLabel == 1){
@ -219,7 +220,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount("YZ"),Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount("yz"),Dm->rank_yz(),recvtag);
Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount("Yz"),Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount("yZ"),Dm->rank_yZ(),recvtag);
Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount("yZ"),Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount("Yz"),Dm->rank_Yz(),recvtag);
//......................................................................................
//......................................................................................
UnpackID(Dm->recvList("x"), Dm->recvCount("x") ,recvID_x, id);
UnpackID(Dm->recvList("X"), Dm->recvCount("X") ,recvID_X, id);
UnpackID(Dm->recvList("y"), Dm->recvCount("y") ,recvID_y, id);
@ -240,7 +241,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id);
//......................................................................................
//double GlobalNumber = Dm->Comm.sumReduce( LocalNumber );
MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
count = 0.f;
for (int k=1; k<Nz-1; k++){
@ -253,7 +254,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain>
}
}
}
countGlobal = Dm->Comm.sumReduce( count );
MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
void_fraction_new = countGlobal/totalGlobal;
void_fraction_diff_new = abs(void_fraction_new-VoidFraction);
/* if (rank==0){
@ -285,7 +286,7 @@ double morph_open()
fillHalo<char> fillChar(Dm->Comm,Dm->rank_info,{Nx-2,Ny-2,Nz-2},{1,1,1},0,1);
GlobalNumber = Dm->Comm.sumReduce( LocalNumber );
MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
count = 0.f;
for (int k=1; k<Nz-1; k++){
@ -298,7 +299,7 @@ double morph_open()
}
}
}
countGlobal = Dm->Comm.sumReduce( count );
MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
return countGlobal;
}
*/
@ -341,11 +342,11 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
}
}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
// total Global is the number of nodes in the pore-space
totalGlobal = Dm->Comm.sumReduce( count );
maxdistGlobal = Dm->Comm.sumReduce( maxdist );
MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm);
double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2);
double volume_fraction=totalGlobal/volume;
if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction);
@ -415,6 +416,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
double deltaR=0.05; // amount to change the radius in voxel units
double Rcrit_old;
double GlobalNumber = 1.f;
int imin,jmin,kmin,imax,jmax,kmax;
double Rcrit_new = maxdistGlobal;
@ -422,7 +424,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
// Rcrit_new = strtod(argv[2],NULL);
// if (rank==0) printf("Max. distance =%f, Initial critical radius = %f \n",maxdistGlobal,Rcrit_new);
//}
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
FILE *DRAIN = fopen("morphdrain.csv","w");
@ -526,7 +528,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
UnpackID(Dm->recvList("yZ"), Dm->recvCount("yZ") ,recvID_yZ, id);
UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id);
//......................................................................................
// double GlobalNumber = Dm->Comm.sumReduce( LocalNumber );
MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
for (int k=0; k<nz; k++){
for (int j=0; j<ny; j++){
@ -545,7 +547,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
BlobIDstruct new_index;
double vF=0.0; double vS=0.0;
ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm);
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
for (int k=0; k<nz; k++){
for (int j=0; j<ny; j++){
@ -581,7 +583,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
}
ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm);
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
for (int k=1; k<nz-1; k++){
for (int j=1; j<ny-1; j++){
@ -607,7 +609,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
}
}
}
countGlobal = Dm->Comm.sumReduce( count );
MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
void_fraction_new = countGlobal/totalGlobal;
void_fraction_diff_new = abs(void_fraction_new-VoidFraction);
if (rank==0){
@ -647,13 +649,13 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain
return final_void_fraction;
}
double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id, std::shared_ptr<Domain> Dm, double TargetGrowth)
double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id, std::shared_ptr<Domain> Dm, double TargetGrowth, double WallFactor)
{
int Nx = Dm->Nx;
int Ny = Dm->Ny;
int Nz = Dm->Nz;
int rank = Dm->rank();
double count=0.0;
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
@ -664,7 +666,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
}
}
}
double count_original = Dm->Comm.sumReduce( count);
double count_original=sumReduce( Dm->Comm, count);
// Estimate morph_delta
double morph_delta = 0.0;
@ -684,8 +686,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
double walldist=BoundaryDist(i,j,k);
double wallweight = 1.0 / (1+exp(-5.f*(walldist-1.f)));
//wallweight = 1.0;
double wallweight = WallFactor/ (1+exp(-5.f*(walldist-1.f)));
if (fabs(wallweight*morph_delta) > MAX_DISPLACEMENT) MAX_DISPLACEMENT= fabs(wallweight*morph_delta);
if (Dist(i,j,k) - wallweight*morph_delta < 0.0){
@ -694,8 +695,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
}
}
}
count = Dm->Comm.sumReduce( count );
MAX_DISPLACEMENT = Dm->Comm.maxReduce( MAX_DISPLACEMENT );
count=sumReduce( Dm->Comm, count);
MAX_DISPLACEMENT = maxReduce( Dm->Comm, MAX_DISPLACEMENT);
GrowthEstimate = count - count_original;
ERROR = fabs((GrowthEstimate-TargetGrowth) /TargetGrowth);
@ -731,14 +732,14 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
double walldist=BoundaryDist(i,j,k);
double wallweight = 1.0 / (1+exp(-5.f*(walldist-1.f)));
double wallweight = WallFactor / (1+exp(-5.f*(walldist-1.f)));
//wallweight = 1.0;
Dist(i,j,k) -= wallweight*morph_delta;
if (Dist(i,j,k) < 0.0) count+=1.0;
}
}
}
count = Dm->Comm.sumReduce( count );
count=sumReduce( Dm->Comm, count);
return count;
}

View File

@ -5,4 +5,4 @@
double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain> Dm, double VoidFraction, signed char ErodeLabel, signed char ReplaceLabel);
double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr<Domain> Dm, double VoidFraction);
double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id, std::shared_ptr<Domain> Dm, double TargetVol);
double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id, std::shared_ptr<Domain> Dm, double TargetVol, double WallFactor);

View File

@ -3,7 +3,7 @@
#include "analysis/analysis.h"
#include "common/Array.h"
#include "common/Communication.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/ScaLBL.h"
#include "models/ColorModel.h"
@ -462,7 +462,7 @@ private:
/******************************************************************
* MPI comm wrapper for use with analysis *
******************************************************************/
runAnalysis::commWrapper::commWrapper( int tag_, const Utilities::MPI& comm_, runAnalysis* analysis_ ):
runAnalysis::commWrapper::commWrapper( int tag_, MPI_Comm comm_, runAnalysis* analysis_ ):
comm(comm_),
tag(tag_),
analysis(analysis_)
@ -479,7 +479,7 @@ runAnalysis::commWrapper::~commWrapper()
{
if ( tag == -1 )
return;
comm.barrier();
MPI_Barrier( comm );
analysis->d_comm_used[tag] = false;
}
runAnalysis::commWrapper runAnalysis::getComm( )
@ -496,10 +496,10 @@ runAnalysis::commWrapper runAnalysis::getComm( )
if ( tag == -1 )
ERROR("Unable to get comm");
}
tag = d_comm.bcast( tag, 0 );
MPI_Bcast( &tag, 1, MPI_INT, 0, d_comm );
d_comm_used[tag] = true;
if ( d_comms[tag].isNull() )
d_comms[tag] = d_comm.dup();
if ( d_comms[tag] == MPI_COMM_NULL )
MPI_Comm_dup( MPI_COMM_WORLD, &d_comms[tag] );
return commWrapper(tag,d_comms[tag],this);
}
@ -560,7 +560,7 @@ runAnalysis::runAnalysis( std::shared_ptr<Database> input_db,
d_restartFile = restart_file + "." + rankString;
d_rank = d_comm.getRank();
d_rank = MPI_WORLD_RANK();
writeIDMap(ID_map_struct(),0,id_map_filename);
// Initialize IO for silo
IO::initialize("","silo","false");
@ -629,8 +629,11 @@ runAnalysis::runAnalysis( std::shared_ptr<Database> input_db,
// Initialize the comms
for (int i=0; i<1024; i++)
MPI_Comm_dup(MPI_COMM_WORLD,&d_comm);
for (int i=0; i<1024; i++) {
d_comms[i] = MPI_COMM_NULL;
d_comm_used[i] = false;
}
// Initialize the threads
int N_threads = db->getWithDefault<int>( "N_threads", 4 );
auto method = db->getWithDefault<std::string>( "load_balance", "default" );
@ -640,6 +643,12 @@ runAnalysis::~runAnalysis( )
{
// Finish processing analysis
finish();
// Clear internal data
MPI_Comm_free( &d_comm );
for (int i=0; i<1024; i++) {
if ( d_comms[i] != MPI_COMM_NULL )
MPI_Comm_free(&d_comms[i]);
}
}
void runAnalysis::finish( )
{
@ -653,7 +662,7 @@ void runAnalysis::finish( )
d_wait_subphase.reset();
d_wait_restart.reset();
// Synchronize
d_comm.barrier();
MPI_Barrier( d_comm );
PROFILE_STOP("finish");
}
@ -906,12 +915,12 @@ void runAnalysis::run(int timestep, std::shared_ptr<Database> input_db, TwoPhase
// Spawn a thread to write the restart file
// if ( matches(type,AnalysisType::CreateRestart) ) {
if (timestep%d_restart_interval==0){
auto Restart_db = input_db->cloneDatabase();
// Restart_db->putScalar<bool>( "Restart", true );
if (d_rank==0) {
input_db->putScalar<bool>( "Restart", true );
std::ofstream OutStream("Restart.db");
input_db->print(OutStream, "");
OutStream.close();
// std::ofstream OutStream("Restart.db");
// Restart_db->print(OutStream, "");
// OutStream.close();
}
// Write the restart file (using a separate thread)
auto work = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np);
@ -1010,21 +1019,21 @@ void runAnalysis::basic(int timestep, std::shared_ptr<Database> input_db, SubPha
cfq = std::shared_ptr<double>(new double[19*d_Np],DeleteArray<double>);
ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double));
ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double));
// clone the input database to avoid modifying shared data
auto Restart_db = input_db->cloneDatabase();
auto tmp_color_db = Restart_db->getDatabase( "Color" );
tmp_color_db->putScalar<int>("timestep",timestep);
tmp_color_db->putScalar<bool>( "Restart", true );
Restart_db->putDatabase("Color", tmp_color_db);
if (d_rank==0) {
color_db->putScalar<int>("timestep",timestep);
color_db->putScalar<bool>( "Restart", true );
input_db->putDatabase("Color", color_db);
std::ofstream OutStream("Restart.db");
input_db->print(OutStream, "");
Restart_db->print(OutStream, "");
OutStream.close();
}
// Write the restart file (using a separate thread)
auto work1 = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np);
work1->add_dependency(d_wait_restart);
d_wait_restart = d_tpool.add_work(work1);
}
if (timestep%d_visualization_interval==0){

View File

@ -68,10 +68,10 @@ public:
class commWrapper
{
public:
Utilities::MPI comm;
MPI_Comm comm;
int tag;
runAnalysis *analysis;
commWrapper( int tag, const Utilities::MPI& comm, runAnalysis *analysis );
commWrapper( int tag, MPI_Comm comm, runAnalysis *analysis );
commWrapper( ) = delete;
commWrapper( const commWrapper &rhs ) = delete;
commWrapper& operator=( const commWrapper &rhs ) = delete;
@ -100,8 +100,8 @@ private:
BlobIDList d_last_id_map;
std::vector<IO::MeshDataStruct> d_meshData;
std::string d_restartFile;
Utilities::MPI d_comm;
Utilities::MPI d_comms[1024];
MPI_Comm d_comm;
MPI_Comm d_comms[1024];
volatile bool d_comm_used[1024];
std::shared_ptr<ScaLBL_Communicator> d_ScaLBL_Comm;

View File

@ -228,7 +228,8 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
Array<float>& Mean, Array<float>& Dist1, Array<float>& Dist2 )
{
PROFILE_SCOPED(timer,"filter_final");
int rank = Dm.Comm.getRank();
int rank;
MPI_Comm_rank(Dm.Comm,&rank);
int Nx = Dm.Nx-2;
int Ny = Dm.Ny-2;
int Nz = Dm.Nz-2;
@ -241,7 +242,7 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
float tmp = 0;
for (size_t i=0; i<Dist0.length(); i++)
tmp += Dist0(i)*Dist0(i);
tmp = sqrt( Dm.Comm.sumReduce(tmp) / Dm.Comm.sumReduce<float>(Dist0.length()) );
tmp = sqrt( sumReduce(Dm.Comm,tmp) / sumReduce(Dm.Comm,(float)Dist0.length()) );
const float dx1 = 0.3*tmp;
const float dx2 = 1.05*dx1;
if (rank==0)
@ -284,7 +285,7 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
Phase.fill(1);
ComputeGlobalBlobIDs( Nx, Ny, Nz, Dm.rank_info, Phase, SignDist, 0, 0, GlobalBlobID, Dm.Comm );
fillInt.fill(GlobalBlobID);
int N_blobs = Dm.Comm.maxReduce(GlobalBlobID.max()+1);
int N_blobs = maxReduce(Dm.Comm,GlobalBlobID.max()+1);
std::vector<float> mean(N_blobs,0);
std::vector<int> count(N_blobs,0);
for (int k=1; k<=Nz; k++) {
@ -320,8 +321,8 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
}
}
}
mean = Dm.Comm.sumReduce(mean);
count = Dm.Comm.sumReduce(count);
mean = sumReduce(Dm.Comm,mean);
count = sumReduce(Dm.Comm,count);
for (size_t i=0; i<mean.size(); i++)
mean[i] /= count[i];
/*if (rank==0) {

View File

@ -1,579 +0,0 @@
###############################################################################
# FindHIP.cmake
###############################################################################
###############################################################################
# SET: Variable defaults
###############################################################################
# Declares the user-facing cache entries of this module: one flag list per
# compiler driver (HIPCC, HCC, NVCC), a per-configuration variant of each, and
# two build options. All of them default to empty / the value shown below.
# User defined flags
set(HIP_HIPCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HIPCC")
set(HIP_HCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HCC")
set(HIP_NVCC_FLAGS "" CACHE STRING "Semicolon delimted flags for NVCC")
mark_as_advanced(HIP_HIPCC_FLAGS HIP_HCC_FLAGS HIP_NVCC_FLAGS)
# Collect every configuration name that may be in use: the generator's list,
# the current build type, and the four standard names. Duplicates are removed
# so each per-configuration cache entry is declared once.
set(_hip_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo)
list(REMOVE_DUPLICATES _hip_configuration_types)
# Declare HIP_<COMPILER>_FLAGS_<CONFIG> (upper-cased configuration name) for
# every configuration collected above.
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
set(HIP_HIPCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HIPCC")
set(HIP_HCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HCC")
set(HIP_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for NVCC")
mark_as_advanced(HIP_HIPCC_FLAGS_${config_upper} HIP_HCC_FLAGS_${config_upper} HIP_NVCC_FLAGS_${config_upper})
endforeach()
# HIP_HOST_COMPILATION_CPP is read by HIP_PREPARE_TARGET_COMMANDS to choose
# between the CXX and C toolchain variables; HIP_VERBOSE_BUILD is read there
# to decide the verbosity passed to the generated build script.
option(HIP_HOST_COMPILATION_CPP "Host code compilation mode" ON)
option(HIP_VERBOSE_BUILD "Print out the commands run while compiling the HIP source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF)
mark_as_advanced(HIP_HOST_COMPILATION_CPP)
###############################################################################
# Set HIP CMAKE Flags
###############################################################################
# Defines the CMAKE_*_HIP_* variables CMake consults for targets whose
# LINKER_LANGUAGE is HIP (HIP_ADD_EXECUTABLE sets that property). Most values
# are mirrored from the corresponding CXX variables.
# Copy the invocation styles from CXX to HIP
set(CMAKE_HIP_ARCHIVE_CREATE ${CMAKE_CXX_ARCHIVE_CREATE})
set(CMAKE_HIP_ARCHIVE_APPEND ${CMAKE_CXX_ARCHIVE_APPEND})
set(CMAKE_HIP_ARCHIVE_FINISH ${CMAKE_CXX_ARCHIVE_FINISH})
set(CMAKE_SHARED_LIBRARY_SONAME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG})
set(CMAKE_SHARED_LIBRARY_CREATE_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS})
set(CMAKE_SHARED_LIBRARY_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CXX_FLAGS})
#set(CMAKE_SHARED_LIBRARY_LINK_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS})
set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG})
set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP})
set(CMAKE_SHARED_LIBRARY_LINK_STATIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_STATIC_CXX_FLAGS})
set(CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_CXX_FLAGS})
# Set the CMake flags to use the HCC compiler.
# The link rules below run the linker helper with HCC_PATH as its first
# argument; the <...> placeholders are left literal in these strings.
# NOTE(review): HIP_HIPCC_CMAKE_LINKER_HELPER is only located by find_program
# further down in this file, and HCC_PATH is not set anywhere in this file, so
# both look empty here unless already cached/defined by the caller - confirm.
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <LINK_LIBRARIES> -shared" )
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
###############################################################################
# FIND: HIP and associated helper binaries
###############################################################################
# Locates the HIP installation root and the hipcc, hipconfig and
# hipcc_cmake_linker_helper executables, then queries hipconfig for the HIP
# version and platform. Results are stored in cache variables:
#   HIP_ROOT_DIR, HIP_HIPCC_EXECUTABLE, HIP_HIPCONFIG_EXECUTABLE,
#   HIP_HIPCC_CMAKE_LINKER_HELPER, HIP_VERSION, HIP_PLATFORM
# plus the normal variables HIP_VERSION_MAJOR/MINOR/PATCH and
# HIP_VERSION_STRING. Nothing in this section runs on non-Linux hosts.
# HIP is supported on Linux only
if(UNIX AND NOT APPLE AND NOT CYGWIN)
# Search for HIP installation
if(NOT HIP_ROOT_DIR)
# Search in user specified path first
# (only the ROCM_PATH / HIP_PATH environment variables are consulted here
# because NO_DEFAULT_PATH is given)
find_path(
HIP_ROOT_DIR
NAMES hipconfig
PATHS
ENV ROCM_PATH
ENV HIP_PATH
PATH_SUFFIXES bin
DOC "HIP installed location"
NO_DEFAULT_PATH
)
# Now search in default path
# (find_path does not search again once HIP_ROOT_DIR holds a found value,
# so this call only matters when the search above failed)
find_path(
HIP_ROOT_DIR
NAMES hipconfig
PATHS
/opt/rocm
/opt/rocm/hip
PATH_SUFFIXES bin
DOC "HIP installed location"
)
# Check if we found HIP installation
if(HIP_ROOT_DIR)
# If so, fix the path
# (find_path returned the directory containing hipconfig; strip the
# trailing "bin" component to obtain the installation root)
string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" HIP_ROOT_DIR ${HIP_ROOT_DIR})
# And push it back to the cache
set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE)
endif()
# Report a missing root: fatal when the package was REQUIRED, a plain
# message otherwise (suppressed by QUIET).
if(NOT EXISTS ${HIP_ROOT_DIR})
if(HIP_FIND_REQUIRED)
message(FATAL_ERROR "Specify HIP_ROOT_DIR")
elseif(NOT HIP_FIND_QUIETLY)
message("HIP_ROOT_DIR not found or specified")
endif()
endif()
endif()
# Each tool below is searched first in the explicit locations only
# (NO_DEFAULT_PATH), then - if still not found - in CMake's default paths.
# Find HIPCC executable
find_program(
HIP_HIPCC_EXECUTABLE
NAMES hipcc
PATHS
"${HIP_ROOT_DIR}"
ENV ROCM_PATH
ENV HIP_PATH
/opt/rocm
/opt/rocm/hip
PATH_SUFFIXES bin
NO_DEFAULT_PATH
)
if(NOT HIP_HIPCC_EXECUTABLE)
# Now search in default paths
find_program(HIP_HIPCC_EXECUTABLE hipcc)
endif()
mark_as_advanced(HIP_HIPCC_EXECUTABLE)
# Find HIPCONFIG executable
find_program(
HIP_HIPCONFIG_EXECUTABLE
NAMES hipconfig
PATHS
"${HIP_ROOT_DIR}"
ENV ROCM_PATH
ENV HIP_PATH
/opt/rocm
/opt/rocm/hip
PATH_SUFFIXES bin
NO_DEFAULT_PATH
)
if(NOT HIP_HIPCONFIG_EXECUTABLE)
# Now search in default paths
find_program(HIP_HIPCONFIG_EXECUTABLE hipconfig)
endif()
mark_as_advanced(HIP_HIPCONFIG_EXECUTABLE)
# Find HIPCC_CMAKE_LINKER_HELPER executable
find_program(
HIP_HIPCC_CMAKE_LINKER_HELPER
NAMES hipcc_cmake_linker_helper
PATHS
"${HIP_ROOT_DIR}"
ENV ROCM_PATH
ENV HIP_PATH
/opt/rocm
/opt/rocm/hip
PATH_SUFFIXES bin
NO_DEFAULT_PATH
)
if(NOT HIP_HIPCC_CMAKE_LINKER_HELPER)
# Now search in default paths
find_program(HIP_HIPCC_CMAKE_LINKER_HELPER hipcc_cmake_linker_helper)
endif()
mark_as_advanced(HIP_HIPCC_CMAKE_LINKER_HELPER)
if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_VERSION)
# Compute the version
execute_process(
COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --version
OUTPUT_VARIABLE _hip_version
ERROR_VARIABLE _hip_error
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_STRIP_TRAILING_WHITESPACE
)
# Any text on stderr is treated as failure and the version falls back to
# "0.0.0"; the process exit code is not inspected.
if(NOT _hip_error)
set(HIP_VERSION ${_hip_version} CACHE STRING "Version of HIP as computed from hipcc")
else()
set(HIP_VERSION "0.0.0" CACHE STRING "Version of HIP as computed by FindHIP()")
endif()
mark_as_advanced(HIP_VERSION)
endif()
if(HIP_VERSION)
# Split "major.minor.patch" on '.' into its three components.
# NOTE(review): list(GET ... 2) presumably fails if the version string has
# fewer than three dot-separated parts - confirm hipconfig's output format.
string(REPLACE "." ";" _hip_version_list "${HIP_VERSION}")
list(GET _hip_version_list 0 HIP_VERSION_MAJOR)
list(GET _hip_version_list 1 HIP_VERSION_MINOR)
list(GET _hip_version_list 2 HIP_VERSION_PATCH)
set(HIP_VERSION_STRING "${HIP_VERSION}")
endif()
if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_PLATFORM)
# Compute the platform
execute_process(
COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform
OUTPUT_VARIABLE _hip_platform
OUTPUT_STRIP_TRAILING_WHITESPACE
)
set(HIP_PLATFORM ${_hip_platform} CACHE STRING "HIP platform as computed by hipconfig")
mark_as_advanced(HIP_PLATFORM)
endif()
endif()
# Standard find-module epilogue: the four variables listed under REQUIRED_VARS
# (root directory, hipcc, hipconfig and the detected platform) are the ones
# handed to find_package_handle_standard_args as required, with HIP_VERSION
# supplied as the version variable.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
HIP
REQUIRED_VARS
HIP_ROOT_DIR
HIP_HIPCC_EXECUTABLE
HIP_HIPCONFIG_EXECUTABLE
HIP_PLATFORM
VERSION_VAR HIP_VERSION
)
###############################################################################
# MACRO: Locate helper files
###############################################################################
# Locate a helper script that ships in the FindHIP/ directory next to this
# module.
#
#   _name      - base name of the helper (for example run_hipcc)
#   _extension - file extension without the leading dot (for example cmake)
#
# Result: the INTERNAL cache variable HIP_<_name> is set to
#   <directory of this file>/FindHIP/<_name>.<_extension>
# whether or not that file exists. A missing file is a FATAL_ERROR when the
# package was requested as REQUIRED, otherwise a STATUS message (suppressed by
# QUIET).
#
# Because this is a macro, _hip_full_name, error_message and the recomputed
# CMAKE_CURRENT_LIST_DIR are written into the caller's scope.
macro(HIP_FIND_HELPER_FILE _name _extension)
    set(_hip_full_name "${_name}.${_extension}")
    # Directory containing the file that is currently being processed.
    get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
    set(HIP_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindHIP/${_hip_full_name}")
    if(NOT EXISTS "${HIP_${_name}}")
        set(error_message "${_hip_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindHIP")
        if(HIP_FIND_REQUIRED)
            message(FATAL_ERROR "${error_message}")
        else()
            if(NOT HIP_FIND_QUIETLY)
                message(STATUS "${error_message}")
            endif()
        endif()
    endif()
    # Set this variable as internal, so the user isn't bugged with it.
    # The help string names the helper via _hip_full_name; the previous code
    # expanded an undefined variable here, leaving the description empty.
    set(HIP_${_name} ${HIP_${_name}} CACHE INTERNAL "Location of ${_hip_full_name}" FORCE)
endmacro()
###############################################################################
# Resolve the two helper scripts expected under FindHIP/; the resulting paths
# are stored in HIP_run_make2cmake and HIP_run_hipcc (the latter is the
# template configured per source file in HIP_PREPARE_TARGET_COMMANDS).
hip_find_helper_file(run_make2cmake cmake)
hip_find_helper_file(run_hipcc cmake)
###############################################################################
###############################################################################
# MACRO: Reset compiler flags
###############################################################################
# Remove the HIP_HIPCC_FLAGS, HIP_HCC_FLAGS and HIP_NVCC_FLAGS variables and
# their per-configuration variants (one set per entry of
# _hip_configuration_types) from the calling scope.
# The unset() calls carry no CACHE keyword, so only normal variables are
# removed; the cache entries of the same names declared at the top of this
# module are left in place.
# Being a macro, the loop variables config / config_upper leak to the caller.
macro(HIP_RESET_FLAGS)
unset(HIP_HIPCC_FLAGS)
unset(HIP_HCC_FLAGS)
unset(HIP_NVCC_FLAGS)
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
unset(HIP_HIPCC_FLAGS_${config_upper})
unset(HIP_HCC_FLAGS_${config_upper})
unset(HIP_NVCC_FLAGS_${config_upper})
endforeach()
endmacro()
###############################################################################
# MACRO: Separate the options from the sources
###############################################################################
# Partition the trailing arguments (ARGN) into five lists. The first five
# parameters are the NAMES of the output variables:
#   _sources       - everything that is not recognised as an option
#   _cmake_options - EXCLUDE_FROM_ALL / STATIC / SHARED / MODULE
#   _hipcc_options - arguments following the HIPCC_OPTIONS keyword
#   _hcc_options   - arguments following the HCC_OPTIONS keyword
#   _nvcc_options  - arguments following the NVCC_OPTIONS keyword
# A keyword stays in effect until the next keyword; there is no keyword that
# switches back to collecting sources, so sources must precede the first
# *_OPTIONS keyword. The four CMake target options are recognised anywhere.
macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options)
# Start with all five output lists empty.
set(${_sources})
set(${_cmake_options})
set(${_hipcc_options})
set(${_hcc_options})
set(${_nvcc_options})
# At most one of these three flags is TRUE at any time; it records which
# option list subsequent arguments belong to.
set(_hipcc_found_options FALSE)
set(_hcc_found_options FALSE)
set(_nvcc_found_options FALSE)
foreach(arg ${ARGN})
# The "x" prefix keeps the comparison a plain string test even if ${arg}
# happens to be the name of a defined variable.
if("x${arg}" STREQUAL "xHIPCC_OPTIONS")
set(_hipcc_found_options TRUE)
set(_hcc_found_options FALSE)
set(_nvcc_found_options FALSE)
elseif("x${arg}" STREQUAL "xHCC_OPTIONS")
set(_hipcc_found_options FALSE)
set(_hcc_found_options TRUE)
set(_nvcc_found_options FALSE)
elseif("x${arg}" STREQUAL "xNVCC_OPTIONS")
set(_hipcc_found_options FALSE)
set(_hcc_found_options FALSE)
set(_nvcc_found_options TRUE)
elseif(
"x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR
"x${arg}" STREQUAL "xSTATIC" OR
"x${arg}" STREQUAL "xSHARED" OR
"x${arg}" STREQUAL "xMODULE"
)
list(APPEND ${_cmake_options} ${arg})
else()
if(_hipcc_found_options)
list(APPEND ${_hipcc_options} ${arg})
elseif(_hcc_found_options)
list(APPEND ${_hcc_options} ${arg})
elseif(_nvcc_found_options)
list(APPEND ${_nvcc_options} ${arg})
else()
# Assume this is a file
list(APPEND ${_sources} ${arg})
endif()
endif()
endforeach()
endmacro()
###############################################################################
# MACRO: Add include directories to pass to the hipcc command
###############################################################################
# Accumulator for user-supplied include directories; it seeds
# HIP_HIPCC_INCLUDE_ARGS inside HIP_PREPARE_TARGET_COMMANDS.
set(HIP_HIPCC_INCLUDE_ARGS_USER "")
# Append one entry per argument to HIP_HIPCC_INCLUDE_ARGS_USER. Each entry is
# a generator expression that yields "-I<dir>" when <dir> is a true-ish
# (non-empty) string and nothing otherwise.
macro(HIP_INCLUDE_DIRECTORIES)
foreach(dir ${ARGN})
list(APPEND HIP_HIPCC_INCLUDE_ARGS_USER $<$<BOOL:${dir}>:-I${dir}>)
endforeach()
endmacro()
###############################################################################
# FUNCTION: Helper to avoid clashes of files with the same basename but different paths
###############################################################################
# Derive a directory fragment, safe to nest under the build tree, from the
# location of a source file.
#
#   path       - source file path (absolute or relative)
#   build_path - NAME of the variable in the caller's scope that receives the
#                sanitised directory part of the path (may be empty)
function(HIP_COMPUTE_BUILD_PATH path build_path)
    # Normalise separators to CMake's forward-slash form.
    file(TO_CMAKE_PATH "${path}" _hip_rel)

    if(IS_ABSOLUTE "${_hip_rel}")
        # Absolute paths are re-expressed relative to the current binary dir
        # when they start with it, and relative to the source dir otherwise.
        string(FIND "${_hip_rel}" "${CMAKE_CURRENT_BINARY_DIR}" _hip_prefix_at)
        if(_hip_prefix_at EQUAL 0)
            set(_hip_base "${CMAKE_CURRENT_BINARY_DIR}")
        else()
            set(_hip_base "${CMAKE_CURRENT_SOURCE_DIR}")
        endif()
        file(RELATIVE_PATH _hip_rel "${_hip_base}" "${_hip_rel}")
    endif()

    # Sanitise, in this order: drop leading slashes, neutralise drive-letter
    # colons, turn "../" into "__/" so the result cannot climb out of the
    # build directory, and replace spaces.
    string(REGEX REPLACE "^[/]+" "" _hip_rel "${_hip_rel}")
    string(REPLACE ":" "_" _hip_rel "${_hip_rel}")
    string(REPLACE "../" "__/" _hip_rel "${_hip_rel}")
    string(REPLACE " " "_" _hip_rel "${_hip_rel}")

    # Keep only the directory portion and hand it back to the caller.
    get_filename_component(_hip_rel "${_hip_rel}" PATH)
    set(${build_path} "${_hip_rel}" PARENT_SCOPE)
endfunction()
###############################################################################
# MACRO: Parse OPTIONS from ARGN & set variables prefixed by _option_prefix
###############################################################################
# Distribute the option arguments (ARGN) over per-configuration variables.
#   _option_prefix - base variable name, e.g. HIP_HIPCC_FLAGS
# Arguments are appended to <_option_prefix> until an argument equal to an
# upper-cased configuration name (from _hip_configuration_types) is seen; from
# then on they are appended to <_option_prefix>_<CONFIG>, until the next
# configuration name. The configuration names themselves are not appended.
macro(HIP_PARSE_HIPCC_OPTIONS _option_prefix)
# Suffix ("" or "_<CONFIG>") of the variable currently being appended to.
set(_hip_found_config)
foreach(arg ${ARGN})
# Determine if we are dealing with a per-configuration flag
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
if(arg STREQUAL "${config_upper}")
set(_hip_found_config _${arg})
# Clear arg to prevent it from being processed anymore
set(arg)
endif()
endforeach()
if(arg)
list(APPEND ${_option_prefix}${_hip_found_config} "${arg}")
endif()
endforeach()
endmacro()
###############################################################################
# MACRO: Try and include dependency file if it exists
###############################################################################
# Load a per-source dependency file and decide whether it must be rebuilt.
#   dependency_file - path of the .depend file for one generated object
# The file is created (with only a header comment) if missing and then
# include()d; it is expected to set HIP_HIPCC_DEPEND to a list of files.
# On return HIP_HIPCC_DEPEND holds either that list or, when regeneration is
# needed, just the dependency file itself, and HIP_HIPCC_DEPEND_REGENERATE
# tells which case applied. Both variables are set in the caller's scope.
macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file)
set(HIP_HIPCC_DEPEND)
set(HIP_HIPCC_DEPEND_REGENERATE FALSE)
# Create the dependency file if it doesn't exist
if(NOT EXISTS ${dependency_file})
file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n")
endif()
# Include the dependency file
include(${dependency_file})
# Verify the existence of all the included files
if(HIP_HIPCC_DEPEND)
foreach(f ${HIP_HIPCC_DEPEND})
if(NOT EXISTS ${f})
# If they aren't there, regenerate the file again
set(HIP_HIPCC_DEPEND_REGENERATE TRUE)
endif()
endforeach()
else()
# No dependencies, so regenerate the file
set(HIP_HIPCC_DEPEND_REGENERATE TRUE)
endif()
# Regenerate the dependency file if needed
# (the file is reset to its header-only state and becomes the sole
# dependency recorded in HIP_HIPCC_DEPEND)
if(HIP_HIPCC_DEPEND_REGENERATE)
set(HIP_HIPCC_DEPEND ${dependency_file})
file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n")
endif()
endmacro()
###############################################################################
# MACRO: Prepare cmake commands for the target
###############################################################################
# Create one add_custom_command per HIP source file of a target.
#   _target          - target name; used for generator expressions and for the
#                      per-target output directory
#   _format          - not referenced in the body of this macro
#   _generated_files - NAME of the output variable receiving the list of
#                      object files the custom commands will produce
#   _source_files    - NAME of the output variable receiving the list of
#                      inputs that were treated as HIP sources
#   ARGN             - sources, CMake target options and the
#                      HIPCC_OPTIONS / HCC_OPTIONS / NVCC_OPTIONS groups
# A file is treated as a HIP source when its name ends in ".cu" or it has the
# HIP_SOURCE_PROPERTY_FORMAT source property, and it is not HEADER_FILE_ONLY.
# For each such file the run_hipcc template (HIP_run_hipcc) is configured
# into a per-file build script which the custom command executes with
# "cmake -P".
# This is a macro: every variable set below (HIP_C_OR_CXX, HIP_HOST_COMPILER,
# the flag lists, ...) is written into the caller's scope.
macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files)
set(_hip_flags "")
string(TOUPPER "${CMAKE_BUILD_TYPE}" _hip_build_configuration)
if(HIP_HOST_COMPILATION_CPP)
set(HIP_C_OR_CXX CXX)
else()
set(HIP_C_OR_CXX C)
endif()
set(generated_extension ${CMAKE_${HIP_C_OR_CXX}_OUTPUT_EXTENSION})
# Initialize list of includes with those specified by the user. Append with
# ones specified to cmake directly.
set(HIP_HIPCC_INCLUDE_ARGS ${HIP_HIPCC_INCLUDE_ARGS_USER})
# Add the include directories
# (first the target's INCLUDE_DIRECTORIES property via a generator
# expression, then the directory-level INCLUDE_DIRECTORIES property)
set(include_directories_generator "$<TARGET_PROPERTY:${_target},INCLUDE_DIRECTORIES>")
list(APPEND HIP_HIPCC_INCLUDE_ARGS "$<$<BOOL:${include_directories_generator}>:-I$<JOIN:${include_directories_generator}, -I>>")
get_directory_property(_hip_include_directories INCLUDE_DIRECTORIES)
list(REMOVE_DUPLICATES _hip_include_directories)
if(_hip_include_directories)
foreach(dir ${_hip_include_directories})
list(APPEND HIP_HIPCC_INCLUDE_ARGS $<$<BOOL:${dir}>:-I${dir}>)
endforeach()
endif()
# Split ARGN into sources / options and fold the option groups into the
# HIP_*_FLAGS[_<CONFIG>] variables.
HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN})
HIP_PARSE_HIPCC_OPTIONS(HIP_HIPCC_FLAGS ${_hipcc_options})
HIP_PARSE_HIPCC_OPTIONS(HIP_HCC_FLAGS ${_hcc_options})
HIP_PARSE_HIPCC_OPTIONS(HIP_NVCC_FLAGS ${_nvcc_options})
# Add the compile definitions
set(compile_definition_generator "$<TARGET_PROPERTY:${_target},COMPILE_DEFINITIONS>")
list(APPEND HIP_HIPCC_FLAGS "$<$<BOOL:${compile_definition_generator}>:-D$<JOIN:${compile_definition_generator}, -D>>")
# Check if we are building shared library.
# (SHARED or MODULE enables it; an explicit STATIC overrides both)
set(_hip_build_shared_libs FALSE)
list(FIND _hip_cmake_options SHARED _hip_found_SHARED)
list(FIND _hip_cmake_options MODULE _hip_found_MODULE)
if(_hip_found_SHARED GREATER -1 OR _hip_found_MODULE GREATER -1)
set(_hip_build_shared_libs TRUE)
endif()
list(FIND _hip_cmake_options STATIC _hip_found_STATIC)
if(_hip_found_STATIC GREATER -1)
set(_hip_build_shared_libs FALSE)
endif()
# If we are building a shared library, add extra flags to HIP_HIPCC_FLAGS
if(_hip_build_shared_libs)
list(APPEND HIP_HCC_FLAGS "-fPIC")
list(APPEND HIP_NVCC_FLAGS "--shared -Xcompiler '-fPIC'")
endif()
# Set host compiler
set(HIP_HOST_COMPILER "${CMAKE_${HIP_C_OR_CXX}_COMPILER}")
# Set compiler flags
# (each _HIP_*_FLAGS variable holds the TEXT of one or more set() commands,
# one per configuration; NOTE(review): presumably these are substituted into
# the run_hipcc template by configure_file below - confirm against the
# template, which is not part of this file)
set(_HIP_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${HIP_C_OR_CXX}_FLAGS})")
set(_HIP_HIPCC_FLAGS "set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS})")
set(_HIP_HCC_FLAGS "set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS})")
set(_HIP_NVCC_FLAGS "set(HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS})")
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
set(_HIP_HOST_FLAGS "${_HIP_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${CMAKE_${HIP_C_OR_CXX}_FLAGS_${config_upper}})")
set(_HIP_HIPCC_FLAGS "${_HIP_HIPCC_FLAGS}\nset(HIP_HIPCC_FLAGS_${config_upper} ${HIP_HIPCC_FLAGS_${config_upper}})")
set(_HIP_HCC_FLAGS "${_HIP_HCC_FLAGS}\nset(HIP_HCC_FLAGS_${config_upper} ${HIP_HCC_FLAGS_${config_upper}})")
set(_HIP_NVCC_FLAGS "${_HIP_NVCC_FLAGS}\nset(HIP_NVCC_FLAGS_${config_upper} ${HIP_NVCC_FLAGS_${config_upper}})")
endforeach()
# Reset the output variable
set(_hip_generated_files "")
set(_hip_source_files "")
# Iterate over all arguments and create custom commands for all source files
foreach(file ${ARGN})
# Ignore any file marked as a HEADER_FILE_ONLY
get_source_file_property(_is_header ${file} HEADER_FILE_ONLY)
# Allow per source file overrides of the format. Also allows compiling non .cu files.
get_source_file_property(_hip_source_format ${file} HIP_SOURCE_PROPERTY_FORMAT)
# host_flag TRUE means "leave this argument to the host compiler";
# only arguments with host_flag FALSE get a hipcc custom command.
if((${file} MATCHES "\\.cu$" OR _hip_source_format) AND NOT _is_header)
set(host_flag FALSE)
else()
set(host_flag TRUE)
endif()
if(NOT host_flag)
# Determine output directory
HIP_COMPUTE_BUILD_PATH("${file}" hip_build_path)
set(hip_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_target}.dir/${hip_build_path}")
get_filename_component(basename ${file} NAME)
set(generated_file_path "${hip_compile_output_dir}/${CMAKE_CFG_INTDIR}")
set(generated_file_basename "${_target}_generated_${basename}${generated_extension}")
# Set file names
set(generated_file "${generated_file_path}/${generated_file_basename}")
set(cmake_dependency_file "${hip_compile_output_dir}/${generated_file_basename}.depend")
set(custom_target_script_pregen "${hip_compile_output_dir}/${generated_file_basename}.cmake.pre-gen")
set(custom_target_script "${hip_compile_output_dir}/${generated_file_basename}.cmake")
# Set properties for object files
set_source_files_properties("${generated_file}"
PROPERTIES
EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked
)
# Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path
get_filename_component(file_path "${file}" PATH)
if(IS_ABSOLUTE "${file_path}")
set(source_file "${file}")
else()
set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
endif()
# Bring in the dependencies
HIP_INCLUDE_HIPCC_DEPENDENCIES(${cmake_dependency_file})
# Configure the build script
# (two stages: configure_file expands @VAR@ references now, file(GENERATE)
# then evaluates the generator expressions at generate time)
configure_file("${HIP_run_hipcc}" "${custom_target_script_pregen}" @ONLY)
file(GENERATE
OUTPUT "${custom_target_script}"
INPUT "${custom_target_script_pregen}"
)
# main_dep expands to "DEPENDS <source_file>" inside add_custom_command.
set(main_dep DEPENDS ${source_file})
# With Makefile generators the literal make variable $(VERBOSE) is passed
# through so verbosity is decided at build time.
if(CMAKE_GENERATOR MATCHES "Makefiles")
set(verbose_output "$(VERBOSE)")
elseif(HIP_VERBOSE_BUILD)
set(verbose_output ON)
else()
set(verbose_output OFF)
endif()
# Create the comment string
file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
set(hip_build_comment_string "Building HIPCC object ${generated_file_relative_path}")
# Build the generated file and dependency file
add_custom_command(
OUTPUT ${generated_file}
# These output files depend on the source_file and the contents of cmake_dependency_file
${main_dep}
DEPENDS ${HIP_HIPCC_DEPEND}
DEPENDS ${custom_target_script}
# Make sure the output directory exists before trying to write to it.
COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}"
COMMAND ${CMAKE_COMMAND} ARGS
-D verbose:BOOL=${verbose_output}
-D build_configuration:STRING=${_hip_build_configuration}
-D "generated_file:STRING=${generated_file}"
-P "${custom_target_script}"
WORKING_DIRECTORY "${hip_compile_output_dir}"
COMMENT "${hip_build_comment_string}"
)
# Make sure the build system knows the file is generated
set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE)
list(APPEND _hip_generated_files ${generated_file})
list(APPEND _hip_source_files ${file})
endif()
endforeach()
# Set the return parameter
set(${_generated_files} ${_hip_generated_files})
set(${_source_files} ${_hip_source_files})
endmacro()
###############################################################################
# HIP_ADD_EXECUTABLE
###############################################################################
# Define an executable target whose HIP sources are compiled through the
# custom commands created by HIP_PREPARE_TARGET_COMMANDS.
#   hip_target - name of the executable target
#   ARGN       - sources, CMake target options, and optional
#                HIPCC_OPTIONS / HCC_OPTIONS / NVCC_OPTIONS groups
# The target is linked with LINKER_LANGUAGE HIP, i.e. with the
# CMAKE_HIP_LINK_EXECUTABLE rule redefined below.
macro(HIP_ADD_EXECUTABLE hip_target)
# Separate the sources from the options
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
# Sources handled by hipcc are replaced by their generated object files, so
# drop them from the list handed to add_executable.
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
# Fall back to a fixed default location when HCC_HOME is unset or empty.
if("x${HCC_HOME}" STREQUAL "x")
set(HCC_HOME "/opt/rocm/hcc")
endif()
# Overrides the link rule set near the top of this module, passing HCC_HOME
# (rather than HCC_PATH) to the linker helper.
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP)
endmacro()
###############################################################################
# HIP_ADD_LIBRARY
###############################################################################
# Define a library target whose HIP sources are compiled through the custom
# commands created by HIP_PREPARE_TARGET_COMMANDS.
#   hip_target - name of the library target
#   ARGN       - sources, CMake target options (STATIC / SHARED / MODULE /
#                EXCLUDE_FROM_ALL), and optional HIPCC_OPTIONS / HCC_OPTIONS /
#                NVCC_OPTIONS groups
# Unlike HIP_ADD_EXECUTABLE, the CMake options are also forwarded to
# HIP_PREPARE_TARGET_COMMANDS, which inspects them to decide whether the
# position-independent-code flags are added. The library is linked with the
# host language (C or CXX) rather than HIP.
macro(HIP_ADD_LIBRARY hip_target)
# Separate the sources from the options
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
# Sources handled by hipcc are replaced by their generated object files.
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
add_library(${hip_target} ${_cmake_options} ${_generated_files} ${_sources})
# HIP_C_OR_CXX is set as a side effect of HIP_PREPARE_TARGET_COMMANDS above.
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX})
endmacro()
# vim: ts=4:sw=4:expandtab:smartindent

View File

@ -1,7 +1,7 @@
#ifndef COMMUNICATION_H_INC
#define COMMUNICATION_H_INC
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
#include "common/Array.h"
@ -38,7 +38,7 @@ struct RankInfoStruct {
//! Redistribute domain data (dst may be smaller than the src)
template<class TYPE>
Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src_data,
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, const Utilities::MPI& comm );
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, MPI_Comm comm );
/*!
@ -59,7 +59,7 @@ public:
* @param[in] fill Fill {faces,edges,corners}
* @param[in] periodic Periodic dimensions
*/
fillHalo( const Utilities::MPI& comm, const RankInfoStruct& info,
fillHalo( MPI_Comm comm, const RankInfoStruct& info,
std::array<int,3> n, std::array<int,3> ng, int tag, int depth,
std::array<bool,3> fill = {true,true,true},
std::array<bool,3> periodic = {true,true,true} );
@ -83,7 +83,7 @@ public:
private:
Utilities::MPI comm;
MPI_Comm comm;
RankInfoStruct info;
std::array<int,3> n, ng;
int depth;
@ -93,6 +93,8 @@ private:
TYPE *mem;
TYPE *send[3][3][3], *recv[3][3][3];
MPI_Request send_req[3][3][3], recv_req[3][3][3];
size_t N_type;
MPI_Datatype datatype;
fillHalo(); // Private empty constructor
fillHalo(const fillHalo&); // Private copy constructor
fillHalo& operator=(const fillHalo&); // Private assignment operator
@ -134,7 +136,7 @@ void InitializeRanks( const int rank, const int nprocx, const int nprocy, const
//***************************************************************************************
inline void CommunicateSendRecvCounts( const Utilities::MPI& comm, int sendtag, int recvtag,
inline void CommunicateSendRecvCounts( MPI_Comm Communicator, int sendtag, int recvtag,
int rank_x, int rank_y, int rank_z,
int rank_X, int rank_Y, int rank_Z,
int rank_xy, int rank_XY, int rank_xY, int rank_Xy,
@ -152,53 +154,54 @@ inline void CommunicateSendRecvCounts( const Utilities::MPI& comm, int sendtag,
int& recvCount_yz, int& recvCount_YZ, int& recvCount_yZ, int& recvCount_Yz )
{
MPI_Request req1[18], req2[18];
req1[0] = comm.Isend(&sendCount_x,1,rank_x,sendtag+0);
req2[0] = comm.Irecv(&recvCount_X,1,rank_X,recvtag+0);
req1[1] = comm.Isend(&sendCount_X,1,rank_X,sendtag+1);
req2[1] = comm.Irecv(&recvCount_x,1,rank_x,recvtag+1);
req1[2] = comm.Isend(&sendCount_y,1,rank_y,sendtag+2);
req2[2] = comm.Irecv(&recvCount_Y,1,rank_Y,recvtag+2);
req1[3] = comm.Isend(&sendCount_Y,1,rank_Y,sendtag+3);
req2[3] = comm.Irecv(&recvCount_y,1,rank_y,recvtag+3);
req1[4] = comm.Isend(&sendCount_z,1,rank_z,sendtag+4);
req2[4] = comm.Irecv(&recvCount_Z,1,rank_Z,recvtag+4);
req1[5] = comm.Isend(&sendCount_Z,1,rank_Z,sendtag+5);
req2[5] = comm.Irecv(&recvCount_z,1,rank_z,recvtag+5);
MPI_Status stat1[18],stat2[18];
MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator,&req1[0]);
MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator,&req2[0]);
MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator,&req1[1]);
MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator,&req2[1]);
MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator,&req1[2]);
MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator,&req2[2]);
MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator,&req1[3]);
MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator,&req2[3]);
MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator,&req1[4]);
MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator,&req2[4]);
MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator,&req1[5]);
MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z,recvtag+5,Communicator,&req2[5]);
req1[6] = comm.Isend(&sendCount_xy,1,rank_xy,sendtag+6);
req2[6] = comm.Irecv(&recvCount_XY,1,rank_XY,recvtag+6);
req1[7] = comm.Isend(&sendCount_XY,1,rank_XY,sendtag+7);
req2[7] = comm.Irecv(&recvCount_xy,1,rank_xy,recvtag+7);
req1[8] = comm.Isend(&sendCount_Xy,1,rank_Xy,sendtag+8);
req2[8] = comm.Irecv(&recvCount_xY,1,rank_xY,recvtag+8);
req1[9] = comm.Isend(&sendCount_xY,1,rank_xY,sendtag+9);
req2[9] = comm.Irecv(&recvCount_Xy,1,rank_Xy,recvtag+9);
MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator,&req1[6]);
MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator,&req2[6]);
MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator,&req1[7]);
MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator,&req2[7]);
MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator,&req1[8]);
MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator,&req2[8]);
MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator,&req1[9]);
MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator,&req2[9]);
req1[10] = comm.Isend(&sendCount_xz,1,rank_xz,sendtag+10);
req2[10] = comm.Irecv(&recvCount_XZ,1,rank_XZ,recvtag+10);
req1[11] = comm.Isend(&sendCount_XZ,1,rank_XZ,sendtag+11);
req2[11] = comm.Irecv(&recvCount_xz,1,rank_xz,recvtag+11);
req1[12] = comm.Isend(&sendCount_Xz,1,rank_Xz,sendtag+12);
req2[12] = comm.Irecv(&recvCount_xZ,1,rank_xZ,recvtag+12);
req1[13] = comm.Isend(&sendCount_xZ,1,rank_xZ,sendtag+13);
req2[13] = comm.Irecv(&recvCount_Xz,1,rank_Xz,recvtag+13);
MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator,&req1[10]);
MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator,&req2[10]);
MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator,&req1[11]);
MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator,&req2[11]);
MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator,&req1[12]);
MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator,&req2[12]);
MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator,&req1[13]);
MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator,&req2[13]);
req1[14] = comm.Isend(&sendCount_yz,1,rank_yz,sendtag+14);
req2[14] = comm.Irecv(&recvCount_YZ,1,rank_YZ,recvtag+14);
req1[15] = comm.Isend(&sendCount_YZ,1,rank_YZ,sendtag+15);
req2[15] = comm.Irecv(&recvCount_yz,1,rank_yz,recvtag+15);
req1[16] = comm.Isend(&sendCount_Yz,1,rank_Yz,sendtag+16);
req2[16] = comm.Irecv(&recvCount_yZ,1,rank_yZ,recvtag+16);
req1[17] = comm.Isend(&sendCount_yZ,1,rank_yZ,sendtag+17);
req2[17] = comm.Irecv(&recvCount_Yz,1,rank_Yz,recvtag+17);
comm.waitAll( 18, req1 );
comm.waitAll( 18, req2 );
comm.barrier();
MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator,&req1[14]);
MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator,&req2[14]);
MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator,&req1[15]);
MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator,&req2[15]);
MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator,&req1[16]);
MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator,&req2[16]);
MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag+17,Communicator,&req1[17]);
MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag+17,Communicator,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(Communicator);
}
//***************************************************************************************
inline void CommunicateRecvLists( const Utilities::MPI& comm, int sendtag, int recvtag,
inline void CommunicateRecvLists( MPI_Comm Communicator, int sendtag, int recvtag,
int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z,
int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy,
int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz,
@ -219,52 +222,53 @@ inline void CommunicateRecvLists( const Utilities::MPI& comm, int sendtag, int r
int rank_Xy, int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz)
{
MPI_Request req1[18], req2[18];
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_x,sendtag);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_X,recvtag);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_X,sendtag);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_x,recvtag);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_y,sendtag);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_Y,recvtag);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_Y,sendtag);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_y,recvtag);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_z,sendtag);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_Z,recvtag);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_Z,sendtag);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_z,recvtag);
MPI_Status stat1[18],stat2[18];
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,Communicator,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator,&req2[5]);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_xy,sendtag);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_XY,recvtag);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_XY,sendtag);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_xy,recvtag);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy,sendtag);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_xY,recvtag);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_xY,sendtag);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy,recvtag);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator,&req2[9]);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_xz,sendtag);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ,recvtag);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ,sendtag);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_xz,recvtag);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_Xz,sendtag);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ,recvtag);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ,sendtag);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz,recvtag);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator,&req2[13]);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_yz,sendtag);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ,recvtag);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ,sendtag);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_yz,recvtag);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz,sendtag);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ,recvtag);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ,sendtag);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz,recvtag);
comm.waitAll( 18, req1 );
comm.waitAll( 18, req2 );
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
}
//***************************************************************************************
inline void CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& comm,
inline void CommunicateMeshHalo(DoubleArray &Mesh, MPI_Comm Communicator,
double *sendbuf_x,double *sendbuf_y,double *sendbuf_z,double *sendbuf_X,double *sendbuf_Y,double *sendbuf_Z,
double *sendbuf_xy,double *sendbuf_XY,double *sendbuf_xY,double *sendbuf_Xy,
double *sendbuf_xz,double *sendbuf_XZ,double *sendbuf_xZ,double *sendbuf_Xz,
@ -314,24 +318,42 @@ inline void CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& comm,
PackMeshData(sendList_yZ, sendCount_yZ ,sendbuf_yZ, MeshData);
PackMeshData(sendList_YZ, sendCount_YZ ,sendbuf_YZ, MeshData);
//......................................................................................
comm.sendrecv(sendbuf_x,sendCount_x,rank_x,sendtag,recvbuf_X,recvCount_X,rank_X,recvtag);
comm.sendrecv(sendbuf_X,sendCount_X,rank_X,sendtag,recvbuf_x,recvCount_x,rank_x,recvtag);
comm.sendrecv(sendbuf_y,sendCount_y,rank_y,sendtag,recvbuf_Y,recvCount_Y,rank_Y,recvtag);
comm.sendrecv(sendbuf_Y,sendCount_Y,rank_Y,sendtag,recvbuf_y,recvCount_y,rank_y,recvtag);
comm.sendrecv(sendbuf_z,sendCount_z,rank_z,sendtag,recvbuf_Z,recvCount_Z,rank_Z,recvtag);
comm.sendrecv(sendbuf_Z,sendCount_Z,rank_Z,sendtag,recvbuf_z,recvCount_z,rank_z,recvtag);
comm.sendrecv(sendbuf_xy,sendCount_xy,rank_xy,sendtag,recvbuf_XY,recvCount_XY,rank_XY,recvtag);
comm.sendrecv(sendbuf_XY,sendCount_XY,rank_XY,sendtag,recvbuf_xy,recvCount_xy,rank_xy,recvtag);
comm.sendrecv(sendbuf_Xy,sendCount_Xy,rank_Xy,sendtag,recvbuf_xY,recvCount_xY,rank_xY,recvtag);
comm.sendrecv(sendbuf_xY,sendCount_xY,rank_xY,sendtag,recvbuf_Xy,recvCount_Xy,rank_Xy,recvtag);
comm.sendrecv(sendbuf_xz,sendCount_xz,rank_xz,sendtag,recvbuf_XZ,recvCount_XZ,rank_XZ,recvtag);
comm.sendrecv(sendbuf_XZ,sendCount_XZ,rank_XZ,sendtag,recvbuf_xz,recvCount_xz,rank_xz,recvtag);
comm.sendrecv(sendbuf_Xz,sendCount_Xz,rank_Xz,sendtag,recvbuf_xZ,recvCount_xZ,rank_xZ,recvtag);
comm.sendrecv(sendbuf_xZ,sendCount_xZ,rank_xZ,sendtag,recvbuf_Xz,recvCount_Xz,rank_Xz,recvtag);
comm.sendrecv(sendbuf_yz,sendCount_yz,rank_yz,sendtag,recvbuf_YZ,recvCount_YZ,rank_YZ,recvtag);
comm.sendrecv(sendbuf_YZ,sendCount_YZ,rank_YZ,sendtag,recvbuf_yz,recvCount_yz,rank_yz,recvtag);
comm.sendrecv(sendbuf_Yz,sendCount_Yz,rank_Yz,sendtag,recvbuf_yZ,recvCount_yZ,rank_yZ,recvtag);
comm.sendrecv(sendbuf_yZ,sendCount_yZ,rank_yZ,sendtag,recvbuf_Yz,recvCount_Yz,rank_Yz,recvtag);
MPI_Sendrecv(sendbuf_x,sendCount_x,MPI_DOUBLE,rank_x,sendtag,
recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_X,sendCount_X,MPI_DOUBLE,rank_X,sendtag,
recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_y,sendCount_y,MPI_DOUBLE,rank_y,sendtag,
recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Y,sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,
recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_z,sendCount_z,MPI_DOUBLE,rank_z,sendtag,
recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Z,sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,
recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xy,sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,
recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_XY,sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,
recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Xy,sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,
recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xY,sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,
recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xz,sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,
recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_XZ,sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,
recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Xz,sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,
recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xZ,sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,
recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_yz,sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,
recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_YZ,sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,
recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Yz,sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,
recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_yZ,sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,
recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator,MPI_STATUS_IGNORE);
//........................................................................................
UnpackMeshData(recvList_x, recvCount_x ,recvbuf_x, MeshData);
UnpackMeshData(recvList_X, recvCount_X ,recvbuf_X, MeshData);

View File

@ -2,8 +2,9 @@
#define COMMUNICATION_HPP_INC
#include "common/Communication.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"
//#include "ProfilerApp.h"
/********************************************************
@ -11,19 +12,17 @@
********************************************************/
template<class TYPE>
Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src_data,
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, const Utilities::MPI& comm )
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, MPI_Comm comm )
{
if ( comm.getSize() == 1 ) {
return src_data.subset( { 0, (size_t) dst_size[0]-1, 0, (size_t) dst_size[1]-1, 0, (size_t) dst_size[2]-1 } );
}
#ifdef USE_MPI
// Get the src size
std::array<int,3> src_size;
int size0[3] = { (int) src_data.size(0), (int) src_data.size(1), (int) src_data.size(2) };
comm.maxReduce( size0, src_size.data(), 3 );
MPI_Allreduce( size0, src_size.data(), 3, MPI_INT, MPI_MAX, comm );
if ( !src_data.empty() )
ASSERT( src_size[0] == size0[0] && src_size[1] == size0[1] && src_size[2] == size0[2] );
// Check that dst_size matches on all ranks
comm.maxReduce( dst_size.data(), size0, 3 );
MPI_Allreduce( dst_size.data(), size0, 3, MPI_INT, MPI_MAX, comm );
ASSERT( dst_size[0] == size0[0] && dst_size[1] == size0[1] && dst_size[2] == size0[2] );
// Function to get overlap range
auto calcOverlap = []( int i1[3], int i2[3], int j1[3], int j2[3] ) {
@ -61,7 +60,7 @@ Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src
}
std::vector<MPI_Request> send_request( send_rank.size() );
for (size_t i=0; i<send_rank.size(); i++)
send_request[i] = comm.Isend( send_data[i].data(), send_data[i].length(), send_rank[i], 5462 );
MPI_Isend( send_data[i].data(), sizeof(TYPE)*send_data[i].length(), MPI_BYTE, send_rank[i], 5462, comm, &send_request[i]);
// Unpack data from the appropriate ranks (including myself)
Array<TYPE> dst_data( dst_size[0], dst_size[1], dst_size[2] );
int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy, dst_size[2] * dst_rank.kz };
@ -76,14 +75,17 @@ Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src
continue;
int rank = src_rank.getRankForBlock(i,j,k);
Array<TYPE> data( index[1] - index[0] + 1, index[3] - index[2] + 1, index[5] - index[4] + 1 );
comm.recv( data.data(), data.length(), rank, 5462 );
MPI_Recv( data.data(), sizeof(TYPE)*data.length(), MPI_BYTE, rank, 5462, comm, MPI_STATUS_IGNORE );
dst_data.copySubset( index, data );
}
}
}
// Free data
comm.waitAll( send_request.size(), send_request.data() );
MPI_Waitall( send_request.size(), send_request.data(), MPI_STATUSES_IGNORE );
return dst_data;
#else
return src_data.subset( { 0, dst_size[0]-1, 0, dst_size[1]-1, 0, dst_size[2]-1 );
#endif
}
@ -92,11 +94,27 @@ Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src
* Structure to fill halo cells *
********************************************************/
template<class TYPE>
fillHalo<TYPE>::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& info_,
fillHalo<TYPE>::fillHalo( MPI_Comm comm_, const RankInfoStruct& info_,
std::array<int,3> n_, std::array<int,3> ng_, int tag0, int depth_,
std::array<bool,3> fill, std::array<bool,3> periodic ):
comm(comm_), info(info_), n(n_), ng(ng_), depth(depth_)
{
if ( std::is_same<TYPE,double>() ) {
N_type = 1;
datatype = MPI_DOUBLE;
} else if ( std::is_same<TYPE,float>() ) {
N_type = 1;
datatype = MPI_FLOAT;
} else if ( sizeof(TYPE)%sizeof(double)==0 ) {
N_type = sizeof(TYPE) / sizeof(double);
datatype = MPI_DOUBLE;
} else if ( sizeof(TYPE)%sizeof(float)==0 ) {
N_type = sizeof(TYPE) / sizeof(float);
datatype = MPI_FLOAT;
} else {
N_type = sizeof(TYPE);
datatype = MPI_BYTE;
}
// Set the fill pattern
memset(fill_pattern,0,sizeof(fill_pattern));
if ( fill[0] ) {
@ -233,8 +251,8 @@ void fillHalo<TYPE>::fill( Array<TYPE>& data )
for (int k=0; k<3; k++) {
if ( !fill_pattern[i][j][k] )
continue;
recv_req[i][j][k] = comm.Irecv( recv[i][j][k], depth2*N_send_recv[i][j][k],
info.rank[i][j][k], tag[2-i][2-j][2-k] );
MPI_Irecv( recv[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype,
info.rank[i][j][k], tag[2-i][2-j][2-k], comm, &recv_req[i][j][k] );
}
}
}
@ -245,18 +263,19 @@ void fillHalo<TYPE>::fill( Array<TYPE>& data )
if ( !fill_pattern[i][j][k] )
continue;
pack( data, i-1, j-1, k-1, send[i][j][k] );
send_req[i][j][k] = comm.Isend( send[i][j][k], depth2*N_send_recv[i][j][k],
info.rank[i][j][k], tag[i][j][k] );
MPI_Isend( send[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype,
info.rank[i][j][k], tag[i][j][k], comm, &send_req[i][j][k] );
}
}
}
// Recv the dst data and unpack (we receive in reverse order to match the sends)
MPI_Status status;
for (int i=2; i>=0; i--) {
for (int j=2; j>=0; j--) {
for (int k=2; k>=0; k--) {
if ( !fill_pattern[i][j][k] )
continue;
comm.wait( recv_req[i][j][k] );
MPI_Wait(&recv_req[i][j][k],&status);
unpack( data, i-1, j-1, k-1, recv[i][j][k] );
}
}
@ -267,7 +286,7 @@ void fillHalo<TYPE>::fill( Array<TYPE>& data )
for (int k=0; k<3; k++) {
if ( !fill_pattern[i][j][k] )
continue;
comm.wait( send_req[i][j][k] );
MPI_Wait(&send_req[i][j][k],&status);
}
}
}

266
common/MPI_Helpers.cpp Normal file
View File

@ -0,0 +1,266 @@
#include "common/MPI_Helpers.h"
#include "common/Utilities.h"

#include <chrono>
/********************************************************
* Return the MPI data type *
********************************************************/
// Explicit specializations of getMPItype<TYPE>(): each one returns the MPI
// datatype constant that describes the corresponding C++ arithmetic type.
template<>
MPI_Datatype getMPItype<char>()
{
    return MPI_CHAR;
}

template<>
MPI_Datatype getMPItype<unsigned char>()
{
    return MPI_UNSIGNED_CHAR;
}

template<>
MPI_Datatype getMPItype<int>()
{
    return MPI_INT;
}

template<>
MPI_Datatype getMPItype<long>()
{
    return MPI_LONG;
}

template<>
MPI_Datatype getMPItype<unsigned long>()
{
    return MPI_UNSIGNED_LONG;
}

template<>
MPI_Datatype getMPItype<long long>()
{
    return MPI_LONG_LONG;
}

template<>
MPI_Datatype getMPItype<float>()
{
    return MPI_FLOAT;
}

template<>
MPI_Datatype getMPItype<double>()
{
    return MPI_DOUBLE;
}
/********************************************************
* Concrete implementations for packing/unpacking *
********************************************************/
// unsigned char: stored as its raw object representation (a single byte)

// Number of bytes pack() writes for an unsigned char.
template<>
size_t packsize<unsigned char>( const unsigned char& value )
{
    return sizeof( value );
}

// Copy the byte of rhs to the start of buffer.
template<>
void pack<unsigned char>( const unsigned char& rhs, char *buffer )
{
    memcpy( buffer, &rhs, sizeof( rhs ) );
}

// Read one byte from the start of buffer into data.
template<>
void unpack<unsigned char>( unsigned char& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// char: stored as its raw object representation (a single byte)

// Number of bytes pack() writes for a char.
template<>
size_t packsize<char>( const char& value )
{
    return sizeof( value );
}

// Copy the byte of rhs to the start of buffer.
template<>
void pack<char>( const char& rhs, char *buffer )
{
    memcpy( buffer, &rhs, sizeof( rhs ) );
}

// Read one byte from the start of buffer into data.
template<>
void unpack<char>( char& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// int: stored as its raw object representation (sizeof(int) bytes, host byte order)

// Number of bytes pack() writes for an int.
template<>
size_t packsize<int>( const int& value )
{
    return sizeof( value );
}

// Copy the bytes of rhs to the start of buffer.
template<>
void pack<int>( const int& rhs, char *buffer )
{
    memcpy( buffer, &rhs, sizeof( rhs ) );
}

// Read sizeof(int) bytes from the start of buffer into data.
template<>
void unpack<int>( int& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// unsigned int: stored as its raw object representation (host byte order)

// Number of bytes pack() writes for an unsigned int.
template<>
size_t packsize<unsigned int>( const unsigned int& value )
{
    return sizeof( value );
}

// Copy the bytes of rhs to the start of buffer
// (sizeof(unsigned int) is the same as sizeof(int)).
template<>
void pack<unsigned int>( const unsigned int& rhs, char *buffer )
{
    memcpy( buffer, &rhs, sizeof( rhs ) );
}

// Read sizeof(unsigned int) bytes from the start of buffer into data.
template<>
void unpack<unsigned int>( unsigned int& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// size_t: stored as its raw object representation (host byte order)

// Number of bytes pack() writes for a size_t.
template<>
size_t packsize<size_t>( const size_t& value )
{
    return sizeof( value );
}

// Copy the bytes of rhs to the start of buffer.
template<>
void pack<size_t>( const size_t& rhs, char *buffer )
{
    memcpy( buffer, &rhs, sizeof( rhs ) );
}

// Read sizeof(size_t) bytes from the start of buffer into data.
template<>
void unpack<size_t>( size_t& data, const char *buffer )
{
    memcpy( &data, buffer, sizeof( data ) );
}
// std::string: stored as the character data followed by a terminating '\0'

// Number of bytes pack() writes: every character plus the terminator.
template<>
size_t packsize<std::string>( const std::string& rhs )
{
    return rhs.length() + 1;
}

// Copy the characters of rhs, including the trailing '\0', to buffer.
template<>
void pack<std::string>( const std::string& rhs, char *buffer )
{
    const size_t nbytes = rhs.length() + 1;
    memcpy( buffer, rhs.c_str(), nbytes );
}

// Rebuild the string from the NUL-terminated text at the start of buffer.
// NOTE(review): reading stops at the first '\0', so a string with embedded
// NULs would not round-trip — confirm no caller packs such strings.
template<>
void unpack<std::string>( std::string& data, const char *buffer )
{
    data.assign( buffer );
}
/********************************************************
* Fake MPI routines *
********************************************************/
#ifndef USE_MPI
// Serial stand-in for MPI_Init (compiled only when USE_MPI is not defined).
// There is nothing to initialise, so both arguments are ignored and 0 is returned.
int MPI_Init( int* /*argc*/, char*** /*argv*/ )
{
    return 0;
}
// Serial stand-in for MPI_Init_thread (compiled only when USE_MPI is not defined).
// The command-line arguments are ignored; the requested threading level is
// reported back unchanged through `provided`, and 0 is returned.
int MPI_Init_thread( int* /*argc*/, char*** /*argv*/, int required, int *provided )
{
    provided[0] = required;
    return 0;
}
// Serial stand-in for MPI_Finalize (compiled only when USE_MPI is not defined).
// Nothing was initialised, so there is nothing to tear down; returns 0.
int MPI_Finalize( void )
{
    return 0;
}
// Serial stand-in for MPI_Comm_size (compiled only when USE_MPI is not defined).
// The communicator is ignored: the size written to `size` is always 1.
int MPI_Comm_size( MPI_Comm /*comm*/, int *size )
{
    size[0] = 1;
    return 0;
}
// Serial stand-in for MPI_Comm_rank (compiled only when USE_MPI is not defined).
// The communicator is ignored: the rank written to `rank` is always 0.
int MPI_Comm_rank( MPI_Comm /*comm*/, int *rank )
{
    rank[0] = 0;
    return 0;
}
// Serial stand-in for MPI_Barrier (compiled only when USE_MPI is not defined).
// Does nothing and returns 0.
int MPI_Barrier( MPI_Comm /*comm*/ )
{
    return 0;
}
// Serial stand-in for MPI_Waitall (compiled only when USE_MPI is not defined).
// All arguments are ignored; returns 0 immediately.
int MPI_Waitall( int /*count*/, MPI_Request /*requests*/[], MPI_Status /*statuses*/[] )
{
    return 0;
}
// Serial stand-in for MPI_Wait (compiled only when USE_MPI is not defined).
// Both arguments are ignored; returns 0 immediately.
int MPI_Wait( MPI_Request* /*request*/, MPI_Status* /*status*/ )
{
    return 0;
}
// Serial stand-in for MPI_Bcast (compiled only when USE_MPI is not defined).
// The buffer is left untouched and 0 is returned; no argument is read.
int MPI_Bcast( void* /*buffer*/, int /*count*/, MPI_Datatype /*datatype*/,
               int /*root*/, MPI_Comm /*comm*/ )
{
    return 0;
}
// Serial stand-in for MPI_Send (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: the stub invokes the project's
// ERROR macro (presumably aborts or throws — confirm in common/Utilities.h);
// the trailing return only satisfies the signature.
int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Recv (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: the stub invokes the project's
// ERROR macro (presumably aborts or throws — confirm in common/Utilities.h);
// the trailing return only satisfies the signature.
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
MPI_Comm comm, MPI_Status *status)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Isend (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: the stub invokes the project's
// ERROR macro (presumably aborts or throws — confirm in common/Utilities.h);
// `request` is never written and the trailing return only satisfies the signature.
int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPI_Comm comm, MPI_Request *request)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Irecv (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: the stub invokes the project's
// ERROR macro (presumably aborts or throws — confirm in common/Utilities.h);
// `request` is never written and the trailing return only satisfies the signature.
int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
int tag, MPI_Comm comm, MPI_Request *request)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Allreduce (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: `recvbuf` is never written and the
// stub invokes the project's ERROR macro (presumably aborts or throws —
// confirm in common/Utilities.h); the trailing return only satisfies the signature.
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Allgather (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: `recvbuf` is never written and the
// stub invokes the project's ERROR macro (presumably aborts or throws —
// confirm in common/Utilities.h); the trailing return only satisfies the signature.
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Allgatherv (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: `recvbuf` is never written and the
// stub invokes the project's ERROR macro (presumably aborts or throws —
// confirm in common/Utilities.h); the trailing return only satisfies the signature.
int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, const int *recvcounts, const int *displs,
MPI_Datatype recvtype, MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Sendrecv (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: `recvbuf` is never written and the
// stub invokes the project's ERROR macro (presumably aborts or throws —
// confirm in common/Utilities.h); the trailing return only satisfies the signature.
int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
int dest, int sendtag,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
int source, int recvtag,
MPI_Comm comm, MPI_Status *status)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Reduce (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: `recvbuf` is never written and the
// stub invokes the project's ERROR macro (presumably aborts or throws —
// confirm in common/Utilities.h); the trailing return only satisfies the signature.
int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
MPI_Op op, int root, MPI_Comm comm)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Comm_group (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: `group` is never written and the
// stub invokes the project's ERROR macro (presumably aborts or throws —
// confirm in common/Utilities.h); the trailing return only satisfies the signature.
int MPI_Comm_group(MPI_Comm comm, MPI_Group *group)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Comm_create (compiled only when USE_MPI is not defined).
// No single-process behaviour is provided: `newcomm` is never written and the
// stub invokes the project's ERROR macro (presumably aborts or throws —
// confirm in common/Utilities.h); the trailing return only satisfies the signature.
int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm)
{
ERROR("Not implimented yet");
return 0;
}
// Serial stand-in for MPI_Comm_dup (compiled only when USE_MPI is not defined).
// The "duplicate" is simply the same communicator value copied into `newcomm`;
// returns 0.
int MPI_Comm_dup( MPI_Comm comm, MPI_Comm *newcomm )
{
    newcomm[0] = comm;
    return 0;
}
// Serial stand-in for MPI_Wtime (compiled only when USE_MPI is not defined).
// Returns the number of seconds elapsed since the (arbitrary) epoch of
// std::chrono::steady_clock.  The previous stub always returned 0.0, which made
// every "t1 - t0" interval measured through MPI_Wtime come out as zero in
// non-MPI builds.  As with the real MPI_Wtime, only differences between two
// calls are meaningful; steady_clock guarantees they are never negative.
double MPI_Wtime( void )
{
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration<double>( since_epoch ).count();
}
int MPI_Comm_free(MPI_Comm *group)
{
return 0;
}
int MPI_Group_free(MPI_Group *group)
{
return 0;
}
#endif

239
common/MPI_Helpers.h Normal file
View File

@ -0,0 +1,239 @@
// This file contains wrappers for MPI routines and functions to pack/unpack data structures
#ifndef MPI_WRAPPERS_INC
#define MPI_WRAPPERS_INC
#include <string.h>
#include <vector>
#include <set>
#include <map>
#ifdef USE_MPI
// Include MPI
#include "mpi.h"
#else
// Create fake MPI types
typedef int MPI_Comm;
typedef int MPI_Request;
typedef int MPI_Status;
#define MPI_COMM_WORLD 0
#define MPI_COMM_SELF 0
#define MPI_COMM_NULL -1
#define MPI_GROUP_NULL -2
#define MPI_STATUS_IGNORE NULL
// Fake MPI datatype tags used when building without MPI.
// MPI_BYTE is appended at the end (so the values of the existing enumerators
// are unchanged) because code that selects a datatype from sizeof(TYPE) falls
// back to MPI_BYTE for types that are not a whole number of floats/doubles;
// without it such code cannot compile in a non-MPI build.
enum MPI_Datatype { MPI_LOGICAL, MPI_CHAR, MPI_UNSIGNED_CHAR, MPI_INT,
    MPI_UNSIGNED, MPI_LONG, MPI_UNSIGNED_LONG, MPI_LONG_LONG, MPI_FLOAT, MPI_DOUBLE,
    MPI_BYTE };
enum MPI_Op { MPI_MIN, MPI_MAX, MPI_SUM };
typedef int MPI_Group;
#define MPI_THREAD_SINGLE 0
#define MPI_THREAD_FUNNELED 1
#define MPI_THREAD_SERIALIZED 2
#define MPI_THREAD_MULTIPLE 3
// Fake MPI functions
int MPI_Init(int*,char***);
int MPI_Init_thread( int *argc, char ***argv, int required, int *provided );
int MPI_Finalize();
int MPI_Comm_size( MPI_Comm, int *size );
int MPI_Comm_rank( MPI_Comm, int *rank );
int MPI_Barrier(MPI_Comm);
int MPI_Wait(MPI_Request*,MPI_Status*);
int MPI_Waitall(int,MPI_Request[],MPI_Status[]);
int MPI_Bcast(void*,int,MPI_Datatype,int,MPI_Comm);
int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPI_Comm comm);
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
MPI_Comm comm, MPI_Status *status);
int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
MPI_Comm comm, MPI_Request *request);
int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
int tag, MPI_Comm comm, MPI_Request *request);
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
MPI_Comm comm);
int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, const int *recvcounts, const int *displs,
MPI_Datatype recvtype, MPI_Comm comm);
int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
int dest, int sendtag,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
int source, int recvtag,
MPI_Comm comm, MPI_Status *status);
int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
MPI_Op op, int root, MPI_Comm comm);
double MPI_Wtime( void );
int MPI_Comm_group(MPI_Comm comm, MPI_Group *group);
int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm);
int MPI_Comm_free(MPI_Comm *group);
int MPI_Group_free(MPI_Group *group);
int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm);
#endif
//! Get the size of the MPI_Comm
// Note: this is a thread and interrupt safe function
// Thin wrapper around MPI_Comm_size that returns the size by value.  The
// result is pre-set to 1 in case the call does not write it.
inline int comm_size( MPI_Comm comm ) {
    int nprocs = 1;
    MPI_Comm_size( comm, &nprocs );
    return nprocs;
}
//! Get the rank of the MPI_Comm
// Note: this is a thread and interrupt safe function
// Thin wrapper around MPI_Comm_rank that returns the rank by value.  The
// result is pre-set to 0 (it was 1, which is not a valid rank for a
// single-process communicator) so that a call which does not write the
// output still yields a rank consistent with comm_size()'s fallback of 1.
inline int comm_rank( MPI_Comm comm ) {
    int rank = 0;
    MPI_Comm_rank( comm, &rank );
    return rank;
}
//! Get the number of ranks in MPI_COMM_WORLD (forwards to comm_size)
inline int MPI_WORLD_SIZE( ) {
    const int world_size = comm_size( MPI_COMM_WORLD );
    return world_size;
}
//! Get the rank of the calling process in MPI_COMM_WORLD (forwards to comm_rank)
inline int MPI_WORLD_RANK( ) {
    const int world_rank = comm_rank( MPI_COMM_WORLD );
    return world_rank;
}
//! Return the appropriate MPI datatype for a class
template<class TYPE>
MPI_Datatype getMPItype();
//! Template function to return the buffer size required to pack a class
template<class TYPE>
size_t packsize( const TYPE& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const TYPE& rhs, char *buffer );
//! Template function to unpack a class from a buffer
template<class TYPE>
void unpack( TYPE& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::vector
template<class TYPE>
size_t packsize( const std::vector<TYPE>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const std::vector<TYPE>& rhs, char *buffer );
//! Template function to unpack a std::vector from a buffer
template<class TYPE>
void unpack( std::vector<TYPE>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::pair
template<class TYPE1, class TYPE2>
size_t packsize( const std::pair<TYPE1,TYPE2>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void pack( const std::pair<TYPE1,TYPE2>& rhs, char *buffer );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void unpack( std::pair<TYPE1,TYPE2>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::map
template<class TYPE1, class TYPE2>
size_t packsize( const std::map<TYPE1,TYPE2>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void pack( const std::map<TYPE1,TYPE2>& rhs, char *buffer );
//! Template function to pack a class to a buffer
template<class TYPE1, class TYPE2>
void unpack( std::map<TYPE1,TYPE2>& data, const char *buffer );
//! Template function to return the buffer size required to pack a std::set
template<class TYPE>
size_t packsize( const std::set<TYPE>& rhs );
//! Template function to pack a class to a buffer
template<class TYPE>
void pack( const std::set<TYPE>& rhs, char *buffer );
//! Template function to pack a class to a buffer
template<class TYPE>
void unpack( std::set<TYPE>& data, const char *buffer );
// Helper functions
//! Sum a double over all ranks of comm (MPI_Allreduce with MPI_SUM); the sum is returned on every rank
inline double sumReduce( MPI_Comm comm, double x )
{
    double total = 0;
    MPI_Allreduce( &x, &total, 1, MPI_DOUBLE, MPI_SUM, comm );
    return total;
}
//! Sum a float over all ranks of comm (MPI_Allreduce with MPI_SUM); the sum is returned on every rank
inline float sumReduce( MPI_Comm comm, float x )
{
    float total = 0;
    MPI_Allreduce( &x, &total, 1, MPI_FLOAT, MPI_SUM, comm );
    return total;
}
//! Sum an int over all ranks of comm (MPI_Allreduce with MPI_SUM); the sum is returned on every rank
inline int sumReduce( MPI_Comm comm, int x )
{
    int total = 0;
    MPI_Allreduce( &x, &total, 1, MPI_INT, MPI_SUM, comm );
    return total;
}
//! Sum a long long over all ranks of comm (MPI_Allreduce with MPI_SUM); the sum is returned on every rank
inline long long sumReduce( MPI_Comm comm, long long x )
{
    long long total = 0;
    MPI_Allreduce( &x, &total, 1, MPI_LONG_LONG, MPI_SUM, comm );
    return total;
}
//! Logical "any" over comm: each rank contributes 1 (true) or 0 (false) to the
//! integer sumReduce, and the result is true when the summed count is positive
inline bool sumReduce( MPI_Comm comm, bool x )
{
    const int contribution = x ? 1 : 0;
    return sumReduce( comm, contribution ) > 0;
}
//! Element-wise sum of a float vector over all ranks of comm
/*!
 * The result starts as a copy of x (which fixes its length) and is then
 * overwritten by MPI_Allreduce with MPI_SUM.
 * NOTE(review): presumably every rank must pass a vector of the same length - confirm with callers.
 */
inline std::vector<float> sumReduce( MPI_Comm comm, const std::vector<float>& x )
{
    std::vector<float> total( x );
    MPI_Allreduce( x.data(), total.data(), x.size(), MPI_FLOAT, MPI_SUM, comm );
    return total;
}
//! Element-wise sum of an int vector over all ranks of comm
/*!
 * The result starts as a copy of x (which fixes its length) and is then
 * overwritten by MPI_Allreduce with MPI_SUM.
 * NOTE(review): presumably every rank must pass a vector of the same length - confirm with callers.
 */
inline std::vector<int> sumReduce( MPI_Comm comm, const std::vector<int>& x )
{
    std::vector<int> total( x );
    MPI_Allreduce( x.data(), total.data(), x.size(), MPI_INT, MPI_SUM, comm );
    return total;
}
//! Maximum of a double over all ranks of comm (MPI_Allreduce with MPI_MAX); returned on every rank
inline double maxReduce( MPI_Comm comm, double x )
{
    double largest = 0;
    MPI_Allreduce( &x, &largest, 1, MPI_DOUBLE, MPI_MAX, comm );
    return largest;
}
//! Maximum of a float over all ranks of comm (MPI_Allreduce with MPI_MAX); returned on every rank
inline float maxReduce( MPI_Comm comm, float x )
{
    float largest = 0;
    MPI_Allreduce( &x, &largest, 1, MPI_FLOAT, MPI_MAX, comm );
    return largest;
}
//! Maximum of an int over all ranks of comm (MPI_Allreduce with MPI_MAX); returned on every rank
inline int maxReduce( MPI_Comm comm, int x )
{
    int largest = 0;
    MPI_Allreduce( &x, &largest, 1, MPI_INT, MPI_MAX, comm );
    return largest;
}
#endif
#include "common/MPI_Helpers.hpp"

View File

@ -1,9 +1,8 @@
// This file functions to pack/unpack data structures
#ifndef included_PackData_hpp
#define included_PackData_hpp
#include "IO/PackData.h"
// This file contains wrappers for MPI routines and functions to pack/unpack data structures
#ifndef MPI_WRAPPERS_HPP
#define MPI_WRAPPERS_HPP
#include "common/MPI_Helpers.h"
#include <string.h>
#include <vector>
#include <set>

View File

@ -64,11 +64,11 @@ Array<uint8_t> readMicroCT( const std::string& filename )
// Read the compressed micro CT data and distribute
Array<uint8_t> readMicroCT( const Database& domain, const Utilities::MPI& comm )
Array<uint8_t> readMicroCT( const Database& domain, MPI_Comm comm )
{
// Get the local problem info
auto n = domain.getVector<int>( "n" );
int rank = comm.getRank();
int rank = comm_rank(MPI_COMM_WORLD);
auto nproc = domain.getVector<int>( "nproc" );
RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] );

View File

@ -5,12 +5,11 @@
#include "common/Array.h"
#include "common/Communication.h"
#include "common/Database.h"
#include "common/MPI.h"
Array<uint8_t> readMicroCT( const std::string& filename );
Array<uint8_t> readMicroCT( const Database& domain, const Utilities::MPI& comm );
Array<uint8_t> readMicroCT( const Database& domain, MPI_Comm comm );
#endif

View File

@ -9,6 +9,7 @@
#include "common/Array.h"
#include "common/Utilities.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "common/Database.h"
#include "common/SpherePack.h"

View File

@ -12,6 +12,7 @@
#include "common/Array.h"
#include "common/Utilities.h"
#include "common/MPI_Helpers.h"
#include "common/Communication.h"
#include "common/Database.h"

View File

@ -14,49 +14,44 @@
/********************************************************************
* Constructor/Destructor *
********************************************************************/
UnitTest::UnitTest() : d_verbose( false ), d_comm( MPI_COMM_SELF )
UnitTest::UnitTest()
{
if ( Utilities::MPI::MPI_active() )
d_comm = MPI_COMM_WORLD;
#ifdef USE_MPI
comm = MPI_COMM_WORLD;
#endif
}
UnitTest::~UnitTest() { reset(); }
void UnitTest::reset()
{
d_mutex.lock();
mutex.lock();
// Clear the data forcing a reallocation
std::vector<std::string>().swap( d_pass );
std::vector<std::string>().swap( d_fail );
std::vector<std::string>().swap( d_expected );
d_mutex.unlock();
std::vector<std::string>().swap( pass_messages );
std::vector<std::string>().swap( fail_messages );
std::vector<std::string>().swap( expected_fail_messages );
mutex.unlock();
}
/********************************************************************
* Add a pass, fail, expected failure message in a thread-safe way *
********************************************************************/
void UnitTest::passes( std::string in )
void UnitTest::passes( const std::string &in )
{
d_mutex.lock();
if ( d_verbose )
printf( "UnitTest: %i passes: %s\n", d_comm.getRank(), in.data() );
d_pass.emplace_back( std::move( in ) );
d_mutex.unlock();
mutex.lock();
pass_messages.push_back( in );
mutex.unlock();
}
void UnitTest::failure( std::string in )
void UnitTest::failure( const std::string &in )
{
d_mutex.lock();
if ( d_verbose )
printf( "UnitTest: %i failed: %s\n", d_comm.getRank(), in.data() );
d_fail.emplace_back( std::move( in ) );
d_mutex.unlock();
mutex.lock();
fail_messages.push_back( in );
mutex.unlock();
}
void UnitTest::expected_failure( std::string in )
void UnitTest::expected_failure( const std::string &in )
{
d_mutex.lock();
if ( d_verbose )
printf( "UnitTest: %i expected_failure: %s\n", d_comm.getRank(), in.data() );
d_expected.emplace_back( std::move( in ) );
d_mutex.unlock();
mutex.lock();
expected_fail_messages.push_back( in );
mutex.unlock();
}
@ -64,6 +59,23 @@ void UnitTest::expected_failure( std::string in )
* Print a global report *
* Note: only rank 0 will print, all messages will be aggregated *
********************************************************************/
// Collect one int from every rank into a vector with getSize() entries.
// Every slot is pre-filled with the local value, so the result is already
// final when MPI is disabled or only one rank is present.
inline std::vector<int> UnitTest::allGather( int value ) const
{
    const int nranks = getSize();
    std::vector<int> gathered( nranks, value );
#ifdef USE_MPI
    if ( nranks > 1 )
        MPI_Allgather( &value, 1, MPI_INT, gathered.data(), 1, MPI_INT, comm );
#endif
    return gathered;
}
// Synchronize the ranks of the test communicator.
// Does nothing without USE_MPI or when getSize() reports a single rank.
inline void UnitTest::barrier() const
{
#ifdef USE_MPI
    const bool multiple_ranks = getSize() > 1;
    if ( multiple_ranks )
        MPI_Barrier( comm );
#endif
}
static inline void print_messages( const std::vector<std::vector<std::string>> &messages )
{
if ( messages.size() > 1 ) {
@ -81,27 +93,28 @@ static inline void print_messages( const std::vector<std::vector<std::string>> &
}
void UnitTest::report( const int level0 ) const
{
d_mutex.lock();
int size = d_comm.getSize();
int rank = d_comm.getRank();
// Give all processors a chance to print any remaining messages
d_comm.barrier();
Utilities::sleep_ms( 10 );
mutex.lock();
int size = getSize();
int rank = getRank();
// Broadcast the print level from rank 0
int level = d_comm.bcast( level0, 0 );
int level = level0;
#ifdef USE_MPI
if ( getSize() > 1 )
MPI_Bcast( &level, 1, MPI_INT, 0, comm );
#endif
if ( level < 0 || level > 2 )
ERROR( "Invalid print level" );
// Perform a global all gather to get the number of failures per processor
auto N_pass = d_comm.allGather<int>( d_pass.size() );
auto N_fail = d_comm.allGather<int>( d_fail.size() );
auto N_expected = d_comm.allGather<int>( d_expected.size() );
int N_pass_tot = 0;
int N_fail_tot = 0;
int N_expected_tot = 0;
auto N_pass = allGather( pass_messages.size() );
auto N_fail = allGather( fail_messages.size() );
auto N_expected_fail = allGather( expected_fail_messages.size() );
int N_pass_tot = 0;
int N_fail_tot = 0;
int N_expected_fail_tot = 0;
for ( int i = 0; i < size; i++ ) {
N_pass_tot += N_pass[i];
N_fail_tot += N_fail[i];
N_expected_tot += N_expected[i];
N_expected_fail_tot += N_expected_fail[i];
}
// Send all messages to rank 0 (if needed)
std::vector<std::vector<std::string>> pass_messages_rank( size );
@ -109,13 +122,13 @@ void UnitTest::report( const int level0 ) const
std::vector<std::vector<std::string>> expected_fail_rank( size );
// Get the pass messages
if ( ( level == 1 && N_pass_tot <= 20 ) || level == 2 )
pass_messages_rank = UnitTest::gatherMessages( d_pass, 1 );
pass_messages_rank = UnitTest::gatherMessages( pass_messages, 1 );
// Get the fail messages
if ( level == 1 || level == 2 )
fail_messages_rank = UnitTest::gatherMessages( d_fail, 2 );
fail_messages_rank = UnitTest::gatherMessages( fail_messages, 2 );
// Get the expected_fail messages
if ( ( level == 1 && N_expected_tot <= 50 ) || level == 2 )
expected_fail_rank = UnitTest::gatherMessages( d_expected, 2 );
if ( ( level == 1 && N_expected_fail_tot <= 50 ) || level == 2 )
expected_fail_rank = UnitTest::gatherMessages( expected_fail_messages, 2 );
// Print the results of all messages (only rank 0 will print)
if ( rank == 0 ) {
pout << std::endl;
@ -161,31 +174,31 @@ void UnitTest::report( const int level0 ) const
pout << std::endl;
// Print the tests that expected failed
pout << "Tests expected failed" << std::endl;
if ( level == 0 || ( level == 1 && N_expected_tot > 50 ) ) {
if ( level == 0 || ( level == 1 && N_expected_fail_tot > 50 ) ) {
// We want to print a summary
if ( size > 8 ) {
// Print 1 summary for all processors
printp( " %i tests expected failed (use report level 2 for more detail)\n",
N_expected_tot );
N_expected_fail_tot );
} else {
// Print a summary for each processor
for ( int i = 0; i < size; i++ )
printp( " %i tests expected failed (proc %i) (use report level 2 for more "
"detail)\n",
N_expected[i], i );
N_expected_fail[i], i );
}
} else {
// We want to print all messages
for ( int i = 0; i < size; i++ )
ASSERT( (int) expected_fail_rank[i].size() == N_expected[i] );
ASSERT( (int) expected_fail_rank[i].size() == N_expected_fail[i] );
print_messages( expected_fail_rank );
}
pout << std::endl;
}
// Add a barrier to synchronize all processors (rank 0 is much slower)
d_comm.barrier();
barrier();
Utilities::sleep_ms( 10 ); // Need a brief pause to allow any printing to finish
d_mutex.unlock();
mutex.unlock();
}
@ -195,8 +208,8 @@ void UnitTest::report( const int level0 ) const
std::vector<std::vector<std::string>> UnitTest::gatherMessages(
const std::vector<std::string> &local_messages, int tag ) const
{
const int rank = d_comm.getRank();
const int size = d_comm.getSize();
const int rank = getRank();
const int size = getSize();
std::vector<std::vector<std::string>> messages( size );
if ( rank == 0 ) {
// Rank 0 should receive all messages
@ -220,6 +233,7 @@ std::vector<std::vector<std::string>> UnitTest::gatherMessages(
void UnitTest::pack_message_stream(
const std::vector<std::string> &messages, const int rank, const int tag ) const
{
#ifdef USE_MPI
// Get the size of the messages
auto N_messages = (int) messages.size();
auto *msg_size = new int[N_messages];
@ -240,11 +254,18 @@ void UnitTest::pack_message_stream(
k += msg_size[i];
}
// Send the message stream (using a non-blocking send)
auto request = d_comm.Isend( data, size_data, rank, tag );
MPI_Request request;
MPI_Isend( data, size_data, MPI_CHAR, rank, tag, comm, &request );
// Wait for the communication to send and free the temporary memory
d_comm.wait( request );
MPI_Status status;
MPI_Wait( &request, &status );
delete[] data;
delete[] msg_size;
#else
NULL_USE( messages );
NULL_USE( rank );
NULL_USE( tag );
#endif
}
@ -253,15 +274,20 @@ void UnitTest::pack_message_stream(
********************************************************************/
std::vector<std::string> UnitTest::unpack_message_stream( const int rank, const int tag ) const
{
#ifdef USE_MPI
// Probe the message to get the message size
int size_data = d_comm.probe( rank, tag );
MPI_Status status;
MPI_Probe( rank, tag, comm, &status );
int size_data = -1;
MPI_Get_count( &status, MPI_BYTE, &size_data );
ASSERT( size_data >= 0 );
// Allocate memory to receive the data
auto *data = new char[size_data];
// receive the data (using a non-blocking receive)
auto request = d_comm.Irecv( data, size_data, rank, tag );
MPI_Request request;
MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request );
// Wait for the communication to be received
d_comm.wait( request );
MPI_Wait( &request, &status );
// Unpack the message stream
int N_messages = 0;
memcpy( &N_messages, data, sizeof( int ) );
@ -277,16 +303,77 @@ std::vector<std::string> UnitTest::unpack_message_stream( const int rank, const
messages[i] = std::string( &data[k], msg_size[i] );
k += msg_size[i];
}
// Delete the temporary memory
delete[] data;
return messages;
#else
NULL_USE( rank );
NULL_USE( tag );
return std::vector<std::string>();
#endif
}
/********************************************************************
* Other functions *
********************************************************************/
size_t UnitTest::NumPassGlobal() const { return d_comm.sumReduce( d_pass.size() ); }
size_t UnitTest::NumFailGlobal() const { return d_comm.sumReduce( d_fail.size() ); }
size_t UnitTest::NumExpectedFailGlobal() const { return d_comm.sumReduce( d_expected.size() ); }
// Rank of this process in the test communicator.
// Returns 0 when built without USE_MPI or when MPI_Initialized reports that
// MPI has not been initialized.
int UnitTest::getRank() const
{
#ifdef USE_MPI
    int initialized = 0;
    MPI_Initialized( &initialized );
    if ( initialized ) {
        int my_rank = 0;
        MPI_Comm_rank( comm, &my_rank );
        return my_rank;
    }
#endif
    return 0;
}
// Number of ranks in the test communicator.
// Returns 1 when built without USE_MPI or when MPI_Initialized reports that
// MPI has not been initialized.
int UnitTest::getSize() const
{
#ifdef USE_MPI
    int initialized = 0;
    MPI_Initialized( &initialized );
    if ( initialized ) {
        int nranks = 1;
        MPI_Comm_size( comm, &nranks );
        return nranks;
    }
#endif
    return 1;
}
// Total number of pass messages across all ranks.
// With a single rank (or without USE_MPI) this is just the local count;
// otherwise the local count is converted to int and summed with MPI_Allreduce.
size_t UnitTest::NumPassGlobal() const
{
    const size_t local_count = pass_messages.size();
#ifdef USE_MPI
    if ( getSize() > 1 ) {
        int contribution = static_cast<int>( local_count );
        int total = 0;
        MPI_Allreduce( &contribution, &total, 1, MPI_INT, MPI_SUM, comm );
        return static_cast<size_t>( total );
    }
#endif
    return local_count;
}
// Total number of failure messages across all ranks.
// With a single rank (or without USE_MPI) this is just the local count;
// otherwise the local count is converted to int and summed with MPI_Allreduce.
size_t UnitTest::NumFailGlobal() const
{
    const size_t local_count = fail_messages.size();
#ifdef USE_MPI
    if ( getSize() > 1 ) {
        int contribution = static_cast<int>( local_count );
        int total = 0;
        MPI_Allreduce( &contribution, &total, 1, MPI_INT, MPI_SUM, comm );
        return static_cast<size_t>( total );
    }
#endif
    return local_count;
}
// Total number of expected-failure messages across all ranks.
// With a single rank (or without USE_MPI) this is just the local count;
// otherwise the local count is converted to int and summed with MPI_Allreduce.
size_t UnitTest::NumExpectedFailGlobal() const
{
    const size_t local_count = expected_fail_messages.size();
#ifdef USE_MPI
    if ( getSize() > 1 ) {
        int contribution = static_cast<int>( local_count );
        int total = 0;
        MPI_Allreduce( &contribution, &total, 1, MPI_INT, MPI_SUM, comm );
        return static_cast<size_t>( total );
    }
#endif
    return local_count;
}

View File

@ -1,11 +1,13 @@
#ifndef included_UnitTest
#define included_UnitTest
#include "common/MPI.h"
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
#ifdef USE_MPI
#include "mpi.h"
#endif
/*!
@ -26,47 +28,47 @@
* \endcode
*/
class UnitTest final
class UnitTest
{
public:
//! Constructor
UnitTest();
//! Destructor
~UnitTest();
// Copy constructor
UnitTest( const UnitTest & ) = delete;
// Assignment operator
UnitTest &operator=( const UnitTest & ) = delete;
virtual ~UnitTest();
//! Indicate a passed test (thread-safe)
void passes( std::string in );
virtual void passes( const std::string &in );
//! Indicate a failed test (thread-safe)
void failure( std::string in );
virtual void failure( const std::string &in );
//! Indicate an expected failed test (thread-safe)
void expected_failure( std::string in );
virtual void expected_failure( const std::string &in );
//! Return the number of passed tests locally
inline size_t NumPassLocal() const { return d_pass.size(); }
virtual size_t NumPassLocal() const { return pass_messages.size(); }
//! Return the number of failed tests locally
inline size_t NumFailLocal() const { return d_fail.size(); }
virtual size_t NumFailLocal() const { return fail_messages.size(); }
//! Return the number of expected failed tests locally
inline size_t NumExpectedFailLocal() const { return d_expected.size(); }
virtual size_t NumExpectedFailLocal() const { return expected_fail_messages.size(); }
//! Return the number of passed tests globally (summed over all ranks)
size_t NumPassGlobal() const;
virtual size_t NumPassGlobal() const;
//! Return the number of failed tests globally (summed over all ranks)
size_t NumFailGlobal() const;
virtual size_t NumFailGlobal() const;
//! Return the number of expected failed tests globally (summed over all ranks)
size_t NumExpectedFailGlobal() const;
virtual size_t NumExpectedFailGlobal() const;
//! Return the rank of the current processor
int getRank() const;
//! Return the number of processors
int getSize() const;
/*!
* Print a report of the passed and failed tests.
@ -75,28 +77,29 @@ public:
* to print correctly).
* @param level Optional integer specifying the level of reporting (default: 1)
* 0: Report the number of tests passed, failed, and expected failures.
* 1: Report the passed tests (if <=20) or number passed,
* Report all failures,
* Report the expected failed tests (if <=50) or the number passed.
* 1: Report the number of passed tests (if <=20) or the number passed
* otherwise, report all failures, report the number of expected
* failed tests (if <=50) or the number passed otherwise.
* 2: Report all passed, failed, and expected failed tests.
*/
void report( const int level = 1 ) const;
virtual void report( const int level = 1 ) const;
//! Clear the messages
void reset();
//! Make the unit test operator verbose?
void verbose( bool verbose = true ) { d_verbose = verbose; }
protected:
std::vector<std::string> pass_messages;
std::vector<std::string> fail_messages;
std::vector<std::string> expected_fail_messages;
mutable std::mutex mutex;
#ifdef USE_MPI
MPI_Comm comm;
#endif
private:
std::vector<std::string> d_pass;
std::vector<std::string> d_fail;
std::vector<std::string> d_expected;
bool d_verbose;
mutable std::mutex d_mutex;
Utilities::MPI d_comm;
// Make the copy constructor private
UnitTest( const UnitTest & ) {}
private:
// Function to pack the messages into a single data stream and send to the given processor
// Note: This function does not return until the message stream has been sent
void pack_message_stream(
@ -106,7 +109,9 @@ private:
// Note: This function does not return until the message stream has been received
std::vector<std::string> unpack_message_stream( const int rank, const int tag ) const;
// Gather the messages
// Helper functions
inline void barrier() const;
inline std::vector<int> allGather( int value ) const;
inline std::vector<std::vector<std::string>> gatherMessages(
const std::vector<std::string> &local_messages, int tag ) const;
};

View File

@ -143,43 +143,35 @@
* Be sure to follow with ENABLE_WARNINGS
*/
// clang-format off
#ifndef DISABLE_WARNINGS
#if defined( USING_MSVC )
#ifdef DISABLE_WARNINGS
// Macros previously defined
#elif defined( USING_MSVC )
#define DISABLE_WARNINGS __pragma( warning( push, 0 ) )
#define ENABLE_WARNINGS __pragma( warning( pop ) )
#elif defined( USING_CLANG )
#define DISABLE_WARNINGS \
_Pragma( "clang diagnostic push" ) \
_Pragma( "clang diagnostic ignored \"-Wall\"" ) \
_Pragma( "clang diagnostic push" ) _Pragma( "clang diagnostic ignored \"-Wall\"" ) \
_Pragma( "clang diagnostic ignored \"-Wextra\"" ) \
_Pragma( "clang diagnostic ignored \"-Wunused-private-field\"" ) \
_Pragma( "clang diagnostic ignored \"-Wdeprecated-declarations\"" ) \
_Pragma( "clang diagnostic ignored \"-Winteger-overflow\"" )
_Pragma( "clang diagnostic ignored \"-Wmismatched-new-delete\"" )
#define ENABLE_WARNINGS _Pragma( "clang diagnostic pop" )
#elif defined( USING_GCC )
// Note: We cannot disable the -Wliteral-suffix message with this macro because the
// pragma command cannot suppress warnings from the C++ preprocessor. See gcc bug #53431.
#define DISABLE_WARNINGS \
_Pragma( "GCC diagnostic push" ) \
_Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wall\"" ) \
_Pragma( "GCC diagnostic push" ) _Pragma( "GCC diagnostic ignored \"-Wall\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wextra\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wpedantic\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wpragmas\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \
_Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wdeprecated-declarations\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wvirtual-move-assign\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-function\"" ) \
_Pragma( "GCC diagnostic ignored \"-Woverflow\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-variable\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wignored-qualifiers\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wenum-compare\"" ) \
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wterminate\"" )
#define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" )
#else
#define DISABLE_WARNINGS
#define ENABLE_WARNINGS
#endif
#endif
// clang-format on

View File

@ -1,4 +1,5 @@
extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;
// conserved moments
double rho,ux,uy,uz,uu;
// non-conserved moments
@ -110,12 +111,14 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int
}
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;
// conserved moments
double rho,ux,uy,uz,uu;
// non-conserved moments
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18;
int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18;
int nread;
for (int n=start; n<finish; n++){
// q=0
@ -272,4 +275,4 @@ extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int star
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
}
}
}

View File

@ -920,17 +920,21 @@ extern "C" void ScaLBL_D3Q7_ColorCollideMass(char *ID, double *A_even, double *A
double *Den, double *Phi, double *ColorGrad, double *Velocity, double beta, int N, bool pBC)
{
char id;
int idx,n,q,Cqx,Cqy,Cqz;
// int sendLoc;
double f0,f1,f2,f3,f4,f5,f6;
double na,nb,nab; // density values
double ux,uy,uz; // flow velocity
double nx,ny,nz,C; // color gradient components
double a1,a2,b1,b2;
double delta;
double sp,delta;
//double feq[6]; // equilibrium distributions
// Set of Discrete velocities for the D3Q19 Model
//int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}};
for (int n=0; n<N; n++){
for (n=0; n<N; n++){
id = ID[n];
if (id != 0 ){
@ -1220,20 +1224,25 @@ extern "C" void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *P
extern "C" void ScaLBL_ComputePhaseField(char *ID, double *Phi, double *Den, int N)
{
int n;
double Na,Nb;
//...................................................................
// Update Phi
for (int n=0; n<N; n++){
for (n=0; n<N; n++){
if (ID[n] > 0 ){
// Get the density value (Streaming already performed)
double Na = Den[n];
double Nb = Den[N+n];
Na = Den[n];
Nb = Den[N+n];
Phi[n] = (Na-Nb)/(Na+Nb);
}
}
//...................................................................
}
extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){
for (int n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){
int n;
for (n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){
Phi[n] = value;
}
}
@ -1246,7 +1255,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, do
double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
int ijk,nn;
int ijk,nn,n;
double fq;
// conserved moments
double rho,jx,jy,jz;
@ -1829,7 +1838,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di
double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
int nn,ijk,nread;
int n,nn,ijk,nread;
int nr1,nr2,nr3,nr4,nr5,nr6;
int nr7,nr8,nr9,nr10;
int nr11,nr12,nr13,nr14;
@ -1860,7 +1869,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di
const double mrt_V12=0.04166666666666666;
for (int n=start; n<finish; n++){
// read the component number densities
nA = Den[n];
nB = Den[Np + n];
@ -2483,7 +2492,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di
extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq,
double *Den, double *Phi, int start, int finish, int Np){
int idx, nread;
int idx,n,nread;
double fq,nA,nB;
for (int n=start; n<finish; n++){
@ -2569,11 +2578,11 @@ extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double
}
extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
int start, int finish, int Np)
{
int start, int finish, int Np){
int idx,n,nread;
double fq,nA,nB;
for (int n=start; n<finish; n++){
double fq,nA,nB;
// compute number density for component A
// q=0
fq = Aq[n];
@ -2637,25 +2646,27 @@ extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq,
Den[Np+n] = nB;
// save the phase indicator field
int idx = Map[n];
idx = Map[n];
Phi[idx] = (nA-nB)/(nA+nB);
}
}
extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *phi, double *ColorGrad, int start, int finish, int Np, int Nx, int Ny, int Nz){
int idx,n,N,i,j,k,nn;
// distributions
double f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
double nx,ny,nz;
for (int idx=0; idx<Np; idx++){
for (idx=0; idx<Np; idx++){
// Get the 1D index based on regular data layout
int n = Map[idx];
n = Map[idx];
//.......Back out the 3D indices for node n..............
int k = n/(Nx*Ny);
int j = (n-Nx*Ny*k)/Nx;
int i = n-Nx*Ny*k-Nx*j;
k = n/(Nx*Ny);
j = (n-Nx*Ny*k)/Nx;
i = n-Nx*Ny*k-Nx*j;
//........................................................................
//........Get 1-D index for this thread....................
// n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
@ -2664,7 +2675,7 @@ extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *phi, double *ColorGrad,
//........................................................................
//.................Read Phase Indicator Values............................
//........................................................................
int nn = n-1; // neighbor index (get convention)
nn = n-1; // neighbor index (get convention)
if (i-1<0) nn += Nx; // periodic BC along the x-boundary
f1 = phi[nn]; // get neighbor for phi - 1
//........................................................................
@ -2799,3 +2810,11 @@ extern "C" void ScaLBL_PhaseField_Init(int *Map, double *Phi, double *Den, doubl
}
}
// Copy one z-slice of Phi onto another.
//   Phi    - array indexed as Phi[slice*Nx*Ny + n]
//   Nx,Ny  - slice dimensions; each slice holds Nx*Ny values
//   Nz     - unused (kept for interface compatibility)
//   Source - index of the slice to read
//   Dest   - index of the slice to overwrite
// Offsets are computed in 64-bit arithmetic so that Source*Nx*Ny and
// Dest*Nx*Ny cannot overflow an int on large domains.
extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){
    (void) Nz;
    const long long sliceSize = static_cast<long long>(Nx)*Ny;
    const long long srcOffset = Source*sliceSize;
    const long long dstOffset = Dest*sliceSize;
    for (long long n=0; n<sliceSize; n++){
        Phi[dstOffset+n] = Phi[srcOffset+n];
    }
}

View File

@ -421,6 +421,42 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub
return dout;
}
// For each of the `count` sites n = list[idx], replace the distributions stored
// at offsets 6, 12, 13, 16 and 17 (in units of Np) by (constant - old value).
// The constant is 0.111... for offset 6 and 0.0555... for the other four.
// All five values are read before any is written, as in the original code.
extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
    const int    slot[5]   = { 6, 12, 13, 16, 17 };
    const double weight[5] = { 0.111111111111111111111111,
                               0.05555555555555555555556,
                               0.05555555555555555555556,
                               0.05555555555555555555556,
                               0.05555555555555555555556 };
    for (int idx=0; idx<count; idx++){
        const int site = list[idx];
        double updated[5];
        for (int k=0; k<5; k++)
            updated[k] = weight[k] - dist[slot[k]*Np+site];
        for (int k=0; k<5; k++)
            dist[slot[k]*Np+site] = updated[k];
    }
}
// For each of the `count` sites n = list[idx], replace the distributions stored
// at offsets 5, 11, 14, 15 and 18 (in units of Np) by (constant - old value).
// The constant is 0.111... for offset 5 and 0.0555... for the other four.
// All five values are read before any is written, as in the original code.
extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
    const int    slot[5]   = { 5, 11, 14, 15, 18 };
    const double weight[5] = { 0.111111111111111111111111,
                               0.05555555555555555555556,
                               0.05555555555555555555556,
                               0.05555555555555555555556,
                               0.05555555555555555555556 };
    for (int idx=0; idx<count; idx++){
        const int site = list[idx];
        double updated[5];
        for (int k=0; k<5; k++)
            updated[k] = weight[k] - dist[slot[k]*Np+site];
        for (int k=0; k<5; k++)
            dist[slot[k]*Np+site] = updated[k];
    }
}
extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_z(int *list, double *dist, double din, int count, int Np)
{
// distributions
@ -680,145 +716,6 @@ extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *d_neighborList, int *list,
}
}
// Boundary update driven by the value `din` for the `count` sites in `list`.
// For each site n = list[idx] the routine reads the distributions stored at
// offsets 0-4, 6-10, 12, 13, 16 and 17 (in units of Np), derives ux, uy, uz
// and the correction terms Cxz, Cyz, and writes new values to offsets
// 5, 11, 14, 15 and 18.
//   list  - site indices to update
//   dist  - distribution array indexed as dist[q*Np+n]
//   din   - prescribed value used in the uz expression
//   count - number of entries in list
//   Np    - stride between consecutive distributions
// NOTE(review): f11, f14, f15 and f18 are used in the ux/uy expressions before
// they are assigned in the current iteration (uninitialized on the first
// iteration, stale values from the previous site afterwards) - confirm
// whether this is intended.
extern "C" void ScaLBL_D3Q19_Pressure_BC_z(int *list, double *dist, double din, int count, int Np)
{
int n;
// distributions
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
double ux,uy,uz;
double Cxz,Cyz;
for (int idx=0; idx<count; idx++){
n = list[idx];
//........................................................................
// Read distributions from "opposite" memory convention
//........................................................................
//........................................................................
f0 = dist[n];
f1 = dist[Np+n];
f2 = dist[2*Np+n];
f3 = dist[3*Np+n];
f4 = dist[4*Np+n];
f6 = dist[6*Np+n];
f7 = dist[7*Np+n];
f8 = dist[8*Np+n];
f9 = dist[9*Np+n];
f10 = dist[10*Np+n];
f12 = dist[12*Np+n];
f13 = dist[13*Np+n];
f16 = dist[16*Np+n];
f17 = dist[17*Np+n];
//...................................................
//........Determine the inlet flow velocity.........
// uz = -1 + (f0+f3+f4+f1+f2+f7+f8+f10+f9
// + 2*(f5+f15+f18+f11+f14))/din;
//........Set the unknown distributions..............
// f6 = f5 - 0.3333333333333333*din*uz;
// f16 = f15 - 0.1666666666666667*din*uz;
// f17 = f16 - f3 + f4-f15+f18-f7+f8-f10+f9;
// f12= 0.5*(-din*uz+f5+f15+f18+f11+f14-f6-f16-
// f17+f1-f2-f14+f11+f7-f8-f10+f9);
// f13= -din*uz+f5+f15+f18+f11+f14-f6-f16-f17-f12;
// Determine the inlet flow velocity
// NOTE(review): ux reads f11/f14 and uy reads f15/f18, none of which were loaded above
ux = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14);
uy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18);
uz = din - (f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f6+f12+f13+f16+f17));
Cxz = 0.5*(f1+f7+f9-f2-f10-f8) - 0.3333333333333333*ux;
Cyz = 0.5*(f3+f7+f10-f4-f9-f8) - 0.3333333333333333*uy;
// Compute the five distributions that were not read from memory
f5 = f6 + 0.33333333333333338*uz;
f11 = f12 + 0.16666666666666678*(uz+ux)-Cxz;
f14 = f13 + 0.16666666666666678*(uz-ux)+Cxz;
f15 = f16 + 0.16666666666666678*(uy+uz)-Cyz;
f18 = f17 + 0.16666666666666678*(uz-uy)+Cyz;
//........Store in "opposite" memory location..........
dist[5*Np+n] = f5;
dist[11*Np+n] = f11;
dist[14*Np+n] = f14;
dist[15*Np+n] = f15;
dist[18*Np+n] = f18;
/*
printf("Site=%i\n",n);
printf("ux=%f, uy=%f, uz=%f\n",ux,uy,uz);
printf("Cxz=%f, Cyz=%f\n",Cxz,Cyz);
n = N;
*/
//...................................................
}
}
// Boundary update driven by the value `dout` for the `count` sites in `list`.
// For each site n = list[idx] the routine reads the distributions stored at
// offsets 0-5, 7-11, 14, 15 and 18 (in units of Np), derives ux, uy, uz and
// the correction terms Cxz, Cyz, and writes new values to offsets
// 6, 12, 13, 16 and 17.
//   list  - site indices to update
//   dist  - distribution array indexed as dist[q*Np+n]
//   dout  - prescribed value used in the uz expression
//   count - number of entries in list
//   Np    - stride between consecutive distributions
// NOTE(review): f12, f13, f16 and f17 are used in the ux/uy expressions before
// they are assigned in the current iteration (uninitialized on the first
// iteration, stale values from the previous site afterwards) - confirm
// whether this is intended.
extern "C" void ScaLBL_D3Q19_Pressure_BC_Z(int *list, double *dist, double dout, int count, int Np)
{
int n;
// distributions
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
double ux,uy,uz;
double Cxz,Cyz;
for (int idx=0; idx<count; idx++){
n = list[idx];
//........................................................................
// Read distributions
//........................................................................
f0 = dist[n];
f1 = dist[Np+n];
f2 = dist[2*Np+n];
f3 = dist[3*Np+n];
f4 = dist[4*Np+n];
f5 = dist[5*Np+n];
f7 = dist[7*Np+n];
f8 = dist[8*Np+n];
f9 = dist[9*Np+n];
f10 = dist[10*Np+n];
f11 = dist[11*Np+n];
f14 = dist[14*Np+n];
f15 = dist[15*Np+n];
f18 = dist[18*Np+n];
//........Determine the outlet flow velocity.........
// uz = 1 - (f0+f3+f4+f1+f2+f7+f8+f10+f9+
// 2*(f6+f16+f17+f12+f13))/dout;
//...................................................
//........Set the Unknown Distributions..............
// f5 = f6 + 0.33333333333333338*dout*uz;
// f15 = f16 + 0.16666666666666678*dout*uz;
// f18 = f15+f3-f4-f16+f17+f7-f8+f10-f9;
// f11= 0.5*(dout*uz+f6+ f16+f17+f12+f13-f5
// -f15-f18-f1+f2-f13+f12-f7+f8+f10-f9);
// f14= dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18-f11;
// Determine the outlet flow velocity
//ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
//uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
//uz = -1.0 + (f0+f4+f3+f2+f1+f8+f7+f9+f10 + 2*(f6+f16+f17+f12+f13))/dout;
// Determine the flow velocity at the boundary
// NOTE(review): ux reads f12/f13 and uy reads f16/f17, none of which were loaded above
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
uz = -dout + (f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f5+f11+f14+f15+f18));
Cxz = 0.5*(f1+f7+f9-f2-f10-f8) - 0.3333333333333333*ux;
Cyz = 0.5*(f3+f7+f10-f4-f9-f8) - 0.3333333333333333*uy;
// Compute the five distributions that were not read from memory
f6 = f5 - 0.33333333333333338*uz;
f12 = f11 - 0.16666666666666678*(uz+ux)+Cxz;
f13 = f14 - 0.16666666666666678*(uz-ux)-Cxz;
f16 = f15 - 0.16666666666666678*(uy+uz)+Cyz;
f17 = f18 - 0.16666666666666678*(uz-uy)-Cyz;
//........Store in "opposite" memory location..........
dist[6*Np+n] = f6;
dist[12*Np+n] = f12;
dist[13*Np+n] = f13;
dist[16*Np+n] = f16;
dist[17*Np+n] = f17;
//...................................................
}
}
extern "C" void ScaLBL_D3Q19_Velocity_BC_z(double *disteven, double *distodd, double uz,
int Nx, int Ny, int Nz)
{

View File

@ -72,6 +72,23 @@ extern "C" void ScaLBL_UnpackDenD3Q7(int *list, int count, double *recvbuf, int
}
}
// D3Q7 reflection boundary update for the sites in `list` (lower-case z variant).
// The value held in component 6 of each listed site, dist[6*Np+n], is overwritten
// with 0.2222... minus its previous value.
//   list  : site indices (count entries)
//   dist  : distribution array, modified in place
//   count : number of listed sites
//   Np    : stride between distribution components
extern "C" void ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count, int Np){
    for (int k=0; k<count; ++k){
        const int slot = 6*Np + list[k];
        dist[slot] = 0.222222222222222222222222 - dist[slot];
    }
}
// D3Q7 reflection boundary update for the sites in `list` (upper-case Z variant).
// The value held in component 5 of each listed site, dist[5*Np+n], is overwritten
// with 0.2222... minus its previous value.
//   list  : site indices (count entries)
//   dist  : distribution array, modified in place
//   count : number of listed sites
//   Np    : stride between distribution components
extern "C" void ScaLBL_D3Q7_Reflection_BC_Z(int *list, double *dist, int count, int Np){
    for (int k=0; k<count; ++k){
        const int slot = 5*Np + list[k];
        dist[slot] = 0.222222222222222222222222 - dist[slot];
    }
}
extern "C" void ScaLBL_D3Q7_Init(char *ID, double *f_even, double *f_odd, double *Den, int Nx, int Ny, int Nz)
{
int n,N;

347
cpu/D3Q7BC.cpp Normal file
View File

@ -0,0 +1,347 @@
// CPU Functions for D3Q7 Lattice Boltzmann Methods
// Boundary Conditions
// Dirichlet treatment of D3Q7 distributions adjacent to a solid.
// For each of the N list entries the distribution dist[iq] is replaced by
//     -dist[iq] + 0.25*BoundaryValue[ib]
// where iq comes from BounceBackDist_list and ib from BounceBackSolid_list.
//   dist                 : distribution array, modified in place
//   BoundaryValue        : prescribed value, indexed by solid site
//   BounceBackDist_list  : indices into dist (N entries)
//   BounceBackSolid_list : indices into BoundaryValue (N entries)
//   N                    : number of list entries
// NOTE(review): the original comment calls 0.25 "the speed of sound for D3Q7";
// it is presumably the squared lattice sound speed -- confirm.
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist,double *BoundaryValue,int *BounceBackDist_list,int *BounceBackSolid_list,int N){
    for (int k=0; k<N; ++k){
        const int fluid_slot = BounceBackDist_list[k];
        const int solid_site = BounceBackSolid_list[k];
        const double wall_value = BoundaryValue[solid_site]; // value carried by the solid site
        const double incoming = dist[fluid_slot];
        dist[fluid_slot] = -1.0*incoming + wall_value*0.25;
    }
}
// Neumann treatment of D3Q7 distributions adjacent to a solid.
// For each of the N list entries, BoundaryValue[ib] is added to dist[iq], where
// iq comes from BounceBackDist_list and ib from BounceBackSolid_list.
//   dist                 : distribution array, modified in place
//   BoundaryValue        : prescribed value, indexed by solid site
//   BounceBackDist_list  : indices into dist (N entries)
//   BounceBackSolid_list : indices into BoundaryValue (N entries)
//   N                    : number of list entries
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist,double *BoundaryValue,int *BounceBackDist_list,int *BounceBackSolid_list,int N){
    for (int k=0; k<N; ++k){
        const int fluid_slot = BounceBackDist_list[k];
        const int solid_site = BounceBackSolid_list[k];
        const double wall_value = BoundaryValue[solid_site]; // value carried by the solid site
        const double incoming = dist[fluid_slot];
        dist[fluid_slot] = incoming + wall_value;
    }
}
// Fixed-potential boundary for the D3Q7 Poisson distributions, direct-indexed
// ("AAeven") variant, lower-case z list.
// For every listed site the six components stored at offsets 0,2,1,4,3,5 (times Np)
// are summed and component 6 is set so that all seven add up to Vin.
//   list  : site indices (count entries)
//   dist  : distribution array, modified in place
//   Vin   : value the seven components must sum to
//   count : number of listed sites
//   Np    : stride between distribution components
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        // Accumulate the known components in the order f0,f1,f2,f3,f4,f6
        double known = dist[site];
        known += dist[2*Np+site];
        known += dist[1*Np+site];
        known += dist[4*Np+site];
        known += dist[3*Np+site];
        known += dist[5*Np+site];
        // Remaining component closes the sum
        dist[6*Np+site] = Vin - known;
    }
}
// Fixed-potential boundary for the D3Q7 Poisson distributions, direct-indexed
// ("AAeven") variant, upper-case Z list.
// For every listed site the six components stored at offsets 0,2,1,4,3,6 (times Np)
// are summed and component 5 is set so that all seven add up to Vout.
//   list  : site indices (count entries)
//   dist  : distribution array, modified in place
//   Vout  : value the seven components must sum to
//   count : number of listed sites
//   Np    : stride between distribution components
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        // Accumulate the known components in the order f0,f1,f2,f3,f4,f5
        double known = dist[site];
        known += dist[2*Np+site];
        known += dist[1*Np+site];
        known += dist[4*Np+site];
        known += dist[3*Np+site];
        known += dist[6*Np+site];
        // Remaining component closes the sum
        dist[5*Np+site] = Vout - known;
    }
}
// Fixed-potential boundary for the D3Q7 Poisson distributions, neighbour-list
// ("AAodd") variant, lower-case z list.
// For every listed site the rest component dist[n] and the five components found
// through d_neighborList rows 0,1,2,3,5 are summed; the location given by row 4
// is then set so that all seven add up to Vin.
//   d_neighborList : row k of site n is d_neighborList[n+k*Np], an index into dist
//   list           : site indices (count entries)
//   dist           : distribution array, modified in place
//   Vin            : value the seven components must sum to
//   count          : number of listed sites
//   Np             : stride between neighbour-list rows
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        // Accumulate the known components in the order f0,f1,f2,f3,f4,f6
        double known = dist[site];
        known += dist[d_neighborList[site]];
        known += dist[d_neighborList[site+Np]];
        known += dist[d_neighborList[site+2*Np]];
        known += dist[d_neighborList[site+3*Np]];
        known += dist[d_neighborList[site+5*Np]];
        // Unknown component lives where row 4 points
        const int target = d_neighborList[site+4*Np];
        dist[target] = Vin - known;
    }
}
// Fixed-potential boundary for the D3Q7 Poisson distributions, neighbour-list
// ("AAodd") variant, upper-case Z list.
// For every listed site the rest component dist[n] and the five components found
// through d_neighborList rows 0,1,2,3,4 are summed; the location given by row 5
// is then set so that all seven add up to Vout.
//   d_neighborList : row k of site n is d_neighborList[n+k*Np], an index into dist
//   list           : site indices (count entries)
//   dist           : distribution array, modified in place
//   Vout           : value the seven components must sum to
//   count          : number of listed sites
//   Np             : stride between neighbour-list rows
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        // Accumulate the known components in the order f0,f1,f2,f3,f4,f5
        double known = dist[site];
        known += dist[d_neighborList[site]];
        known += dist[d_neighborList[site+Np]];
        known += dist[d_neighborList[site+2*Np]];
        known += dist[d_neighborList[site+3*Np]];
        known += dist[d_neighborList[site+4*Np]];
        // Unknown component lives where row 5 points
        const int target = d_neighborList[site+5*Np];
        dist[target] = Vout - known;
    }
}
// Writes the prescribed value Vin into Psi for every site in `list`.
// Each list entry n is translated through Map, and Psi[Map[n]] is set to Vin.
//   list  : site indices (count entries)
//   Map   : translates a list entry into an index of Psi
//   Psi   : potential array, modified in place
//   Vin   : value to assign
//   count : number of listed sites
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
    for (int k=0; k<count; ++k){
        const int target = Map[list[k]];
        Psi[target] = Vin;
    }
}
// Writes the prescribed value Vout into Psi for every site in `list`.
// Each list entry n is translated through Map, and Psi[Map[n]] is set to Vout.
//   list  : site indices (count entries)
//   Map   : translates a list entry into an index of Psi
//   Psi   : potential array, modified in place
//   Vout  : value to assign
//   count : number of listed sites
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
    for (int k=0; k<count; ++k){
        const int target = Map[list[k]];
        Psi[target] = Vout;
    }
}
// Fixed-concentration boundary for the D3Q7 ion distributions, direct-indexed
// ("AAeven") variant, lower-case z list.
// For every listed site the six components stored at offsets 0,2,1,4,3,5 (times Np)
// are summed and component 6 is set so that all seven add up to Cin.
//   list  : site indices (count entries)
//   dist  : distribution array, modified in place
//   Cin   : value the seven components must sum to
//   count : number of listed sites
//   Np    : stride between distribution components
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        // Accumulate the known components in the order f0,f1,f2,f3,f4,f6
        double known = dist[site];
        known += dist[2*Np+site];
        known += dist[1*Np+site];
        known += dist[4*Np+site];
        known += dist[3*Np+site];
        known += dist[5*Np+site];
        // Remaining component closes the sum
        dist[6*Np+site] = Cin - known;
    }
}
// Fixed-concentration boundary for the D3Q7 ion distributions, direct-indexed
// ("AAeven") variant, upper-case Z list.
// For every listed site the six components stored at offsets 0,2,1,4,3,6 (times Np)
// are summed and component 5 is set so that all seven add up to Cout.
//   list  : site indices (count entries)
//   dist  : distribution array, modified in place
//   Cout  : value the seven components must sum to
//   count : number of listed sites
//   Np    : stride between distribution components
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        // Accumulate the known components in the order f0,f1,f2,f3,f4,f5
        double known = dist[site];
        known += dist[2*Np+site];
        known += dist[1*Np+site];
        known += dist[4*Np+site];
        known += dist[3*Np+site];
        known += dist[6*Np+site];
        // Remaining component closes the sum
        dist[5*Np+site] = Cout - known;
    }
}
// Fixed-concentration boundary for the D3Q7 ion distributions, neighbour-list
// ("AAodd") variant, lower-case z list.
// For every listed site the rest component dist[n] and the five components found
// through d_neighborList rows 0,1,2,3,5 are summed; the location given by row 4
// is then set so that all seven add up to Cin.
//   d_neighborList : row k of site n is d_neighborList[n+k*Np], an index into dist
//   list           : site indices (count entries)
//   dist           : distribution array, modified in place
//   Cin            : value the seven components must sum to
//   count          : number of listed sites
//   Np             : stride between neighbour-list rows
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        // Accumulate the known components in the order f0,f1,f2,f3,f4,f6
        double known = dist[site];
        known += dist[d_neighborList[site]];
        known += dist[d_neighborList[site+Np]];
        known += dist[d_neighborList[site+2*Np]];
        known += dist[d_neighborList[site+3*Np]];
        known += dist[d_neighborList[site+5*Np]];
        // Unknown component lives where row 4 points
        const int target = d_neighborList[site+4*Np];
        dist[target] = Cin - known;
    }
}
// Fixed-concentration boundary for the D3Q7 ion distributions, neighbour-list
// ("AAodd") variant, upper-case Z list.
// For every listed site the rest component dist[n] and the five components found
// through d_neighborList rows 0,1,2,3,4 are summed; the location given by row 5
// is then set so that all seven add up to Cout.
//   d_neighborList : row k of site n is d_neighborList[n+k*Np], an index into dist
//   list           : site indices (count entries)
//   dist           : distribution array, modified in place
//   Cout           : value the seven components must sum to
//   count          : number of listed sites
//   Np             : stride between neighbour-list rows
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        // Accumulate the known components in the order f0,f1,f2,f3,f4,f5
        double known = dist[site];
        known += dist[d_neighborList[site]];
        known += dist[d_neighborList[site+Np]];
        known += dist[d_neighborList[site+2*Np]];
        known += dist[d_neighborList[site+3*Np]];
        known += dist[d_neighborList[site+4*Np]];
        // Unknown component lives where row 5 points
        const int target = d_neighborList[site+5*Np];
        dist[target] = Cout - known;
    }
}
// Prescribed-flux boundary for the D3Q7 ion distributions, direct-indexed
// ("AAeven") variant, lower-case z list.  FluxIn is the inward flux.
// For each listed site the components at offsets 0,2,1,4,3,5 (times Np) are
// summed, the local z-velocity is taken from VelocityZ[n], and component 6 is set to
//   (FluxIn + (1-0.5/tau)*f6 - 0.5*uz*sum/tau) / (1 - 0.5/tau + 0.5*uz/tau)
// where f6 is the component at offset 5.
//   list      : site indices (count entries)
//   dist      : distribution array, modified in place
//   FluxIn    : prescribed inward flux
//   tau       : relaxation time used in the closure
//   VelocityZ : z-velocity per site, indexed by the list entry
//   count     : number of listed sites
//   Np        : stride between distribution components
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        const double opposite = dist[5*Np+site];   // f6, the known partner of the unknown
        // Sum of the known components in the order f0,f1,f2,f3,f4,f6
        double known = dist[site];
        known += dist[2*Np+site];
        known += dist[1*Np+site];
        known += dist[4*Np+site];
        known += dist[3*Np+site];
        known += opposite;
        const double uz = VelocityZ[site];
        const double numer = FluxIn+(1.0-0.5/tau)*opposite-0.5*uz*known/tau;
        const double denom = 1.0-0.5/tau+0.5*uz/tau;
        dist[6*Np+site] = numer/denom;
    }
}
// Prescribed-flux boundary for the D3Q7 ion distributions, direct-indexed
// ("AAeven") variant, upper-case Z list.  FluxIn is the inward flux.
// For each listed site the components at offsets 0,2,1,4,3,6 (times Np) are
// summed, the local z-velocity is taken from VelocityZ[n], and component 5 is set to
//   (FluxIn + (1-0.5/tau)*f5 + 0.5*uz*sum/tau) / (1 - 0.5/tau - 0.5*uz/tau)
// where f5 is the component at offset 6.
//   list      : site indices (count entries)
//   dist      : distribution array, modified in place
//   FluxIn    : prescribed inward flux
//   tau       : relaxation time used in the closure
//   VelocityZ : z-velocity per site, indexed by the list entry
//   count     : number of listed sites
//   Np        : stride between distribution components
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        const double opposite = dist[6*Np+site];   // f5, the known partner of the unknown
        // Sum of the known components in the order f0,f1,f2,f3,f4,f5
        double known = dist[site];
        known += dist[2*Np+site];
        known += dist[1*Np+site];
        known += dist[4*Np+site];
        known += dist[3*Np+site];
        known += opposite;
        const double uz = VelocityZ[site];
        const double numer = FluxIn+(1.0-0.5/tau)*opposite+0.5*uz*known/tau;
        const double denom = 1.0-0.5/tau-0.5*uz/tau;
        dist[5*Np+site] = numer/denom;
    }
}
// Prescribed-flux boundary for the D3Q7 ion distributions, neighbour-list
// ("AAodd") variant, lower-case z list.  FluxIn is the inward flux.
// For each listed site the rest component dist[n] and the components reached
// through d_neighborList rows 0,1,2,3,5 are summed, the local z-velocity is taken
// from VelocityZ[n], and the location given by row 4 is set to
//   (FluxIn + (1-0.5/tau)*f6 - 0.5*uz*sum/tau) / (1 - 0.5/tau + 0.5*uz/tau)
// where f6 is the component reached through row 5.
//   d_neighborList : row k of site n is d_neighborList[n+k*Np], an index into dist
//   list           : site indices (count entries)
//   dist           : distribution array, modified in place
//   FluxIn         : prescribed inward flux
//   tau            : relaxation time used in the closure
//   VelocityZ      : z-velocity per site, indexed by the list entry
//   count          : number of listed sites
//   Np             : stride between neighbour-list rows
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        const double opposite = dist[d_neighborList[site+5*Np]];   // f6
        // Sum of the known components in the order f0,f1,f2,f3,f4,f6
        double known = dist[site];
        known += dist[d_neighborList[site]];
        known += dist[d_neighborList[site+Np]];
        known += dist[d_neighborList[site+2*Np]];
        known += dist[d_neighborList[site+3*Np]];
        known += opposite;
        const double uz = VelocityZ[site];
        const double numer = FluxIn+(1.0-0.5/tau)*opposite-0.5*uz*known/tau;
        const double denom = 1.0-0.5/tau+0.5*uz/tau;
        // Unknown component lives where row 4 points
        const int target = d_neighborList[site+4*Np];
        dist[target] = numer/denom;
    }
}
// Prescribed-flux boundary for the D3Q7 ion distributions, neighbour-list
// ("AAodd") variant, upper-case Z list.  FluxIn is the inward flux.
// For each listed site the rest component dist[n] and the components reached
// through d_neighborList rows 0,1,2,3,4 are summed, the local z-velocity is taken
// from VelocityZ[n], and the location given by row 5 is set to
//   (FluxIn + (1-0.5/tau)*f5 + 0.5*uz*sum/tau) / (1 - 0.5/tau - 0.5*uz/tau)
// where f5 is the component reached through row 4.
//   d_neighborList : row k of site n is d_neighborList[n+k*Np], an index into dist
//   list           : site indices (count entries)
//   dist           : distribution array, modified in place
//   FluxIn         : prescribed inward flux
//   tau            : relaxation time used in the closure
//   VelocityZ      : z-velocity per site, indexed by the list entry
//   count          : number of listed sites
//   Np             : stride between neighbour-list rows
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    for (int k=0; k<count; ++k){
        const int site = list[k];
        const double opposite = dist[d_neighborList[site+4*Np]];   // f5
        // Sum of the known components in the order f0,f1,f2,f3,f4,f5
        double known = dist[site];
        known += dist[d_neighborList[site]];
        known += dist[d_neighborList[site+Np]];
        known += dist[d_neighborList[site+2*Np]];
        known += dist[d_neighborList[site+3*Np]];
        known += opposite;
        const double uz = VelocityZ[site];
        const double numer = FluxIn+(1.0-0.5/tau)*opposite+0.5*uz*known/tau;
        const double denom = 1.0-0.5/tau-0.5*uz/tau;
        // Unknown component lives where row 5 points
        const int target = d_neighborList[site+5*Np];
        dist[target] = numer/denom;
    }
}

2629
cpu/Greyscale.cpp Normal file

File diff suppressed because it is too large Load Diff

1396
cpu/GreyscaleColor.cpp Normal file

File diff suppressed because it is too large Load Diff

254
cpu/Ion.cpp Normal file
View File

@ -0,0 +1,254 @@
#include <stdio.h>
// Computes the ion concentration of each site in [start,finish) as the sum of its
// seven D3Q7 distributions, neighbour-list ("AAodd") variant.
// The rest component is dist[n]; components q=1..6 are read from the locations
// neighborList[n+(q-1)*Np].  The total is stored in Den[n].
//   neighborList : row k of site n is neighborList[n+k*Np], an index into dist
//   dist         : distribution array (read only here)
//   Den          : output concentration, one value per site
//   start,finish : half-open range of sites to process
//   Np           : stride between neighbour-list rows
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
    for (int site=start; site<finish; ++site){
        double total = dist[site];                        // q=0
        for (int row=0; row<6; ++row){
            total += dist[neighborList[site+row*Np]];     // q=row+1
        }
        Den[site] = total;
    }
}
// Computes the ion concentration of each site in [start,finish) as the sum of its
// seven D3Q7 distributions, direct-indexed ("AAeven") variant.
// Components are read from dist at offsets 0,2,1,4,3,6,5 (times Np), in that
// order, and the total is stored in Den[n].
//   dist         : distribution array (read only here)
//   Den          : output concentration, one value per site
//   start,finish : half-open range of sites to process
//   Np           : stride between distribution components
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
    for (int site=start; site<finish; ++site){
        double total = dist[site];                 // q=0
        // Each direction pair is stored swapped: q=1 at 2*Np, q=2 at 1*Np, and so on
        for (int lower=1; lower<6; lower+=2){
            total += dist[(lower+1)*Np+site];      // q=lower
            total += dist[lower*Np+site];          // q=lower+1
        }
        Den[site] = total;
    }
}
// Relaxation step for the D3Q7 ion distributions, neighbour-list ("AAodd") variant.
// For each site in [start,finish):
//   * the concentration Ci is taken from Den[n];
//   * the transport velocity per axis is Velocity + (zi*Di/Vt)*ElectricField;
//   * the seven distributions are read (rest at dist[n], q=1..6 through
//     neighborList rows 0..5) and relaxed with factor rlx towards
//     0.25*Ci (rest) and 0.125*Ci*(1 +/- 4*velocity) (moving directions);
//   * each relaxed value of a direction pair is written to the location its
//     partner was read from (q=1 -> row 1, q=2 -> row 0, ...).
//   neighborList  : row k of site n is neighborList[n+k*Np], an index into dist
//   dist          : distribution array, modified in place
//   Den           : concentration per site (read)
//   Velocity      : x,y,z components at n, n+Np, n+2*Np (read)
//   ElectricField : x,y,z components at n, n+Np, n+2*Np (read)
//   Di, zi, Vt    : enter only through the factor zi*Di/Vt
//   rlx           : relaxation factor
//   start,finish  : half-open range of sites to process
//   Np            : stride between rows / components
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    for (int site=start; site<finish; ++site){
        // Load the macroscopic data
        const double Ci = Den[site];
        double drift[3];   // fluid velocity plus field-induced velocity, per axis
        for (int axis=0; axis<3; ++axis){
            const double field = ElectricField[site+axis*Np];
            const double fluid = Velocity[site+axis*Np];
            const double induced = zi*Di/Vt*field;
            drift[axis] = fluid + induced;
        }

        // Gather all seven distributions before anything is overwritten
        int slot[6];
        double f[7];
        f[0] = dist[site];
        for (int row=0; row<6; ++row){
            slot[row] = neighborList[site+row*Np];
            f[row+1] = dist[slot[row]];
        }

        // Relax and scatter; pair members swap storage locations
        dist[site] = f[0]*(1.0-rlx)+rlx*0.25*Ci;
        for (int axis=0; axis<3; ++axis){
            const int plus = 2*axis+1;    // q = 1,3,5
            const int minus = 2*axis+2;   // q = 2,4,6
            dist[slot[minus-1]] = f[plus]*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(drift[axis]));
            dist[slot[plus-1]] = f[minus]*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(drift[axis]));
        }
    }
}
// Relaxation step for the D3Q7 ion distributions, direct-indexed ("AAeven") variant.
// For each site in [start,finish):
//   * the concentration Ci is taken from Den[n];
//   * the transport velocity per axis is Velocity + (zi*Di/Vt)*ElectricField;
//   * the seven distributions are read with each direction pair swapped
//     (q=1 from 2*Np, q=2 from 1*Np, ...) and relaxed with factor rlx towards
//     0.25*Ci (rest) and 0.125*Ci*(1 +/- 4*velocity) (moving directions);
//   * the relaxed value of direction q is written to dist[q*Np+n].
//   dist          : distribution array, modified in place
//   Den           : concentration per site (read)
//   Velocity      : x,y,z components at n, n+Np, n+2*Np (read)
//   ElectricField : x,y,z components at n, n+Np, n+2*Np (read)
//   Di, zi, Vt    : enter only through the factor zi*Di/Vt
//   rlx           : relaxation factor
//   start,finish  : half-open range of sites to process
//   Np            : stride between components
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    for (int site=start; site<finish; ++site){
        // Load the macroscopic data
        const double Ci = Den[site];
        double drift[3];   // fluid velocity plus field-induced velocity, per axis
        for (int axis=0; axis<3; ++axis){
            const double field = ElectricField[site+axis*Np];
            const double fluid = Velocity[site+axis*Np];
            const double induced = zi*Di/Vt*field;
            drift[axis] = fluid + induced;
        }

        // Gather all seven distributions before anything is overwritten
        double f[7];
        f[0] = dist[site];
        for (int lower=1; lower<6; lower+=2){
            f[lower] = dist[(lower+1)*Np+site];
            f[lower+1] = dist[lower*Np+site];
        }

        // Relax and store in natural order
        dist[site] = f[0]*(1.0-rlx)+rlx*0.25*Ci;
        for (int axis=0; axis<3; ++axis){
            const int plus = 2*axis+1;    // q = 1,3,5
            const int minus = 2*axis+2;   // q = 2,4,6
            dist[plus*Np+site] = f[plus]*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(drift[axis]));
            dist[minus*Np+site] = f[minus]*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(drift[axis]));
        }
    }
}
// Initialises the D3Q7 ion distributions and concentration to a uniform value.
// For every site n in [0,Np) the rest component is set to 0.25*DenInit, the six
// moving components to 0.125*DenInit each, and Den[n] to DenInit.
//   dist    : distribution array (7*Np entries written), component q at dist[q*Np+n]
//   Den     : concentration array (Np entries written)
//   DenInit : uniform initial concentration
//   Np      : number of sites / stride between components
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np)
{
    for (int site=0; site<Np; ++site){
        dist[0*Np+site] = 0.25*DenInit;
        for (int q=1; q<=6; ++q){
            dist[q*Np+site] = 0.125*DenInit;
        }
        Den[site] = DenInit;
    }
}
// Initialises the D3Q7 ion distributions from an already populated concentration
// array.  For every site n in [0,Np) the rest component is set to 0.25*Den[n] and
// the six moving components to 0.125*Den[n] each.  Den itself is not modified.
//   dist : distribution array (7*Np entries written), component q at dist[q*Np+n]
//   Den  : per-site concentration to initialise from (read)
//   Np   : number of sites / stride between components
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np)
{
    for (int site=0; site<Np; ++site){
        const double local_den = Den[site];
        dist[0*Np+site] = 0.25*local_den;
        for (int q=1; q<=6; ++q){
            dist[q*Np+site] = 0.125*local_den;
        }
    }
}
// Adds the charge carried by one ion species to the charge density field.
// For every site n in [start,finish) the contribution is F*IonValence*Ci, where
// Ci = Den[n+ion_component*Np] and F = 96485 C/mol (Faraday's constant).
// When ion_component is 0 the contribution replaces ChargeDensity[n]; for any
// later component it is added to the value already stored.
//   Den           : concentrations, species i stored at Den[n+i*Np] (read)
//   ChargeDensity : charge density per site, updated in place
//   IonValence    : valence of this species
//   ion_component : index of this species; 0 starts a fresh accumulation
//   start,finish  : half-open range of sites to process
//   Np            : stride between species in Den
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
    const double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
    // The reset for the first species used to be written as CD*(ion_component>0).
    // That still read the old buffer content and, because 0*NaN and 0*Inf are NaN,
    // let an uninitialised or non-finite buffer poison the result.  Selecting the
    // starting value explicitly avoids both the read and the NaN propagation.
    const bool accumulate = (ion_component > 0);
    for (int n=start; n<finish; n++){
        const double Ci = Den[n+ion_component*Np];//ion concentration of species i
        const double CD_tmp = F*IonValence*Ci;
        const double previous = accumulate ? ChargeDensity[n] : 0.0;
        ChargeDensity[n] = previous + CD_tmp;
    }
}

372
cpu/Poisson.cpp Normal file
View File

@ -0,0 +1,372 @@
// Computes the electric potential of each site in [start,finish) as the sum of its
// seven D3Q7 distributions, neighbour-list ("AAodd") variant.
// The rest component is dist[n]; components q=1..6 are read from the locations
// neighborList[n+(q-1)*Np].  The total is stored in Psi[Map[n]].
//   neighborList : row k of site n is neighborList[n+k*Np], an index into dist
//   Map          : translates a site index into an index of Psi
//   dist         : distribution array (read only here)
//   Psi          : output potential
//   start,finish : half-open range of sites to process
//   Np           : stride between neighbour-list rows
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
    for (int site=start; site<finish; ++site){
        double total = dist[site];                        // q=0
        for (int row=0; row<6; ++row){
            total += dist[neighborList[site+row*Np]];     // q=row+1
        }
        Psi[Map[site]] = total;
    }
}
// Computes the electric potential of each site in [start,finish) as the sum of its
// seven D3Q7 distributions, direct-indexed ("AAeven") variant.
// Components are read from dist at offsets 0,2,1,4,3,6,5 (times Np), in that
// order, and the total is stored in Psi[Map[n]].
//   Map          : translates a site index into an index of Psi
//   dist         : distribution array (read only here)
//   Psi          : output potential
//   start,finish : half-open range of sites to process
//   Np           : stride between distribution components
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    for (int site=start; site<finish; ++site){
        double total = dist[site];                 // q=0
        // Each direction pair is stored swapped: q=1 at 2*Np, q=2 at 1*Np, and so on
        for (int lower=1; lower<6; lower+=2){
            total += dist[(lower+1)*Np+site];      // q=lower
            total += dist[lower*Np+site];          // q=lower+1
        }
        Psi[Map[site]] = total;
    }
}
// Relaxation step for the D3Q7 Poisson distributions, neighbour-list ("AAodd") variant.
// For each site in [start,finish):
//   * the source term is Den_charge[n]/epsilon_LB and the potential is Psi[Map[n]];
//   * the seven distributions are read (rest at dist[n], q=1..6 through
//     neighborList rows 0..5);
//   * the electric field components (f1-f2, f3-f4, f5-f6) * (1/tau) * 4 are written
//     to ElectricField at n, n+Np, n+2*Np (original note: unit is V/lu);
//   * every distribution is relaxed with rlx = 1/tau and receives the weight
//     (0.25 rest, 0.125 moving) times (rlx*psi + source); each member of a
//     direction pair is written to the location its partner was read from.
//   neighborList  : row k of site n is neighborList[n+k*Np], an index into dist
//   Map           : translates a site index into an index of Psi
//   dist          : distribution array, modified in place
//   Den_charge    : charge density per site (read)
//   Psi           : potential (read)
//   ElectricField : output field, three components with stride Np
//   tau           : relaxation time
//   epsilon_LB    : divisor applied to the charge density
//   start,finish  : half-open range of sites to process
//   Np            : stride between rows / components
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    for (int site=start; site<finish; ++site){
        // Load the source term and the current potential
        const double rho_e = Den_charge[site]/epsilon_LB;
        const double psi = Psi[Map[site]];

        // Gather all seven distributions before anything is overwritten
        int slot[6];
        double f[7];
        f[0] = dist[site];
        for (int row=0; row<6; ++row){
            slot[row] = neighborList[site+row*Np];
            f[row+1] = dist[slot[row]];
        }

        // Electric field from the first-order moments
        for (int axis=0; axis<3; ++axis){
            ElectricField[site+axis*Np] = (f[2*axis+1]-f[2*axis+2])*rlx*4.0;
        }

        // Relax and scatter; pair members swap storage locations
        dist[site] = f[0]*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
        for (int axis=0; axis<3; ++axis){
            const int plus = 2*axis+1;    // q = 1,3,5
            const int minus = 2*axis+2;   // q = 2,4,6
            dist[slot[minus-1]] = f[plus]*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
            dist[slot[plus-1]] = f[minus]*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        }
    }
}
// Relaxation step for the D3Q7 Poisson distributions, direct-indexed ("AAeven") variant.
// For each site in [start,finish):
//   * the source term is Den_charge[n]/epsilon_LB and the potential is Psi[Map[n]];
//   * the seven distributions are read with each direction pair swapped
//     (q=1 from 2*Np, q=2 from 1*Np, ...);
//   * the electric field components (f1-f2, f3-f4, f5-f6) * (1/tau) * 4 are written
//     to ElectricField at n, n+Np, n+2*Np (original note: unit is V/lu);
//   * every distribution is relaxed with rlx = 1/tau and receives the weight
//     (0.25 rest, 0.125 moving) times (rlx*psi + source); the relaxed value of
//     direction q is written to dist[q*Np+n].
//   Map           : translates a site index into an index of Psi
//   dist          : distribution array, modified in place
//   Den_charge    : charge density per site (read)
//   Psi           : potential (read)
//   ElectricField : output field, three components with stride Np
//   tau           : relaxation time
//   epsilon_LB    : divisor applied to the charge density
//   start,finish  : half-open range of sites to process
//   Np            : stride between components
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    for (int site=start; site<finish; ++site){
        // Load the source term and the current potential
        const double rho_e = Den_charge[site]/epsilon_LB;
        const double psi = Psi[Map[site]];

        // Gather all seven distributions before anything is overwritten
        double f[7];
        f[0] = dist[site];
        for (int lower=1; lower<6; lower+=2){
            f[lower] = dist[(lower+1)*Np+site];
            f[lower+1] = dist[lower*Np+site];
        }

        // Electric field from the first-order moments
        for (int axis=0; axis<3; ++axis){
            ElectricField[site+axis*Np] = (f[2*axis+1]-f[2*axis+2])*rlx*4.0;
        }

        // Relax and store in natural order
        dist[site] = f[0]*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
        for (int q=1; q<=6; ++q){
            dist[q*Np+site] = f[q]*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        }
    }
}
// Initialises the D3Q7 Poisson distributions from the potential field.
// For every site n in [start,finish) the potential Psi[Map[n]] is distributed as
// 0.25*psi on the rest component and 0.125*psi on each of the six moving ones.
//   Map          : translates a site index into an index of Psi
//   dist         : distribution array, component q at dist[q*Np+n]; written
//   Psi          : potential to initialise from (read)
//   start,finish : half-open range of sites to process
//   Np           : stride between components
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np)
{
    for (int site=start; site<finish; ++site){
        const int cell = Map[site];
        dist[0*Np+site] = 0.25*Psi[cell];
        for (int q=1; q<=6; ++q){
            dist[q*Np+site] = 0.125*Psi[cell];
        }
    }
}
//extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC,
// int strideY, int strideZ,int start, int finish, int Np){
//
// int n,nn;
// int ijk;
// int id;
// // distributions
// double m1,m2,m3,m4,m5,m6,m7,m8,m9;
// double m10,m11,m12,m13,m14,m15,m16,m17,m18;
// double nx,ny,nz;
//
// for (n=start; n<finish; n++){
//
// // Get the 1D index based on regular data layout
// ijk = Map[n];
// // COMPUTE THE COLOR GRADIENT
// //........................................................................
// //.................Read Phase Indicator Values............................
// //........................................................................
// nn = ijk-1; // neighbor index (get convention)
// id = ID[nn];
// m1 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 1
// //........................................................................
// nn = ijk+1; // neighbor index (get convention)
// id = ID[nn];
// m2 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 2
// //........................................................................
// nn = ijk-strideY; // neighbor index (get convention)
// id = ID[nn];
// m3 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 3
// //........................................................................
// nn = ijk+strideY; // neighbor index (get convention)
// id = ID[nn];
// m4 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 4
// //........................................................................
// nn = ijk-strideZ; // neighbor index (get convention)
// id = ID[nn];
// m5 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 5
// //........................................................................
// nn = ijk+strideZ; // neighbor index (get convention)
// id = ID[nn];
// m6 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 6
// //........................................................................
// nn = ijk-strideY-1; // neighbor index (get convention)
// id = ID[nn];
// m7 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 7
// //........................................................................
// nn = ijk+strideY+1; // neighbor index (get convention)
// id = ID[nn];
// m8 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 8
// //........................................................................
// nn = ijk+strideY-1; // neighbor index (get convention)
// id = ID[nn];
// m9 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 9
// //........................................................................
// nn = ijk-strideY+1; // neighbor index (get convention)
// id = ID[nn];
// m10 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 10
// //........................................................................
// nn = ijk-strideZ-1; // neighbor index (get convention)
// id = ID[nn];
// m11 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 11
// //........................................................................
// nn = ijk+strideZ+1; // neighbor index (get convention)
// id = ID[nn];
// m12 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 12
// //........................................................................
// nn = ijk+strideZ-1; // neighbor index (get convention)
// id = ID[nn];
// m13 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 13
// //........................................................................
// nn = ijk-strideZ+1; // neighbor index (get convention)
// id = ID[nn];
// m14 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 14
// //........................................................................
// nn = ijk-strideZ-strideY; // neighbor index (get convention)
// id = ID[nn];
// m15 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 15
// //........................................................................
// nn = ijk+strideZ+strideY; // neighbor index (get convention)
// id = ID[nn];
// m16 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 16
// //........................................................................
// nn = ijk+strideZ-strideY; // neighbor index (get convention)
// id = ID[nn];
// m17 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 17
// //........................................................................
// nn = ijk-strideZ+strideY; // neighbor index (get convention)
// id = ID[nn];
// m18 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 18
// //............Compute the Color Gradient...................................
// //nx = 1.f/6.f*(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14));
// //ny = 1.f/6.f*(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18));
// //nz = 1.f/6.f*(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18));
// nx = 1.f/6.f*(m1-m2);//but looks like it needs to multiply another factor of 3
// ny = 1.f/6.f*(m3-m4);
// nz = 1.f/6.f*(m5-m6);
//
// ElectricField[n] = nx;
// ElectricField[Np+n] = ny;
// ElectricField[2*Np+n] = nz;
// }
//}
//extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){
// int n;
// // distributions
// double f1,f2,f3,f4,f5,f6;
// double Ex,Ey,Ez;
// double rlx=1.0/tau;
//
// for (n=0; n<Np; n++){
// //........................................................................
// // Registers to store the distributions
// //........................................................................
// f1 = dist[Np+n];
// f2 = dist[2*Np+n];
// f3 = dist[3*Np+n];
// f4 = dist[4*Np+n];
// f5 = dist[5*Np+n];
// f6 = dist[6*Np+n];
// //.................Compute the Electric Field...................................
// //Ex = (f1-f2)*rlx*4.5;//NOTE the unit of electric field here is V/lu
// //Ey = (f3-f4)*rlx*4.5;
// //Ez = (f5-f6)*rlx*4.5;
// Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
// Ey = (f3-f4)*rlx*4.0;
// Ez = (f5-f6)*rlx*4.0;
// //..................Write the Electric Field.....................................
// ElectricField[0*Np+n] = Ex;
// ElectricField[1*Np+n] = Ey;
// ElectricField[2*Np+n] = Ez;
// //........................................................................
// }
//}

999
cpu/Stokes.cpp Normal file
View File

@ -0,0 +1,999 @@
#include <stdio.h>
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np)
{
double fq;
// conserved moments
double rho,jx,jy,jz;
double ux,uy,uz;
// non-conserved moments
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
// body force due to electric field
double rhoE;//charge density
double Ex,Ey,Ez;
// total body force
double Fx,Fy,Fz;
constexpr double mrt_V1=0.05263157894736842;
constexpr double mrt_V2=0.012531328320802;
constexpr double mrt_V3=0.04761904761904762;
constexpr double mrt_V4=0.004594820384294068;
constexpr double mrt_V5=0.01587301587301587;
constexpr double mrt_V6=0.0555555555555555555555555;
constexpr double mrt_V7=0.02777777777777778;
constexpr double mrt_V8=0.08333333333333333;
constexpr double mrt_V9=0.003341687552213868;
constexpr double mrt_V10=0.003968253968253968;
constexpr double mrt_V11=0.01388888888888889;
constexpr double mrt_V12=0.04166666666666666;
for (int n=start; n<finish; n++){
//Load data
rhoE = ChargeDensity[n];
Ex = ElectricField[n+0*Np];
Ey = ElectricField[n+1*Np];
Ez = ElectricField[n+2*Np];
//compute total body force, including input body force (Gx,Gy,Gz)
Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;//the extra factors at the end necessarily convert unit from phys to LB
Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
// q=0
fq = dist[n];
rho = fq;
m1 = -30.0*fq;
m2 = 12.0*fq;
// q=1
fq = dist[2*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jx = fq;
m4 = -4.0*fq;
m9 = 2.0*fq;
m10 = -4.0*fq;
// f2 = dist[10*Np+n];
fq = dist[1*Np+n];
rho += fq;
m1 -= 11.0*(fq);
m2 -= 4.0*(fq);
jx -= fq;
m4 += 4.0*(fq);
m9 += 2.0*(fq);
m10 -= 4.0*(fq);
// q=3
fq = dist[4*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jy = fq;
m6 = -4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 = fq;
m12 = -2.0*fq;
// q = 4
fq = dist[3*Np+n];
rho+= fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jy -= fq;
m6 += 4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 += fq;
m12 -= 2.0*fq;
// q=5
fq = dist[6*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jz = fq;
m8 = -4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 -= fq;
m12 += 2.0*fq;
// q = 6
fq = dist[5*Np+n];
rho+= fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jz -= fq;
m8 += 4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 -= fq;
m12 += 2.0*fq;
// q=7
fq = dist[8*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jy += fq;
m6 += fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 = fq;
m16 = fq;
m17 = -fq;
// q = 8
fq = dist[7*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jy -= fq;
m6 -= fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 += fq;
m16 -= fq;
m17 += fq;
// q=9
fq = dist[10*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jy -= fq;
m6 -= fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 -= fq;
m16 += fq;
m17 += fq;
// q = 10
fq = dist[9*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jy += fq;
m6 += fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 -= fq;
m16 -= fq;
m17 -= fq;
// q=11
fq = dist[12*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jz += fq;
m8 += fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 = fq;
m16 -= fq;
m18 = fq;
// q=12
fq = dist[11*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jz -= fq;
m8 -= fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 += fq;
m16 += fq;
m18 -= fq;
// q=13
fq = dist[14*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jz -= fq;
m8 -= fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 -= fq;
m16 -= fq;
m18 -= fq;
// q=14
fq = dist[13*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jz += fq;
m8 += fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 -= fq;
m16 += fq;
m18 += fq;
// q=15
fq = dist[16*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy += fq;
m6 += fq;
jz += fq;
m8 += fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 = fq;
m17 += fq;
m18 -= fq;
// q=16
fq = dist[15*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy -= fq;
m6 -= fq;
jz -= fq;
m8 -= fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 += fq;
m17 -= fq;
m18 += fq;
// q=17
fq = dist[18*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy += fq;
m6 += fq;
jz -= fq;
m8 -= fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 -= fq;
m17 += fq;
m18 += fq;
// q=18
fq = dist[17*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy -= fq;
m6 -= fq;
jz += fq;
m8 += fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 -= fq;
m17 -= fq;
m18 -= fq;
// write the velocity
ux = jx / rho0;
uy = jy / rho0;
uz = jz / rho0;
Velocity[n] = ux;
Velocity[Np+n] = uy;
Velocity[2*Np+n] = uz;
//........................................................................
// READ THE DISTRIBUTIONS
// (read from opposite array due to previous swap operation)
//........................................................................
//..............incorporate external force................................................
//..............carry out relaxation process...............................................
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
//.......................................................................................................
//.................inverse transformation......................................................
// q=0
fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
dist[n] = fq;
// q = 1
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx;
dist[1*Np+n] = fq;
// q=2
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
dist[2*Np+n] = fq;
// q = 3
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
dist[3*Np+n] = fq;
// q = 4
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
dist[4*Np+n] = fq;
// q = 5
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
dist[5*Np+n] = fq;
// q = 6
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
dist[6*Np+n] = fq;
// q = 7
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
dist[7*Np+n] = fq;
// q = 8
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
dist[8*Np+n] = fq;
// q = 9
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
dist[9*Np+n] = fq;
// q = 10
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
dist[10*Np+n] = fq;
// q = 11
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
dist[11*Np+n] = fq;
// q = 12
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
dist[12*Np+n] = fq;
// q = 13
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
dist[13*Np+n] = fq;
// q= 14
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
dist[14*Np+n] = fq;
// q = 15
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
dist[15*Np+n] = fq;
// q = 16
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
dist[16*Np+n] = fq;
// q = 17
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
dist[17*Np+n] = fq;
// q = 18
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
dist[18*Np+n] = fq;
//........................................................................
}
}
// D3Q19 multiple-relaxation-time (MRT) collision with a body force made of a
// constant part (Gx,Gy,Gz) plus an electric part (charge density * field).
//
// For every site n in [start, finish) the kernel
//   1. reads the rest population from dist[n] and populations q = 1..18 from
//      dist[neighborList[n + (q-1)*Np]],
//   2. accumulates density, momentum and the non-conserved moments,
//   3. stores jx/rho0, jy/rho0, jz/rho0 in Velocity (computed from the
//      populations as read; the body force is not added to it),
//   4. relaxes the moments toward their equilibria,
//   5. transforms back and writes each post-collision population q to the slot
//      from which the opposite direction was read (pairs 1<->2, 3<->4, ...).
//
// Parameters
//   neighborList   index table, 18 entries per site, strided by Np
//   dist           populations; read and overwritten in place
//   Velocity       output, 3*Np values laid out as [x | y | z]
//   ChargeDensity  per-site charge density (Np values)
//   ElectricField  3*Np values laid out as [x | y | z]
//   rlx_setA       relaxation rate applied to m1, m2, m9..m15
//   rlx_setB       relaxation rate applied to m4, m6, m8, m16..m18
//   Gx, Gy, Gz     constant body force added at every site
//   rho0           reference density used for the velocity and the quadratic
//                  equilibrium terms
//   den_scale, h, time_conv
//                  scale factors applied to the electric force
//                  (presumably h is a grid resolution in micrometres and
//                  time_conv a time-step length -- TODO confirm with caller)
//   start, finish  half-open range of sites to process
//   Np             stride between components / populations
//
// NOTE(review): the m10 equilibrium now divides by rho0 like every other
// quadratic equilibrium term in this kernel (it previously divided by rho).
// The kernel that precedes this one in the file contains the same "/rho" in
// its m10 relaxation and should receive the same change.
// NOTE(review): 0.16666666 and 0.08333333333 are truncated approximations of
// 1/6 and 1/12; they are kept unchanged here.
extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np)
{
    double fq;
    // conserved moments
    double rho,jx,jy,jz;
    double ux,uy,uz;
    // non-conserved moments
    double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
    int nread;
    // body force due to electric field
    double rhoE; // charge density
    double Ex,Ey,Ez;
    // total body force
    double Fx,Fy,Fz;

    // coefficients of the inverse moment transform
    constexpr double mrt_V1=0.05263157894736842;
    constexpr double mrt_V2=0.012531328320802;
    constexpr double mrt_V3=0.04761904761904762;
    constexpr double mrt_V4=0.004594820384294068;
    constexpr double mrt_V5=0.01587301587301587;
    constexpr double mrt_V6=0.0555555555555555555555555;
    constexpr double mrt_V7=0.02777777777777778;
    constexpr double mrt_V8=0.08333333333333333;
    constexpr double mrt_V9=0.003341687552213868;
    constexpr double mrt_V10=0.003968253968253968;
    constexpr double mrt_V11=0.01388888888888889;
    constexpr double mrt_V12=0.04166666666666666;

    for (int n=start; n<finish; n++){
        // load the charge density and the electric field at this site
        rhoE = ChargeDensity[n];
        Ex = ElectricField[n+0*Np];
        Ey = ElectricField[n+1*Np];
        Ez = ElectricField[n+2*Np];
        // total body force = input body force (Gx,Gy,Gz) + scaled electric force
        Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
        Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
        Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;

        //.................forward transformation (populations -> moments).................
        // q=0 (rest population, stored at the site itself)
        fq = dist[n];
        rho = fq;
        m1  = -30.0*fq;
        m2  = 12.0*fq;

        // q=1
        nread = neighborList[n];
        fq = dist[nread];
        rho += fq;
        m1 -= 11.0*fq;
        m2 -= 4.0*fq;
        jx = fq;
        m4 = -4.0*fq;
        m9 = 2.0*fq;
        m10 = -4.0*fq;

        // q=2
        nread = neighborList[n+Np];
        fq = dist[nread];
        rho += fq;
        m1 -= 11.0*(fq);
        m2 -= 4.0*(fq);
        jx -= fq;
        m4 += 4.0*(fq);
        m9 += 2.0*(fq);
        m10 -= 4.0*(fq);

        // q=3
        nread = neighborList[n+2*Np];
        fq = dist[nread];
        rho += fq;
        m1 -= 11.0*fq;
        m2 -= 4.0*fq;
        jy = fq;
        m6 = -4.0*fq;
        m9 -= fq;
        m10 += 2.0*fq;
        m11 = fq;
        m12 = -2.0*fq;

        // q=4
        nread = neighborList[n+3*Np];
        fq = dist[nread];
        rho+= fq;
        m1 -= 11.0*fq;
        m2 -= 4.0*fq;
        jy -= fq;
        m6 += 4.0*fq;
        m9 -= fq;
        m10 += 2.0*fq;
        m11 += fq;
        m12 -= 2.0*fq;

        // q=5
        nread = neighborList[n+4*Np];
        fq = dist[nread];
        rho += fq;
        m1 -= 11.0*fq;
        m2 -= 4.0*fq;
        jz = fq;
        m8 = -4.0*fq;
        m9 -= fq;
        m10 += 2.0*fq;
        m11 -= fq;
        m12 += 2.0*fq;

        // q=6
        nread = neighborList[n+5*Np];
        fq = dist[nread];
        rho+= fq;
        m1 -= 11.0*fq;
        m2 -= 4.0*fq;
        jz -= fq;
        m8 += 4.0*fq;
        m9 -= fq;
        m10 += 2.0*fq;
        m11 -= fq;
        m12 += 2.0*fq;

        // q=7
        nread = neighborList[n+6*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jx += fq;
        m4 += fq;
        jy += fq;
        m6 += fq;
        m9 += fq;
        m10 += fq;
        m11 += fq;
        m12 += fq;
        m13 = fq;
        m16 = fq;
        m17 = -fq;

        // q=8
        nread = neighborList[n+7*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jx -= fq;
        m4 -= fq;
        jy -= fq;
        m6 -= fq;
        m9 += fq;
        m10 += fq;
        m11 += fq;
        m12 += fq;
        m13 += fq;
        m16 -= fq;
        m17 += fq;

        // q=9
        nread = neighborList[n+8*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jx += fq;
        m4 += fq;
        jy -= fq;
        m6 -= fq;
        m9 += fq;
        m10 += fq;
        m11 += fq;
        m12 += fq;
        m13 -= fq;
        m16 += fq;
        m17 += fq;

        // q=10
        nread = neighborList[n+9*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jx -= fq;
        m4 -= fq;
        jy += fq;
        m6 += fq;
        m9 += fq;
        m10 += fq;
        m11 += fq;
        m12 += fq;
        m13 -= fq;
        m16 -= fq;
        m17 -= fq;

        // q=11
        nread = neighborList[n+10*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jx += fq;
        m4 += fq;
        jz += fq;
        m8 += fq;
        m9 += fq;
        m10 += fq;
        m11 -= fq;
        m12 -= fq;
        m15 = fq;
        m16 -= fq;
        m18 = fq;

        // q=12
        nread = neighborList[n+11*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jx -= fq;
        m4 -= fq;
        jz -= fq;
        m8 -= fq;
        m9 += fq;
        m10 += fq;
        m11 -= fq;
        m12 -= fq;
        m15 += fq;
        m16 += fq;
        m18 -= fq;

        // q=13
        nread = neighborList[n+12*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jx += fq;
        m4 += fq;
        jz -= fq;
        m8 -= fq;
        m9 += fq;
        m10 += fq;
        m11 -= fq;
        m12 -= fq;
        m15 -= fq;
        m16 -= fq;
        m18 -= fq;

        // q=14
        nread = neighborList[n+13*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jx -= fq;
        m4 -= fq;
        jz += fq;
        m8 += fq;
        m9 += fq;
        m10 += fq;
        m11 -= fq;
        m12 -= fq;
        m15 -= fq;
        m16 += fq;
        m18 += fq;

        // q=15
        nread = neighborList[n+14*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jy += fq;
        m6 += fq;
        jz += fq;
        m8 += fq;
        m9 -= 2.0*fq;
        m10 -= 2.0*fq;
        m14 = fq;
        m17 += fq;
        m18 -= fq;

        // q=16
        nread = neighborList[n+15*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jy -= fq;
        m6 -= fq;
        jz -= fq;
        m8 -= fq;
        m9 -= 2.0*fq;
        m10 -= 2.0*fq;
        m14 += fq;
        m17 -= fq;
        m18 += fq;

        // q=17
        nread = neighborList[n+16*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jy += fq;
        m6 += fq;
        jz -= fq;
        m8 -= fq;
        m9 -= 2.0*fq;
        m10 -= 2.0*fq;
        m14 -= fq;
        m17 += fq;
        m18 += fq;

        // q=18
        nread = neighborList[n+17*Np];
        fq = dist[nread];
        rho += fq;
        m1 += 8.0*fq;
        m2 += fq;
        jy -= fq;
        m6 -= fq;
        jz += fq;
        m8 += fq;
        m9 -= 2.0*fq;
        m10 -= 2.0*fq;
        m14 -= fq;
        m17 -= fq;
        m18 -= fq;

        // write the velocity (momentum divided by the reference density)
        ux = jx / rho0;
        uy = jy / rho0;
        uz = jz / rho0;
        Velocity[n] = ux;
        Velocity[Np+n] = uy;
        Velocity[2*Np+n] = uz;

        //..............carry out relaxation process...............................................
        // The body force is not applied to the moments here; it enters through the
        // Fx/Fy/Fz terms of the inverse transformation below.
        m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
        m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
        m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
        m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
        m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
        m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
        // m10 equilibrium is -0.5 times the m9 equilibrium, so it must use the same
        // rho0 normalisation (the m11/m12 pair below follows the same rule).
        m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho0) - m10);
        m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
        m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
        m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
        m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
        m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
        m16 = m16 + rlx_setB*( - m16);
        m17 = m17 + rlx_setB*( - m17);
        m18 = m18 + rlx_setB*( - m18);

        //.................inverse transformation (moments -> populations).................
        // Each population q is stored through the neighborList entry that was used
        // to READ the opposite direction above.
        // q=0
        fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
        dist[n] = fq;

        // q=1
        fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx;
        nread = neighborList[n+Np];
        dist[nread] = fq;

        // q=2
        fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
        nread = neighborList[n];
        dist[nread] = fq;

        // q=3
        fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
        nread = neighborList[n+3*Np];
        dist[nread] = fq;

        // q=4
        fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
        nread = neighborList[n+2*Np];
        dist[nread] = fq;

        // q=5
        fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
        nread = neighborList[n+5*Np];
        dist[nread] = fq;

        // q=6
        fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
        nread = neighborList[n+4*Np];
        dist[nread] = fq;

        // q=7
        fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
                +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                +mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
        nread = neighborList[n+7*Np];
        dist[nread] = fq;

        // q=8
        fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
        nread = neighborList[n+6*Np];
        dist[nread] = fq;

        // q=9
        fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
                +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                +mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
        nread = neighborList[n+9*Np];
        dist[nread] = fq;

        // q=10
        fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
                +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                +mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
        nread = neighborList[n+8*Np];
        dist[nread] = fq;

        // q=11
        fq = mrt_V1*rho+mrt_V9*m1
                +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
                +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
        nread = neighborList[n+11*Np];
        dist[nread] = fq;

        // q=12
        fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
                +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                -mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
        nread = neighborList[n+10*Np];
        dist[nread]= fq;

        // q=13
        fq = mrt_V1*rho+mrt_V9*m1
                +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
                +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
        nread = neighborList[n+13*Np];
        dist[nread] = fq;

        // q=14
        fq = mrt_V1*rho+mrt_V9*m1
                +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
                +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
        nread = neighborList[n+12*Np];
        dist[nread] = fq;

        // q=15
        fq = mrt_V1*rho+mrt_V9*m1
                +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
                -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
        nread = neighborList[n+15*Np];
        dist[nread] = fq;

        // q=16
        fq = mrt_V1*rho+mrt_V9*m1
                +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
                -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
        nread = neighborList[n+14*Np];
        dist[nread] = fq;

        // q=17
        fq = mrt_V1*rho+mrt_V9*m1
                +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
                -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
        nread = neighborList[n+17*Np];
        dist[nread] = fq;

        // q=18
        fq = mrt_V1*rho+mrt_V9*m1
                +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
                -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
        nread = neighborList[n+16*Np];
        dist[nread] = fq;
    }
}
//extern "C" void ScaLBL_D3Q19_Momentum_Phys(double *dist, double *vel, double h, double time_conv, int Np)
//{
// //h: resolution [um/lu]
// //time_conv: time conversion factor [sec/lt]
// int n;
// // distributions
// double f1,f2,f3,f4,f5,f6,f7,f8,f9;
// double f10,f11,f12,f13,f14,f15,f16,f17,f18;
// double vx,vy,vz;
//
// for (n=0; n<Np; n++){
// //........................................................................
// // Registers to store the distributions
// //........................................................................
// f2 = dist[2*Np+n];
// f4 = dist[4*Np+n];
// f6 = dist[6*Np+n];
// f8 = dist[8*Np+n];
// f10 = dist[10*Np+n];
// f12 = dist[12*Np+n];
// f14 = dist[14*Np+n];
// f16 = dist[16*Np+n];
// f18 = dist[18*Np+n];
// //........................................................................
// f1 = dist[Np+n];
// f3 = dist[3*Np+n];
// f5 = dist[5*Np+n];
// f7 = dist[7*Np+n];
// f9 = dist[9*Np+n];
// f11 = dist[11*Np+n];
// f13 = dist[13*Np+n];
// f15 = dist[15*Np+n];
// f17 = dist[17*Np+n];
// //.................Compute the velocity...................................
// vx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
// vy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
// vz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
// //..................Write the velocity.....................................
// vel[0*Np+n] = vx*(h*1.0e-6)/time_conv;
// vel[1*Np+n] = vy*(h*1.0e-6)/time_conv;
// vel[2*Np+n] = vz*(h*1.0e-6)/time_conv;
// //........................................................................
// }
//}

View File

@ -4,7 +4,7 @@
#include "D3Q19.h"
#include "D3Q7.h"
#include "Color.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -36,11 +36,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){
//***************************************************************************************
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
int iproc,jproc,kproc;
@ -54,6 +58,7 @@ int main(int argc, char **argv)
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//**********************************
MPI_Request req1[18],req2[18];
MPI_Status stat1[18],stat2[18];
if (rank == 0){
printf("********************************************************\n");
@ -110,30 +115,31 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
comm.bcast(&Nz,1,0);
comm.bcast(&nBlocks,1,0);
comm.bcast(&nthreads,1,0);
comm.bcast(&Fx,1,0);
comm.bcast(&Fy,1,0);
comm.bcast(&Fz,1,0);
comm.bcast(&tau,1,0);
comm.bcast(&alpha,1,0);
comm.bcast(&beta,1,0);
comm.bcast(&das,1,0);
comm.bcast(&dbs,1,0);
comm.bcast(&pBC,1,0);
comm.bcast(&din,1,0);
comm.bcast(&dout,1,0);
comm.bcast(&timestepMax,1,0);
comm.bcast(&interval,1,0);
comm.bcast(&tol,1,0);
comm.bcast(&nprocx,1,0);
comm.bcast(&nprocy,1,0);
comm.bcast(&nprocz,1,0);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&das,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&interval,1,MPI_INT,0,comm);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
@ -163,7 +169,7 @@ int main(int argc, char **argv)
}
comm.barrier();
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
@ -445,7 +451,7 @@ int main(int argc, char **argv)
PM.close();
// printf("File porosity = %f\n", double(sum)/N);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//...........................................................................
// Write the communcation structure into a file for debugging
@ -582,7 +588,7 @@ int main(int argc, char **argv)
}
}
}
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("SendLists are ready on host\n");
//......................................................................................
// Use MPI to fill in the recvCounts form the associated processes
@ -593,46 +599,46 @@ int main(int argc, char **argv)
//**********************************************************************************
// Fill in the recieve counts using MPI
sendtag = recvtag = 3;
comm.Send(&sendCount_x,1,rank_X,sendtag);
comm.Recv(&recvCount_X,1,rank_x,recvtag);
comm.Send(&sendCount_X,1,rank_x,sendtag);
comm.Recv(&recvCount_x,1,rank_X,recvtag);
comm.Send(&sendCount_y,1,rank_Y,sendtag);
comm.Recv(&recvCount_Y,1,rank_y,recvtag);
comm.Send(&sendCount_Y,1,rank_y,sendtag);
comm.Recv(&recvCount_y,1,rank_Y,recvtag);
comm.Send(&sendCount_z,1,rank_Z,sendtag);
comm.Recv(&recvCount_Z,1,rank_z,recvtag);
comm.Send(&sendCount_Z,1,rank_z,sendtag);
comm.Recv(&recvCount_z,1,rank_Z,recvtag);
MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm);
MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm);
MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm);
MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm);
MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm);
MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm);
MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
comm.Send(&sendCount_xy,1,rank_XY,sendtag);
comm.Recv(&recvCount_XY,1,rank_xy,recvtag);
comm.Send(&sendCount_XY,1,rank_xy,sendtag);
comm.Recv(&recvCount_xy,1,rank_XY,recvtag);
comm.Send(&sendCount_Xy,1,rank_xY,sendtag);
comm.Recv(&recvCount_xY,1,rank_Xy,recvtag);
comm.Send(&sendCount_xY,1,rank_Xy,sendtag);
comm.Recv(&recvCount_Xy,1,rank_xY,recvtag);
MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm);
MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm);
MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm);
MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm);
MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
comm.Send(&sendCount_xz,1,rank_XZ,sendtag);
comm.Recv(&recvCount_XZ,1,rank_xz,recvtag);
comm.Send(&sendCount_XZ,1,rank_xz,sendtag);
comm.Recv(&recvCount_xz,1,rank_XZ,recvtag);
comm.Send(&sendCount_Xz,1,rank_xZ,sendtag);
comm.Recv(&recvCount_xZ,1,rank_Xz,recvtag);
comm.Send(&sendCount_xZ,1,rank_Xz,sendtag);
comm.Recv(&recvCount_Xz,1,rank_xZ,recvtag);
MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm);
MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm);
MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm);
MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm);
MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
comm.Send(&sendCount_yz,1,rank_YZ,sendtag);
comm.Recv(&recvCount_YZ,1,rank_yz,recvtag);
comm.Send(&sendCount_YZ,1,rank_yz,sendtag);
comm.Recv(&recvCount_yz,1,rank_YZ,recvtag);
comm.Send(&sendCount_Yz,1,rank_yZ,sendtag);
comm.Recv(&recvCount_yZ,1,rank_Yz,recvtag);
comm.Send(&sendCount_yZ,1,rank_Yz,sendtag);
comm.Recv(&recvCount_Yz,1,rank_yZ,recvtag);
comm.barrier();
MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm);
MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm);
MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm);
MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm);
MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Barrier(comm);
//**********************************************************************************
//......................................................................................
int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z;
@ -663,48 +669,48 @@ int main(int argc, char **argv)
// Use MPI to fill in the appropriate values for recvList
// Fill in the recieve lists using MPI
sendtag = recvtag = 4;
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag);
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag);
comm.waitAll(18,req1);
comm.waitAll(18,req2);
comm.barrier();
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(comm);
//......................................................................................
for (int idx=0; idx<recvCount_x; idx++) recvList_x[idx] -= (Nx-2);
for (int idx=0; idx<recvCount_X; idx++) recvList_X[idx] += (Nx-2);
@ -840,24 +846,42 @@ int main(int argc, char **argv)
PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id);
PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id);
//......................................................................................
comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag);
comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag);
comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag);
comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag);
comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag);
comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag);
comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag);
comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag);
comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag);
comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag);
comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag);
comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag);
comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag);
comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag);
comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag);
comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag);
comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag);
comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag,
recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag,
recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag,
recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag,
recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag,
recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag,
recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag,
recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag,
recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag,
recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag,
recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag,
recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag,
recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag,
recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag,
recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag,
recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag,
recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag,
recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag,
recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(recvList_x, recvCount_x ,recvID_x, id);
UnpackID(recvList_X, recvCount_X ,recvID_X, id);
@ -890,7 +914,7 @@ int main(int argc, char **argv)
free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz);
*/ //......................................................................................
if (rank==0) printf ("Devices are ready to communicate. \n");
comm.barrier();
MPI_Barrier(comm);
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
@ -970,49 +994,48 @@ int main(int argc, char **argv)
PackValues(sendList_YZ, sendCount_YZ,sendbuf_YZ, Phi, N);
//...................................................................................
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = comm.Isend(sendbuf_x,sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X,recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X,sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x,recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y,sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z,sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
//...................................................................................
UnpackValues(recvList_x, recvCount_x,recvbuf_x, Phi, N);
@ -1041,8 +1064,8 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
starttime = Utilities::MPI::time();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
sendtag = recvtag = 5;
@ -1135,42 +1158,42 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the distributions
req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//*************************************************************************
@ -1188,8 +1211,8 @@ int main(int argc, char **argv)
//...................................................................................
// Wait for completion of D3Q19 communication
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
// Unpack the distributions on the device
//...................................................................................
@ -1270,23 +1293,23 @@ int main(int argc, char **argv)
//...................................................................................
//...................................................................................
// Send all the D3Q7 distributions
req1[0] = comm.Isend(recvbuf_x, 2*recvCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(sendbuf_X, 2*sendCount_X,rank_x,recvtag);
req1[1] = comm.Isend(recvbuf_X, 2*recvCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(sendbuf_x, 2*sendCount_x,rank_X,recvtag);
req1[2] = comm.Isend(recvbuf_y, 2*recvCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(sendbuf_Y, 2*sendCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(recvbuf_Y, 2*recvCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(sendbuf_y, 2*sendCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(recvbuf_z, 2*recvCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(sendbuf_Z, 2*sendCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(recvbuf_Z, 2*recvCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(sendbuf_z, 2*sendCount_z,rank_Z,recvtag);
MPI_Isend(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
//...................................................................................
//...................................................................................
// Wait for completion of D3Q7 communication
comm.waitAll(6,req1);
comm.waitAll(6,req2);
MPI_Waitall(6,req1,stat1);
MPI_Waitall(6,req2,stat2);
//...................................................................................
//...................................................................................
UnpackDenD3Q7(sendList_x,sendCount_x,sendbuf_x,2,Den,N);
@ -1322,49 +1345,48 @@ int main(int argc, char **argv)
PackValues(sendList_YZ, sendCount_YZ,sendbuf_YZ, Phi, N);
//...................................................................................
// Send / Recv all the phase indicator field values
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
req1[0] = comm.Isend(sendbuf_x, sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X, recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X, sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x, recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y, sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y, recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y, sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y, recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z, sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z, recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z, sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z, recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ, sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_yZ,recvtag);
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
//...................................................................................
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
//...................................................................................
UnpackValues(recvList_x, recvCount_x,recvbuf_x, Phi, N);
@ -1387,14 +1409,14 @@ int main(int argc, char **argv)
UnpackValues(recvList_YZ, recvCount_YZ,recvbuf_YZ, Phi, N);
//...................................................................................
comm.barrier();
MPI_Barrier(comm);
// Iteration completed!
timestep++;
//...................................................................
}
//************************************************************************/
stoptime = Utilities::MPI::time();
stoptime = MPI_Wtime();
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
cputime = stoptime - starttime;
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl;
@ -1437,7 +1459,7 @@ int main(int argc, char **argv)
fwrite(ColorGrad,8,3*N,COLORGRAD);
fclose(COLORGRAD);
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -10,7 +10,7 @@
#include "D3Q19.h"
#include "D3Q7.h"
#include "Color.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
using namespace std;
@ -62,7 +62,7 @@ inline void UnpackMeshData(int *list, int count, double *recvbuf, DoubleArray &V
}
}
//***************************************************************************************
inline void CommunicateMeshHalo(DoubleArray &MeshData, const Utilities::MPI& Communicator,
inline void CommunicateMeshHalo(DoubleArray &MeshData, MPI_Comm Communicator,
double *sendbuf_x,double *sendbuf_y,double *sendbuf_z,double *sendbuf_X,double *sendbuf_Y,double *sendbuf_Z,
double *sendbuf_xy,double *sendbuf_XY,double *sendbuf_xY,double *sendbuf_Xy,
double *sendbuf_xz,double *sendbuf_XZ,double *sendbuf_xZ,double *sendbuf_Xz,
@ -111,24 +111,42 @@ inline void CommunicateMeshHalo(DoubleArray &MeshData, const Utilities::MPI& Com
PackMeshData(sendList_yZ, sendCount_yZ ,sendbuf_yZ, MeshData);
PackMeshData(sendList_YZ, sendCount_YZ ,sendbuf_YZ, MeshData);
//......................................................................................
comm.sendrecv(sendbuf_x,sendCount_x,rank_x,sendtag,recvbuf_X,recvCount_X,rank_X,recvtag);
comm.sendrecv(sendbuf_X,sendCount_X,rank_X,sendtag,recvbuf_x,recvCount_x,rank_x,recvtag);
comm.sendrecv(sendbuf_y,sendCount_y,rank_y,sendtag,recvbuf_Y,recvCount_Y,rank_Y,recvtag);
comm.sendrecv(sendbuf_Y,sendCount_Y,rank_Y,sendtag,recvbuf_y,recvCount_y,rank_y,recvtag);
comm.sendrecv(sendbuf_z,sendCount_z,rank_z,sendtag,recvbuf_Z,recvCount_Z,rank_Z,recvtag);
comm.sendrecv(sendbuf_Z,sendCount_Z,rank_Z,sendtag,recvbuf_z,recvCount_z,rank_z,recvtag);
comm.sendrecv(sendbuf_xy,sendCount_xy,rank_xy,sendtag,recvbuf_XY,recvCount_XY,rank_XY,recvtag);
comm.sendrecv(sendbuf_XY,sendCount_XY,rank_XY,sendtag,recvbuf_xy,recvCount_xy,rank_xy,recvtag);
comm.sendrecv(sendbuf_Xy,sendCount_Xy,rank_Xy,sendtag,recvbuf_xY,recvCount_xY,rank_xY,recvtag);
comm.sendrecv(sendbuf_xY,sendCount_xY,rank_xY,sendtag,recvbuf_Xy,recvCount_Xy,rank_Xy,recvtag);
comm.sendrecv(sendbuf_xz,sendCount_xz,rank_xz,sendtag,recvbuf_XZ,recvCount_XZ,rank_XZ,recvtag);
comm.sendrecv(sendbuf_XZ,sendCount_XZ,rank_XZ,sendtag,recvbuf_xz,recvCount_xz,rank_xz,recvtag);
comm.sendrecv(sendbuf_Xz,sendCount_Xz,rank_Xz,sendtag,recvbuf_xZ,recvCount_xZ,rank_xZ,recvtag);
comm.sendrecv(sendbuf_xZ,sendCount_xZ,rank_xZ,sendtag,recvbuf_Xz,recvCount_Xz,rank_Xz,recvtag);
comm.sendrecv(sendbuf_yz,sendCount_yz,rank_yz,sendtag,recvbuf_YZ,recvCount_YZ,rank_YZ,recvtag);
comm.sendrecv(sendbuf_YZ,sendCount_YZ,rank_YZ,sendtag,recvbuf_yz,recvCount_yz,rank_yz,recvtag);
comm.sendrecv(sendbuf_Yz,sendCount_Yz,rank_Yz,sendtag,recvbuf_yZ,recvCount_yZ,rank_yZ,recvtag);
comm.sendrecv(sendbuf_yZ,sendCount_yZ,rank_yZ,sendtag,recvbuf_Yz,recvCount_Yz,rank_Yz,recvtag);
// Exchange the packed mesh values with each of the 18 neighbor ranks.
// The send/recv buffers are arrays of double and the counts are element
// counts (the same counts are used by PackMeshData / UnpackMeshData), so the
// MPI datatype must be MPI_DOUBLE. Using MPI_CHAR here would transfer only
// `count` bytes, i.e. one eighth of the packed halo data.
MPI_Sendrecv(sendbuf_x,sendCount_x,MPI_DOUBLE,rank_x,sendtag,
             recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_X,sendCount_X,MPI_DOUBLE,rank_X,sendtag,
             recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_y,sendCount_y,MPI_DOUBLE,rank_y,sendtag,
             recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Y,sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,
             recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_z,sendCount_z,MPI_DOUBLE,rank_z,sendtag,
             recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Z,sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,
             recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xy,sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,
             recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_XY,sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,
             recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Xy,sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,
             recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xY,sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,
             recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xz,sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,
             recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_XZ,sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,
             recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Xz,sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,
             recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_xZ,sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,
             recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_yz,sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,
             recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_YZ,sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,
             recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_Yz,sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,
             recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendbuf_yZ,sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,
             recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator,MPI_STATUS_IGNORE);
//........................................................................................
UnpackMeshData(recvList_x, recvCount_x ,recvbuf_x, MeshData);
UnpackMeshData(recvList_X, recvCount_X ,recvbuf_X, MeshData);
@ -154,11 +172,15 @@ inline void CommunicateMeshHalo(DoubleArray &MeshData, const Utilities::MPI& Com
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
int iproc,jproc,kproc;
@ -172,6 +194,7 @@ int main(int argc, char **argv)
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//**********************************
MPI_Request req1[18],req2[18];
MPI_Status stat1[18],stat2[18];
if (rank == 0){
printf("********************************************************\n");
@ -260,39 +283,38 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
comm.bcast(&tau,1,0);
comm.bcast(&alpha,1,0);
comm.bcast(&beta,1,0);
comm.bcast(&das,1,0);
comm.bcast(&dbs,1,0);
comm.bcast(&xIntPos,1,0);
comm.bcast(&wp_saturation,1,0);
comm.bcast(&pBC,1,0);
comm.bcast(&Restart,1,0);
comm.bcast(&din,1,0);
comm.bcast(&dout,1,0);
comm.bcast(&Fx,1,0);
comm.bcast(&Fy,1,0);
comm.bcast(&Fz,1,0);
comm.bcast(&timestepMax,1,0);
comm.bcast(&interval,1,0);
comm.bcast(&tol,1,0);
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&das,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&xIntPos,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&wp_saturation,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm);
MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&interval,1,MPI_INT,0,comm);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
// Computational domain
comm.bcast(&Nz,1,0);
// comm.bcast(&nBlocks,1,0);
// comm.bcast(&nthreads,1,0);
comm.bcast(&nprocx,1,0);
comm.bcast(&nprocy,1,0);
comm.bcast(&nprocz,1,0);
comm.bcast(&nspheres,1,0);
comm.bcast(&Lx,1,0);
comm.bcast(&Ly,1,0);
comm.bcast(&Lz,1,0);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
// MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
// MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
MPI_Bcast(&nspheres,1,MPI_INT,0,comm);
MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
double Ps = -(das-dbs)/(das+dbs);
@ -324,7 +346,7 @@ int main(int argc, char **argv)
printf("********************************************************\n");
}
comm.barrier();
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
@ -663,14 +685,14 @@ int main(int argc, char **argv)
//.......................................................................
if (rank == 0) printf("Reading the sphere packing \n");
if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad);
comm.barrier();
MPI_Barrier(comm);
// Broadcast the sphere packing to all processes
comm.bcast(cx,nspheres,0);
comm.bcast(cy,nspheres,0);
comm.bcast(cz,nspheres,0);
comm.bcast(rad,nspheres,0);
MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//.......................................................................
// sprintf(LocalRankString,"%05d",rank);
@ -703,7 +725,7 @@ int main(int argc, char **argv)
}
}
sum_local = 1.0*sum;
porosity = comm.sumReduce( sum_local );
MPI_Allreduce(&sum_local,&porosity,1,MPI_DOUBLE,MPI_SUM,comm);
porosity = porosity*iVol_global;
if (rank==0) printf("Media porosity = %f \n",porosity);
@ -837,7 +859,7 @@ int main(int argc, char **argv)
}
}
}
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("SendLists are ready on host\n");
//......................................................................................
// Use MPI to fill in the recvCounts from the associated processes
@ -848,48 +870,89 @@ int main(int argc, char **argv)
//**********************************************************************************
// Fill in the receive counts using MPI
sendtag = recvtag = 3;
req1[0] = comm.Isend(&sendCount_x,1,rank_x,sendtag);
req2[0] = comm.Irecv(&recvCount_X,1,rank_X,recvtag);
req1[1] = comm.Isend(&sendCount_X,1,rank_X,sendtag);
req2[1] = comm.Irecv(&recvCount_x,1,rank_x,recvtag);
req1[2] = comm.Isend(&sendCount_y,1,rank_y,sendtag);
req2[2] = comm.Irecv(&recvCount_Y,1,rank_Y,recvtag);
req1[3] = comm.Isend(&sendCount_Y,1,rank_Y,sendtag);
req2[3] = comm.Irecv(&recvCount_y,1,rank_y,recvtag);
req1[4] = comm.Isend(&sendCount_z,1,rank_z,sendtag);
req2[4] = comm.Irecv(&recvCount_Z,1,rank_Z,recvtag);
req1[5] = comm.Isend(&sendCount_Z,1,rank_Z,sendtag);
req2[5] = comm.Irecv(&recvCount_z,1,rank_z,recvtag);
MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag,comm,&req1[0]);
MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag,comm,&req2[0]);
MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag,comm,&req1[1]);
MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag,comm,&req2[1]);
MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag,comm,&req1[2]);
MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag,comm,&req2[2]);
MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag,comm,&req1[3]);
MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag,comm,&req2[3]);
MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag,comm,&req1[4]);
MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag,comm,&req2[4]);
MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag,comm,&req1[5]);
MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z,recvtag,comm,&req2[5]);
req1[6] = comm.Isend(&sendCount_xy,1,rank_xy,sendtag);
req2[6] = comm.Irecv(&recvCount_XY,1,rank_XY,recvtag);
req1[7] = comm.Isend(&sendCount_XY,1,rank_XY,sendtag);
req2[7] = comm.Irecv(&recvCount_xy,1,rank_xy,recvtag);
req1[8] = comm.Isend(&sendCount_Xy,1,rank_Xy,sendtag);
req2[8] = comm.Irecv(&recvCount_xY,1,rank_xY,recvtag);
req1[9] = comm.Isend(&sendCount_xY,1,rank_xY,sendtag);
req2[9] = comm.Irecv(&recvCount_Xy,1,rank_Xy,recvtag);
MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag,comm,&req1[6]);
MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag,comm,&req2[6]);
MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag,comm,&req1[7]);
MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag,comm,&req2[7]);
MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag,comm,&req1[8]);
MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag,comm,&req2[8]);
MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag,comm,&req1[9]);
MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag,comm,&req2[9]);
req1[10] = comm.Isend(&sendCount_xz,1,rank_xz,sendtag);
req2[10] = comm.Irecv(&recvCount_XZ,1,rank_XZ,recvtag);
req1[11] = comm.Isend(&sendCount_XZ,1,rank_XZ,sendtag);
req2[11] = comm.Irecv(&recvCount_xz,1,rank_xz,recvtag);
req1[12] = comm.Isend(&sendCount_Xz,1,rank_Xz,sendtag);
req2[12] = comm.Irecv(&recvCount_xZ,1,rank_xZ,recvtag);
req1[13] = comm.Isend(&sendCount_xZ,1,rank_xZ,sendtag);
req2[13] = comm.Irecv(&recvCount_Xz,1,rank_Xz,recvtag);
MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag,comm,&req1[10]);
MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag,comm,&req2[10]);
MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag,comm,&req1[11]);
MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag,comm,&req2[11]);
MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag,comm,&req1[12]);
MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag,comm,&req2[12]);
MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag,comm,&req1[13]);
MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag,comm,&req2[13]);
req1[14] = comm.Isend(&sendCount_yz,1,rank_yz,sendtag);
req2[14] = comm.Irecv(&recvCount_YZ,1,rank_YZ,recvtag);
req1[15] = comm.Isend(&sendCount_YZ,1,rank_YZ,sendtag);
req2[15] = comm.Irecv(&recvCount_yz,1,rank_yz,recvtag);
req1[16] = comm.Isend(&sendCount_Yz,1,rank_Yz,sendtag);
req2[16] = comm.Irecv(&recvCount_yZ,1,rank_yZ,recvtag);
req1[17] = comm.Isend(&sendCount_yZ,1,rank_yZ,sendtag);
req2[17] = comm.Irecv(&recvCount_Yz,1,rank_Yz,recvtag);
comm.waitAll(18,req1);
comm.waitAll(18,req2);
comm.barrier();
MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag,comm,&req1[14]);
MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag,comm,&req2[14]);
MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag,comm,&req1[15]);
MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag,comm,&req2[15]);
MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag,comm,&req1[16]);
MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag,comm,&req2[16]);
MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag,comm,&req1[17]);
MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag,comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(comm);
/* MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm);
MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm);
MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm);
MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm);
MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm);
MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm);
MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm);
MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm);
MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm);
MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm);
MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm);
MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm);
MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm);
MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm);
MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm);
MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm);
MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm);
MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm);
MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Barrier(comm);
*/ //**********************************************************************************
//......................................................................................
int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z;
int *recvList_xy, *recvList_yz, *recvList_xz, *recvList_Xy, *recvList_Yz, *recvList_xZ;
@ -919,48 +982,48 @@ int main(int argc, char **argv)
// Use MPI to fill in the appropriate values for recvList
// Fill in the receive lists using MPI
sendtag = recvtag = 4;
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_x,sendtag);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_X,recvtag);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_X,sendtag);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_x,recvtag);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_y,sendtag);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_Y,recvtag);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_Y,sendtag);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_y,recvtag);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_z,sendtag);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_Z,recvtag);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_Z,sendtag);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_z,recvtag);
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,comm,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,comm,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,comm,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,comm,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,comm,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,comm,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,comm,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,comm,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,comm,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,comm,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,comm,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,comm,&req2[5]);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_xy,sendtag);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_XY,recvtag);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_XY,sendtag);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_xy,recvtag);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy,sendtag);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_xY,recvtag);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_xY,sendtag);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy,recvtag);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_xy,sendtag,comm,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,comm,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,comm,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,comm,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,comm,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,comm,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,comm,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,comm,&req2[9]);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_xz,sendtag);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ,recvtag);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ,sendtag);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_xz,recvtag);
req1[12] = comm.Isend(sendList_Xz,endCount_Xz,rank_Xz,sendtag);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ,recvtag);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ,sendtag);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz,recvtag);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,comm,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,comm,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,comm,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,comm,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,comm,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,comm,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,comm,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,comm,&req2[13]);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_yz,sendtag);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ,recvtag);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ,sendtag);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_yz,recvtag);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz,sendtag);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ,recvtag);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ,sendtag);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz,recvtag);
comm.waitAll(18,req1);
comm.waitAll(18,req2);
comm.barrier();
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,comm,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,comm,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,comm,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,comm,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,comm,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,comm,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,comm,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(comm);
//......................................................................................
for (int idx=0; idx<recvCount_x; idx++) recvList_x[idx] -= (Nx-2);
for (int idx=0; idx<recvCount_X; idx++) recvList_X[idx] += (Nx-2);
@ -1075,7 +1138,7 @@ int main(int argc, char **argv)
dvc_AllocateDeviceMemory((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory
dvc_AllocateDeviceMemory((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory
//......................................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("Prepare to copy send/recv Lists to device \n");
dvc_CopyToDevice(dvcSendList_x,sendList_x,sendCount_x*sizeof(int));
dvc_CopyToDevice(dvcSendList_X,sendList_X,sendCount_X*sizeof(int));
@ -1182,24 +1245,42 @@ int main(int argc, char **argv)
PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id);
PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id);
//......................................................................................
comm.sendrecv(sendID_x,sendCount_x,rank_x,sendtag,recvID_X,recvCount_X,rank_X,recvtag);
comm.sendrecv(sendID_X,sendCount_X,rank_X,sendtag,recvID_x,recvCount_x,rank_x,recvtag);
comm.sendrecv(sendID_y,sendCount_y,rank_y,sendtag,recvID_Y,recvCount_Y,rank_Y,recvtag);
comm.sendrecv(sendID_Y,sendCount_Y,rank_Y,sendtag,recvID_y,recvCount_y,rank_y,recvtag);
comm.sendrecv(sendID_z,sendCount_z,rank_z,sendtag,recvID_Z,recvCount_Z,rank_Z,recvtag);
comm.sendrecv(sendID_Z,sendCount_Z,rank_Z,sendtag,recvID_z,recvCount_z,rank_z,recvtag);
comm.sendrecv(sendID_xy,sendCount_xy,rank_xy,sendtag,recvID_XY,recvCount_XY,rank_XY,recvtag);
comm.sendrecv(sendID_XY,sendCount_XY,rank_XY,sendtag,recvID_xy,recvCount_xy,rank_xy,recvtag);
comm.sendrecv(sendID_Xy,sendCount_Xy,rank_Xy,sendtag,recvID_xY,recvCount_xY,rank_xY,recvtag);
comm.sendrecv(sendID_xY,sendCount_xY,rank_xY,sendtag,recvID_Xy,recvCount_Xy,rank_Xy,recvtag);
comm.sendrecv(sendID_xz,sendCount_xz,rank_xz,sendtag,recvID_XZ,recvCount_XZ,rank_XZ,recvtag);
comm.sendrecv(sendID_XZ,sendCount_XZ,rank_XZ,sendtag,recvID_xz,recvCount_xz,rank_xz,recvtag);
comm.sendrecv(sendID_Xz,sendCount_Xz,rank_Xz,sendtag,recvID_xZ,recvCount_xZ,rank_xZ,recvtag);
comm.sendrecv(sendID_xZ,sendCount_xZ,rank_xZ,sendtag,recvID_Xz,recvCount_Xz,rank_Xz,recvtag);
comm.sendrecv(sendID_yz,sendCount_yz,rank_yz,sendtag,recvID_YZ,recvCount_YZ,rank_YZ,recvtag);
comm.sendrecv(sendID_YZ,sendCount_YZ,rank_YZ,sendtag,recvID_yz,recvCount_yz,rank_yz,recvtag);
comm.sendrecv(sendID_Yz,sendCount_Yz,rank_Yz,sendtag,recvID_yZ,recvCount_yZ,rank_yZ,recvtag);
comm.sendrecv(sendID_yZ,sendCount_yZ,rank_yZ,sendtag,recvID_Yz,recvCount_Yz,rank_Yz,recvtag);
MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_x,sendtag,
recvID_X,recvCount_X,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_X,sendtag,
recvID_x,recvCount_x,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_y,sendtag,
recvID_Y,recvCount_Y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_Y,sendtag,
recvID_y,recvCount_y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_z,sendtag,
recvID_Z,recvCount_Z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_Z,sendtag,
recvID_z,recvCount_z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_xy,sendtag,
recvID_XY,recvCount_XY,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_XY,sendtag,
recvID_xy,recvCount_xy,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_Xy,sendtag,
recvID_xY,recvCount_xY,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_xY,sendtag,
recvID_Xy,recvCount_Xy,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_xz,sendtag,
recvID_XZ,recvCount_XZ,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_XZ,sendtag,
recvID_xz,recvCount_xz,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_Xz,sendtag,
recvID_xZ,recvCount_xZ,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_xZ,sendtag,
recvID_Xz,recvCount_Xz,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_yz,sendtag,
recvID_YZ,recvCount_YZ,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_YZ,sendtag,
recvID_yz,recvCount_yz,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_Yz,sendtag,
recvID_yZ,recvCount_yZ,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_yZ,sendtag,
recvID_Yz,recvCount_Yz,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(recvList_x, recvCount_x ,recvID_x, id);
UnpackID(recvList_X, recvCount_X ,recvID_X, id);
@ -1267,7 +1348,7 @@ int main(int argc, char **argv)
recvMeshData_YZ = new double [recvCount_YZ];
recvMeshData_XZ = new double [recvCount_XZ];
if (rank==0) printf ("Devices are ready to communicate. \n");
comm.barrier();
MPI_Barrier(comm);
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
@ -1447,8 +1528,8 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
starttime = Utilities::MPI::time();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
//...........................................................................
// MAIN VARIABLES INITIALIZED HERE
@ -1519,7 +1600,7 @@ int main(int argc, char **argv)
dvc_CopyToDevice(f_odd,cDistOdd,9*N*sizeof(double));
dvc_CopyToDevice(Den,cDen,2*N*sizeof(double));
dvc_Barrier();
comm.barrier();
MPI_Barrier(comm);
}
// Pack the buffers (zeros out the halo region)
dvc_PackDenD3Q7(dvcRecvList_x,recvCount_x,recvbuf_x,2,Den,N);
@ -1558,48 +1639,48 @@ int main(int argc, char **argv)
//...................................................................................
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = comm.Isend(sendbuf_x,sendCount_x,rank_x,sendtag);
req2[0] = comm.Irecv(recvbuf_X,recvCount_X,rank_X,recvtag);
req1[1] = comm.Isend(sendbuf_X,sendCount_X,rank_X,sendtag);
req2[1] = comm.Irecv(recvbuf_x,recvCount_x,rank_x,recvtag);
req1[2] = comm.Isend(sendbuf_y,sendCount_y,rank_y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,recvCount_Y,rank_Y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,sendCount_Y,rank_Y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,recvCount_y,rank_y,recvtag);
req1[4] = comm.Isend(sendbuf_z,sendCount_z,rank_z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,recvCount_Z,rank_Z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,sendCount_Z,rank_Z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,recvCount_z,rank_z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_xy,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_XY,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_XY,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_xy,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_Xy,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_xY,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_xY,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_Xy,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_xz,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_XZ,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_XZ,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_xz,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_Xz,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_xZ,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_xZ,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_Xz,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_yz,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_YZ,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_YZ,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_yz,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_Yz,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_yZ,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_yZ,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_Yz,recvtag);
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
//...................................................................................
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
dvc_Barrier();
//...................................................................................
//...................................................................................
@ -1645,7 +1726,7 @@ int main(int argc, char **argv)
dvc_CopyToHost(Phase.data,Phi,N*sizeof(double));
dvc_CopyToHost(Press.data,Pressure,N*sizeof(double));
dvc_CopyToHost(Vel,Velocity,3*N*sizeof(double));
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
timestep=0;
@ -1741,42 +1822,42 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the distributions
req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_x,sendtag);
req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_X,recvtag);
req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_X,sendtag);
req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_x,recvtag);
req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_Y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_Y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_y,recvtag);
req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_Z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_Z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_xy,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_XY,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_XY,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_xy,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_Xy,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_xY,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_xY,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_Xy,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_xz,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_XZ,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_XZ,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_xz,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_Xz,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_xZ,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_xZ,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_Xz,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_yz,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_YZ,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_YZ,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_yz,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_Yz,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_yZ,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_yZ,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_Yz,recvtag);
MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]);
//...................................................................................
//*************************************************************************
@ -1793,8 +1874,8 @@ int main(int argc, char **argv)
//...................................................................................
// Wait for completion of D3Q19 communication
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
// Unpack the distributions on the device
@ -1877,23 +1958,23 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the D3Q7 distributions
req1[0] = comm.Isend(recvbuf_x,2*recvCount_x,rank_x,sendtag);
req2[0] = comm.Irecv(sendbuf_X,2*sendCount_X,rank_X,recvtag);
req1[1] = comm.Isend(recvbuf_X,2*recvCount_X,rank_X,sendtag);
req2[1] = comm.Irecv(sendbuf_x,2*sendCount_x,rank_x,recvtag);
req1[2] = comm.Isend(recvbuf_y,2*recvCount_y,rank_y,sendtag);
req2[2] = comm.Irecv(sendbuf_Y,2*sendCount_Y,rank_Y,recvtag);
req1[3] = comm.Isend(recvbuf_Y,2*recvCount_Y,rank_Y,sendtag);
req2[3] = comm.Irecv(sendbuf_y,2*sendCount_y,rank_y,recvtag);
req1[4] = comm.Isend(recvbuf_z,2*recvCount_z,rank_z,sendtag);
req2[4] = comm.Irecv(sendbuf_Z,2*sendCount_Z,rank_Z,recvtag);
req1[5] = comm.Isend(recvbuf_Z,2*recvCount_Z,rank_Z,sendtag);
req2[5] = comm.Irecv(sendbuf_z,2*sendCount_z,rank_z,recvtag);
MPI_Isend(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]);
MPI_Irecv(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]);
MPI_Isend(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]);
MPI_Irecv(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]);
MPI_Isend(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]);
MPI_Irecv(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]);
MPI_Isend(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]);
MPI_Irecv(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]);
MPI_Isend(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]);
MPI_Irecv(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]);
MPI_Isend(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]);
MPI_Irecv(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]);
//...................................................................................
//...................................................................................
// Wait for completion of D3Q7 communication
comm.waitAll(6,req1);
comm.waitAll(6,req2);
MPI_Waitall(6,req1,stat1);
MPI_Waitall(6,req2,stat2);
//...................................................................................
//...................................................................................
dvc_UnpackDenD3Q7(dvcSendList_x,sendCount_x,sendbuf_x,2,Den,N);
@ -1932,48 +2013,48 @@ int main(int argc, char **argv)
//...................................................................................
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = comm.Isend(sendbuf_x,sendCount_x,rank_x,sendtag);
req2[0] = comm.Irecv(recvbuf_X,recvCount_X,rank_X,recvtag);
req1[1] = comm.Isend(sendbuf_X,sendCount_X,rank_X,sendtag);
req2[1] = comm.Irecv(recvbuf_x,recvCount_x,rank_x,recvtag);
req1[2] = comm.Isend(sendbuf_y,sendCount_y,rank_y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,recvCount_Y,rank_Y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,sendCount_Y,rank_Y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,recvCount_y,rank_y,recvtag);
req1[4] = comm.Isend(sendbuf_z,sendCount_z,rank_z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,recvCount_Z,rank_Z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,sendCount_Z,rank_Z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,recvCount_z,rank_z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_xy,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_XY,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_XY,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_xy,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_Xy,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_xY,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_xY,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_Xy,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_xz,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_XZ,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_XZ,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_xz,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_Xz,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_xZ,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_xZ,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_Xz,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_yz,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_YZ,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_YZ,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_yz,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_Yz,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_yZ,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_yZ,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_Yz,recvtag);
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
//...................................................................................
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
dvc_Barrier();
//...................................................................................
//...................................................................................
@ -2003,7 +2084,7 @@ int main(int argc, char **argv)
dvc_UnpackValues(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, Phi, N);
dvc_UnpackValues(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, Phi, N);
//...................................................................................
comm.barrier();
MPI_Barrier(comm);
// Iteration completed!
timestep++;
@ -2283,27 +2364,27 @@ int main(int argc, char **argv)
//...........................................................................
}
//...........................................................................
comm.barrier();
nwp_volume_global = comm.sumReduce( nwp_volume );
awn_global = comm.sumReduce( awn );
ans_global = comm.sumReduce( ans );
aws_global = comm.sumReduce( aws );
lwns_global = comm.sumReduce( lwns );
As_global = comm.sumReduce( As );
Jwn_global = comm.sumReduce( Jwn );
efawns_global = comm.sumReduce( efawns );
MPI_Barrier(comm);
MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,comm);
// Phase averages
vol_w_global = comm.sumReduce( vol_w );
vol_n_global = comm.sumReduce( vol_n );
paw_global = comm.sumReduce( paw );
pan_global = comm.sumReduce( pan );
vaw_global(0) = comm.sumReduce( vaw(0) );
van_global(0) = comm.sumReduce( van(0) );
vawn_global(0) = comm.sumReduce( vawn(0) );
Gwn_global(0) = comm.sumReduce( Gwn(0) );
Gns_global(0) = comm.sumReduce( Gns(0) );
Gws_global(0) = comm.sumReduce( Gws(0) );
comm.barrier();
MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,comm);
MPI_Barrier(comm);
//.........................................................................
// Compute the change in the total surface energy based on the defined interval
// See McClure, Prins and Miller (2013)
@ -2370,8 +2451,8 @@ int main(int argc, char **argv)
}
//************************************************************************/
dvc_Barrier();
comm.barrier();
stoptime = Utilities::MPI::time();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
@ -2408,7 +2489,7 @@ int main(int argc, char **argv)
*/ //************************************************************************/
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -1240,6 +1240,15 @@ __global__ void dvc_ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny
}
// Copy one z-slice of Phi onto another.
//   Phi    - array indexed as slice*Nx*Ny + offset
//   Nx, Ny - slice extents; each slice holds Nx*Ny values
//   Nz     - not referenced by this kernel
//   Source - index of the slice that is read
//   Dest   - index of the slice that is overwritten
// Each thread handles a single offset within the slice; threads whose
// global index falls outside the slice do nothing.
__global__ void dvc_ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){
	const int offset = blockIdx.x*blockDim.x + threadIdx.x;
	if (offset >= Nx*Ny) return;
	const double copied = Phi[Source*Nx*Ny+offset];
	Phi[Dest*Nx*Ny+offset] = copied;
}
__global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,
double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
@ -4134,5 +4143,9 @@ extern "C" void ScaLBL_Color_BC_Z(int *list, int *Map, double *Phi, double *Den,
}
}
// Host entry point: launches dvc_ScaLBL_CopySlice_z with 512 threads per
// block and Nx*Ny/512 + 1 blocks, so every offset in an Nx*Ny slice is
// covered by at least one thread. The launch result is not checked here.
extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){
	const int threadsPerBlock = 512;
	const int numBlocks = Nx*Ny / threadsPerBlock + 1;
	dvc_ScaLBL_CopySlice_z<<<numBlocks,threadsPerBlock>>>(Phi,Nx,Ny,Nz,Source,Dest);
}

View File

@ -267,7 +267,6 @@ __global__ void dvc_ScaLBL_D3Q19_Init(double *dist, int Np)
}
}
//*************************************************************************
__global__ void dvc_ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, double *distodd, int Np, int q){
int n,nn;
@ -1728,6 +1727,43 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist,
//...................................................
}
}
// For each of the first `count` sites named in `list`, replace five entries
// of `dist` (components 6, 12, 13, 16 and 17, each stored with stride Np)
// by (constant - previous value).
//   list  - site indices to process
//   dist  - distribution array, addressed as q*Np + site
//   count - number of entries of `list` to process
//   Np    - stride between consecutive components in `dist`
// NOTE(review): the constants are ~1/9 for component 6 and ~1/18 for the
// others; presumably derived from the D3Q19 lattice weights - confirm.
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
	const int tid = blockIdx.x*blockDim.x + threadIdx.x;
	if (tid >= count) return;

	const int site = list[tid];
	// The five slots are distinct, so each can be updated in place.
	const int q6  = 6*Np+site;
	const int q12 = 12*Np+site;
	const int q13 = 13*Np+site;
	const int q16 = 16*Np+site;
	const int q17 = 17*Np+site;

	dist[q6]  = 0.111111111111111111111111 - dist[q6];
	dist[q12] = 0.05555555555555555555556 - dist[q12];
	dist[q13] = 0.05555555555555555555556 - dist[q13];
	dist[q16] = 0.05555555555555555555556 - dist[q16];
	dist[q17] = 0.05555555555555555555556 - dist[q17];
}
// For each of the first `count` sites named in `list`, replace five entries
// of `dist` (components 5, 11, 14, 15 and 18, each stored with stride Np)
// by (constant - previous value).
//   list  - site indices to process
//   dist  - distribution array, addressed as q*Np + site
//   count - number of entries of `list` to process
//   Np    - stride between consecutive components in `dist`
// NOTE(review): the constants are ~1/9 for component 5 and ~1/18 for the
// others; presumably derived from the D3Q19 lattice weights - confirm.
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
	const int tid = blockIdx.x*blockDim.x + threadIdx.x;
	if (tid >= count) return;

	const int site = list[tid];
	// The five slots are distinct, so each can be updated in place.
	const int q5  = 5*Np+site;
	const int q11 = 11*Np+site;
	const int q14 = 14*Np+site;
	const int q15 = 15*Np+site;
	const int q18 = 18*Np+site;

	dist[q5]  = 0.111111111111111111111111 - dist[q5];
	dist[q11] = 0.05555555555555555555556 - dist[q11];
	dist[q14] = 0.05555555555555555555556 - dist[q14];
	dist[q15] = 0.05555555555555555555556 - dist[q15];
	dist[q18] = 0.05555555555555555555556 - dist[q18];
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np)
{
@ -2324,10 +2360,11 @@ extern "C" void ScaLBL_D3Q19_Init(double *dist, int Np){
dvc_ScaLBL_D3Q19_Init<<<NBLOCKS,NTHREADS >>>(dist, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_AA_Init: %s \n",cudaGetErrorString(err));
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz){
dvc_ScaLBL_D3Q19_Swap<<<NBLOCKS,NTHREADS >>>(ID, disteven, distodd, Nx, Ny, Nz);
cudaError_t err = cudaGetLastError();
@ -2614,11 +2651,23 @@ extern "C" double deviceReduce(double *in, double* out, int N) {
return sum;
}
//
//extern "C" void ScaLBL_D3Q19_Pressure_BC_Z(int *list, double *dist, double dout, int count, int Np){
// int GRID = count / 512 + 1;
// dvc_ScaLBL_D3Q19_Pressure_BC_Z<<<GRID,512>>>(disteven, distodd, dout, Nx, Ny, Nz, outlet);
//}
// Host entry point for dvc_ScaLBL_D3Q19_Reflection_BC_z.
// Launches 512 threads per block and count/512 + 1 blocks so that all
// `count` list entries are covered, then reports any error returned by
// cudaGetLastError() on stdout (execution continues regardless).
extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
	const int threadsPerBlock = 512;
	const int numBlocks = count / threadsPerBlock + 1;
	dvc_ScaLBL_D3Q19_Reflection_BC_z<<<numBlocks,threadsPerBlock>>>(list, dist, count, Np);
	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",cudaGetErrorString(status));
	}
}
// Host entry point for dvc_ScaLBL_D3Q19_Reflection_BC_Z.
// Launches 512 threads per block and count/512 + 1 blocks so that all
// `count` list entries are covered, then reports any error returned by
// cudaGetLastError() on stdout (execution continues regardless).
extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
	const int threadsPerBlock = 512;
	const int numBlocks = count / threadsPerBlock + 1;
	dvc_ScaLBL_D3Q19_Reflection_BC_Z<<<numBlocks,threadsPerBlock>>>(list, dist, count, Np);
	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",cudaGetErrorString(status));
	}
}
extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx,
double Fy, double Fz){

View File

@ -1,4 +1,5 @@
// GPU Functions for D3Q7 Lattice Boltzmann Methods
#include <stdio.h>
#define NBLOCKS 560
#define NTHREADS 128
@ -79,6 +80,25 @@ __global__ void dvc_ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count,
}
}
// For each of the first `count` sites named in `list`, replace component 6
// of `dist` (addressed as 6*Np + site) by (0.2222... - previous value).
//   list  - site indices to process
//   dist  - distribution array, addressed as q*Np + site
//   count - number of entries of `list` to process
//   Np    - stride between consecutive components in `dist`
__global__ void dvc_ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count, int Np){
	const int tid = blockIdx.x*blockDim.x + threadIdx.x;
	if (tid >= count) return;

	const int slot = 6*Np+list[tid];
	const double reflected = 0.222222222222222222222222 - dist[slot];
	dist[slot] = reflected;
}
// For each of the first `count` sites named in `list`, replace component 5
// of `dist` (addressed as 5*Np + site) by (0.2222... - previous value).
//   list  - site indices to process
//   dist  - distribution array, addressed as q*Np + site
//   count - number of entries of `list` to process
//   Np    - stride between consecutive components in `dist`
__global__ void dvc_ScaLBL_D3Q7_Reflection_BC_Z(int *list, double *dist, int count, int Np){
	const int tid = blockIdx.x*blockDim.x + threadIdx.x;
	if (tid >= count) return;

	const int slot = 5*Np+list[tid];
	const double reflected = 0.222222222222222222222222 - dist[slot];
	dist[slot] = reflected;
}
__global__ void dvc_ScaLBL_D3Q7_Init(char *ID, double *f_even, double *f_odd, double *Den, int Nx, int Ny, int Nz)
{
int n,N;
@ -207,6 +227,24 @@ __global__ void dvc_ScaLBL_D3Q7_Density(char *ID, double *disteven, double *dis
}
}
// Host entry point for dvc_ScaLBL_D3Q7_Reflection_BC_z.
// Launches 512 threads per block and count/512 + 1 blocks so that all
// `count` list entries are covered, then reports any error returned by
// cudaGetLastError() on stdout (execution continues regardless).
extern "C" void ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count, int Np){
	const int threadsPerBlock = 512;
	const int numBlocks = count / threadsPerBlock + 1;
	dvc_ScaLBL_D3Q7_Reflection_BC_z<<<numBlocks,threadsPerBlock>>>(list, dist, count, Np);
	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q7_Reflection_BC_z (kernel): %s \n",cudaGetErrorString(status));
	}
}
// Host entry point for dvc_ScaLBL_D3Q7_Reflection_BC_Z.
// Launches 512 threads per block and count/512 + 1 blocks so that all
// `count` list entries are covered, then reports any error returned by
// cudaGetLastError() on stdout (execution continues regardless).
extern "C" void ScaLBL_D3Q7_Reflection_BC_Z(int *list, double *dist, int count, int Np){
	const int threadsPerBlock = 512;
	const int numBlocks = count / threadsPerBlock + 1;
	dvc_ScaLBL_D3Q7_Reflection_BC_Z<<<numBlocks,threadsPerBlock>>>(list, dist, count, Np);
	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q7_Reflection_BC_Z (kernel): %s \n",cudaGetErrorString(status));
	}
}
extern "C" void ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_Unpack <<<GRID,512 >>>(q, list, start, count, recvbuf, dist, N);

View File

@ -6,7 +6,7 @@
#include <iostream>
#include <fstream>
#include <string.h>
#include "common/MPI.h"
#include <mpi.h>
#include <stdlib.h>
using namespace std;
@ -64,11 +64,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
int iproc,jproc,kproc;
@ -82,6 +86,7 @@ int main(int argc, char **argv)
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//**********************************
MPI_Request req1[18],req2[18];
MPI_Status stat1[18],stat2[18];
//**********************************
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//!!!!!!!!!!! Random debugging communications!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@ -131,23 +136,24 @@ int main(int argc, char **argv)
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
comm.bcast(&Nz,1,0);
comm.bcast(&nBlocks,1,0);
comm.bcast(&nthreads,1,0);
comm.bcast(&tau,1,0);
comm.bcast(&Fx,1,0);
comm.bcast(&Fy,1,0);
comm.bcast(&Fz,1,0);
comm.bcast(&timestepMax,1,0);
comm.bcast(&interval,1,0);
comm.bcast(&tol,1,0);
comm.bcast(&nprocx,1,0);
comm.bcast(&nprocy,1,0);
comm.bcast(&nprocz,1,0);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&interval,1,MPI_INT,0,comm);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
double rlx_setA = 1.f/tau;
@ -170,7 +176,7 @@ int main(int argc, char **argv)
printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz);
}
comm.barrier();
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
@ -451,7 +457,7 @@ int main(int argc, char **argv)
PM.close();
// printf("File porosity = %f\n", double(sum)/N);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//...........................................................................
// Write the communication structure into a file for debugging
@ -588,7 +594,7 @@ int main(int argc, char **argv)
}
}
}
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("SendLists are ready on host\n");
//......................................................................................
// Use MPI to fill in the recvCounts from the associated processes
@ -599,46 +605,46 @@ int main(int argc, char **argv)
//**********************************************************************************
// Fill in the receive counts using MPI
sendtag = recvtag = 3;
comm.send(&sendCount_x,1,rank_X,sendtag);
comm.recv(&recvCount_X,1,rank_x,recvtag);
comm.send(&sendCount_X,1,rank_x,sendtag);
comm.recv(&recvCount_x,1,rank_X,recvtag);
comm.send(&sendCount_y,1,rank_Y,sendtag);
comm.recv(&recvCount_Y,1,rank_y,recvtag);
comm.send(&sendCount_Y,1,rank_y,sendtag);
comm.recv(&recvCount_y,1,rank_Y,recvtag);
comm.send(&sendCount_z,1,rank_Z,sendtag);
comm.recv(&recvCount_Z,1,rank_z,recvtag);
comm.send(&sendCount_Z,1,rank_z,sendtag);
comm.recv(&recvCount_z,1,rank_Z,recvtag);
MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm);
MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm);
MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm);
MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm);
MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm);
MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm);
MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
comm.send(&sendCount_xy,1,rank_XY,sendtag);
comm.recv(&recvCount_XY,1,rank_xy,recvtag);
comm.send(&sendCount_XY,1,rank_xy,sendtag);
comm.recv(&recvCount_xy,1,rank_XY,recvtag);
comm.send(&sendCount_Xy,1,rank_xY,sendtag);
comm.recv(&recvCount_xY,1,rank_Xy,recvtag);
comm.send(&sendCount_xY,1,rank_Xy,sendtag);
comm.recv(&recvCount_Xy,1,rank_xY,recvtag);
MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm);
MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm);
MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm);
MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm);
MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
comm.send(&sendCount_xz,1,rank_XZ,sendtag);
comm.recv(&recvCount_XZ,1,rank_xz,recvtag);
comm.send(&sendCount_XZ,1,rank_xz,sendtag);
comm.recv(&recvCount_xz,1,rank_XZ,recvtag);
comm.send(&sendCount_Xz,1,rank_xZ,sendtag);
comm.recv(&recvCount_xZ,1,rank_Xz,recvtag);
comm.send(&sendCount_xZ,1,rank_Xz,sendtag);
comm.recv(&recvCount_Xz,1,rank_xZ,recvtag);
MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm);
MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm);
MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm);
MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm);
MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
comm.send(&sendCount_yz,1,rank_YZ,sendtag);
comm.recv(&recvCount_YZ,1,rank_yz,recvtag);
comm.send(&sendCount_YZ,1,rank_yz,sendtag);
comm.recv(&recvCount_yz,1,rank_YZ,recvtag);
comm.send(&sendCount_Yz,1,rank_yZ,sendtag);
comm.recv(&recvCount_yZ,1,rank_Yz,recvtag);
comm.send(&sendCount_yZ,1,rank_Yz,sendtag);
comm.recv(&recvCount_Yz,1,rank_yZ,recvtag);
comm.barrier();
MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm);
MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm);
MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm);
MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm);
MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Barrier(comm);
//**********************************************************************************
//......................................................................................
int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z;
@ -669,48 +675,48 @@ int main(int argc, char **argv)
// Use MPI to fill in the appropriate values for recvList
// Fill in the receive lists using MPI
sendtag = recvtag = 4;
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag);
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag);
comm.waitAll(18,req1);
comm.waitAll(18,req2);
comm.barrier();
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(comm);
//......................................................................................
double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z;
double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ;
@ -909,24 +915,42 @@ int main(int argc, char **argv)
PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id);
PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id);
//......................................................................................
comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag);
comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag);
comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag);
comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag);
comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag);
comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag);
comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag);
comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag);
comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag);
comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag);
comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag);
comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag);
comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag);
comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag);
comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag);
comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag);
comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag);
comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag,
recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag,
recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag,
recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag,
recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag,
recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag,
recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag,
recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag,
recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag,
recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag,
recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag,
recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag,
recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag,
recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag,
recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag,
recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag,
recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag,
recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag,
recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(recvList_x, recvCount_x ,recvID_x, id);
UnpackID(recvList_X, recvCount_X ,recvID_X, id);
@ -959,7 +983,7 @@ int main(int argc, char **argv)
free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz);
//......................................................................................
if (rank==0) printf ("Devices are ready to communicate. \n");
comm.barrier();
MPI_Barrier(comm);
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
@ -999,8 +1023,8 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
starttime = Utilities::MPI::time();
MPI_Barrier(comm);
starttime = MPI_Wtime();
// Old cuda timer is below
// cudaEvent_t start, stop;
// float time;
@ -1112,48 +1136,48 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the distributions
req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
// Wait for completion of D3Q19 communication
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
// Unpack the distributions on the device
//...................................................................................
@ -1236,7 +1260,7 @@ int main(int argc, char **argv)
//*****************************************************************************
//*****************************************************************************
comm.barrier();
MPI_Barrier(comm);
// Iteration completed!
timestep++;
//...................................................................
@ -1245,8 +1269,8 @@ int main(int argc, char **argv)
// cudaThreadSynchronize();
dvc_Barrier();
comm.barrier();
stoptime = Utilities::MPI::time();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
cputime = stoptime - starttime;
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl;
@ -1280,7 +1304,7 @@ int main(int argc, char **argv)
// dvc_CopyToDevice(velocity, vel, 3*dist_mem_size, dvc_CopyToDeviceDeviceToHost);
//..............................................................................
// cudaThreadSynchronize();
// comm.barrier();
// MPI_Barrier(comm);
//............................................................
//....Write the z-velocity to test poiseuille flow............
// double vz,vz_avg;
@ -1309,7 +1333,7 @@ int main(int argc, char **argv)
// free (velocity); free(id);
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -1,10 +1,8 @@
#include "common/MPI.h"
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cuda.h>
#include <mpi.h>
inline void PackID(int *list, int count, char *sendbuf, char *ID){
// Fill in the phase ID values from neighboring processors
@ -555,11 +553,15 @@ void Write_Out(double *array, int Nx, int Ny, int Nz){
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
int iproc,jproc,kproc;
@ -573,6 +575,7 @@ int main(int argc, char **argv)
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//**********************************
MPI_Request req1[18],req2[18];
MPI_Status stat1[18],stat2[18];
//**********************************
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//!!!!!!!!!!! Random debugging communications!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@ -622,21 +625,24 @@ int main(int argc, char **argv)
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
comm.bcast(&Nz,1,0);
comm.bcast(&nBlocks,1,0);
comm.bcast(&nthreads,1,0);
comm.bcast(&tau,1,0);
comm.bcast(&Fx,1,0);
comm.bcast(&Fy,1,0);
comm.bcast(&Fz,1,0);
comm.bcast(&iterMax,1,0);
comm.bcast(&interval,1,0);
comm.bcast(&tol,1,0);
comm.bcast(&nprocx,1,0);
comm.bcast(&nprocy,1,0);
comm.bcast(&nprocz,1,0);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&iterMax,1,MPI_INT,0,comm);
MPI_Bcast(&interval,1,MPI_INT,0,comm);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
//.................................................
MPI_Barrier(comm);
// **************************************************************
double rlx_setA = 1.f/tau;
@ -659,7 +665,7 @@ int main(int argc, char **argv)
printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz);
}
comm.barrier();
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
@ -940,7 +946,7 @@ int main(int argc, char **argv)
PM.close();
// printf("File porosity = %f\n", double(sum)/N);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//...........................................................................
// Write the communication structure into a file for debugging
@ -1077,7 +1083,7 @@ int main(int argc, char **argv)
}
}
}
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("SendLists are ready on host\n");
//......................................................................................
// Use MPI to fill in the recvCounts from the associated processes
@ -1088,46 +1094,46 @@ int main(int argc, char **argv)
//**********************************************************************************
// Fill in the receive counts using MPI
sendtag = recvtag = 3;
comm.send(&sendCount_x,1,rank_X,sendtag);
comm.recv(&recvCount_X,1,rank_x,recvtag);
comm.send(&sendCount_X,1,rank_x,sendtag);
comm.recv(&recvCount_x,1,rank_X,recvtag);
comm.send(&sendCount_y,1,rank_Y,sendtag);
comm.recv(&recvCount_Y,1,rank_y,recvtag);
comm.send(&sendCount_Y,1,rank_y,sendtag);
comm.recv(&recvCount_y,1,rank_Y,recvtag);
comm.send(&sendCount_z,1,rank_Z,sendtag);
comm.recv(&recvCount_Z,1,rank_z,recvtag);
comm.send(&sendCount_Z,1,rank_z,sendtag);
comm.recv(&recvCount_z,1,rank_Z,recvtag);
MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm);
MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm);
MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm);
MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm);
MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm);
MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm);
MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
comm.send(&sendCount_xy,1,rank_XY,sendtag);
comm.recv(&recvCount_XY,1,rank_xy,recvtag);
comm.send(&sendCount_XY,1,rank_xy,sendtag);
comm.recv(&recvCount_xy,1,rank_XY,recvtag);
comm.send(&sendCount_Xy,1,rank_xY,sendtag);
comm.recv(&recvCount_xY,1,rank_Xy,recvtag);
comm.send(&sendCount_xY,1,rank_Xy,sendtag);
comm.recv(&recvCount_Xy,1,rank_xY,recvtag);
MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm);
MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm);
MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm);
MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm);
MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
comm.send(&sendCount_xz,1,rank_XZ,sendtag);
comm.recv(&recvCount_XZ,1,rank_xz,recvtag);
comm.send(&sendCount_XZ,1,rank_xz,sendtag);
comm.recv(&recvCount_xz,1,rank_XZ,recvtag);
comm.send(&sendCount_Xz,1,rank_xZ,sendtag);
comm.recv(&recvCount_xZ,1,rank_Xz,recvtag);
comm.send(&sendCount_xZ,1,rank_Xz,sendtag);
comm.recv(&recvCount_Xz,1,rank_xZ,recvtag);
MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm);
MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm);
MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm);
MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm);
MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
comm.send(&sendCount_yz,1,rank_YZ,sendtag);
comm.recv(&recvCount_YZ,1,rank_yz,recvtag);
comm.send(&sendCount_YZ,1,rank_yz,sendtag);
comm.recv(&recvCount_yz,1,rank_YZ,recvtag);
comm.send(&sendCount_Yz,1,rank_yZ,sendtag);
comm.recv(&recvCount_yZ,1,rank_Yz,recvtag);
comm.send(&sendCount_yZ,1,rank_Yz,sendtag);
comm.recv(&recvCount_Yz,1,rank_yZ,recvtag);
comm.barrier();
MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm);
MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm);
MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm);
MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm);
MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Barrier(comm);
//**********************************************************************************
//recvCount_x = sendCount_x;
//recvCount_X = sendCount_X;
@ -1151,7 +1157,7 @@ int main(int argc, char **argv)
//......................................................................................
// Use MPI to fill in the appropriate values
// int tag = 5;
// Mcomm.sendrecv(sendCount_x,1,rank_x,tag,sendCount_X,1);
// MPI_Sendrecv(sendCount_x,1,MPI_INT,rank_x,tag,sendCount_X,1,MPI_INT,comm,req);
//......................................................................................
int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z;
int *recvList_xy, *recvList_yz, *recvList_xz, *recvList_Xy, *recvList_Yz, *recvList_xZ;
@ -1181,48 +1187,48 @@ int main(int argc, char **argv)
// Use MPI to fill in the appropriate values for recvList
// Fill in the receive lists using MPI
sendtag = recvtag = 4;
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag);
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag);
comm.waitAll(18,req1);
comm.waitAll(18,req2);
comm.barrier();
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(comm);
//......................................................................................
double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z;
double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ;
@ -1421,24 +1427,42 @@ int main(int argc, char **argv)
PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id);
PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id);
//......................................................................................
comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag);
comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag);
comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag);
comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag);
comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag);
comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag);
comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag);
comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag);
comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag);
comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag);
comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag);
comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag);
comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag);
comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag);
comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag);
comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag);
comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag);
comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag,
recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag,
recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag,
recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag,
recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag,
recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag,
recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag,
recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag,
recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag,
recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag,
recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag,
recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag,
recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag,
recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag,
recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag,
recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag,
recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag,
recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag,
recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(recvList_x, recvCount_x ,recvID_x, id);
UnpackID(recvList_X, recvCount_X ,recvID_X, id);
@ -1471,7 +1495,7 @@ int main(int argc, char **argv)
free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz);
//......................................................................................
if (rank==0) printf ("Devices are ready to communicate. \n");
comm.barrier();
MPI_Barrier(comm);
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
@ -1511,8 +1535,8 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
starttime = Utilities::MPI::time();
MPI_Barrier(comm);
starttime = MPI_Wtime();
// Old cuda timer is below
// cudaEvent_t start, stop;
// float time;
@ -1609,48 +1633,48 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the distributions
req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
// Wait for completion of D3Q19 communication
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
// Unpack the distributions on the device
//...................................................................................
@ -1734,7 +1758,7 @@ int main(int argc, char **argv)
//*****************************************************************************
//*****************************************************************************
comm.barrier();
MPI_Barrier(comm);
// Iteration completed!
iter++;
//...................................................................
@ -1742,8 +1766,8 @@ int main(int argc, char **argv)
//************************************************************************/
cudaThreadSynchronize();
comm.barrier();
stoptime = Utilities::MPI::time();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
cputime = stoptime - starttime;
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*iter)/cputime/1000000 << " MLUPS" << endl;
@ -1778,7 +1802,7 @@ int main(int argc, char **argv)
cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);
//..............................................................................
cudaThreadSynchronize();
comm.barrier();
MPI_Barrier(comm);
//............................................................
//....Write the z-velocity to test poiseuille flow............
double vz,vz_avg;
@ -1807,7 +1831,7 @@ int main(int argc, char **argv)
free (velocity); free(id);
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -1,4 +1,6 @@
#include "common/MPI.h"
#ifdef useMPI
#include <mpi.h>
#endif
#include <stdio.h>
#include <stdlib.h>
@ -60,10 +62,18 @@ int main(int argc, char *argv[])
{
//********** Initialize MPI ****************
int numprocs,rank;
#ifdef useMPI
MPI_Status stat;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int numprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_size(comm,&numprocs);
MPI_Comm_rank(comm,&rank);
#else
MPI_Comm comm = MPI_COMM_WORLD;
numprocs = 1;
rank = 0;
#endif
//******************************************
if (rank == 0){
@ -113,31 +123,32 @@ int main(int argc, char *argv[])
input >> tol; // error tolerance
//.............................................................
}
#ifdef useMPI
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
comm.bcast(&Nz,1,0);
comm.bcast(&nBlocks,1,0);
comm.bcast(&nthreads,1,0);
comm.bcast(&Fx,1,0);
comm.bcast(&Fy,1,0);
comm.bcast(&Fz,1,0);
comm.bcast(&tau,1,0);
comm.bcast(&alpha,1,0);
comm.bcast(&beta,1,0);
comm.bcast(&das,1,0);
comm.bcast(&dbs,1,0);
comm.bcast(&pBC,1,0);
comm.bcast(&din,1,0);
comm.bcast(&dout,1,0);
comm.bcast(&timestepMax,1,0);
comm.bcast(&interval,1,0);
comm.bcast(&tol,1,0);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&das,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&interval,1,MPI_INT,0,comm);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
#endif
double rlxA = 1.f/tau;
double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA);
@ -232,7 +243,11 @@ int main(int argc, char *argv[])
if (k==4) k=Nz-5;
}
}
comm.bcast(&id[0],N,0);
#ifdef useMPI //............................................................
MPI_Barrier(comm);
MPI_Bcast(&id[0],N,MPI_CHAR,0,comm);
MPI_Barrier(comm);
#endif
if (rank == 0) printf("Domain set.\n");
//...........................................................................

View File

@ -2,7 +2,7 @@
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include "common/MPI.h"
#include <mpi.h>
using namespace std;
@ -98,11 +98,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
int iproc,jproc,kproc;
@ -116,6 +120,7 @@ int main(int argc, char **argv)
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//**********************************
MPI_Request req1[18],req2[18];
MPI_Status stat1[18],stat2[18];
if (rank == 0){
printf("********************************************************\n");
@ -172,30 +177,31 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
comm.bcast(&Nz,1,0);
comm.bcast(&nBlocks,1,0);
comm.bcast(&nthreads,1,0);
comm.bcast(&Fx,1,0);
comm.bcast(&Fy,1,0);
comm.bcast(&Fz,1,0);
comm.bcast(&tau,1,0);
comm.bcast(&alpha,1,0);
comm.bcast(&beta,1,0);
comm.bcast(&das,1,0);
comm.bcast(&dbs,1,0);
comm.bcast(&pBC,1,0);
comm.bcast(&din,1,0);
comm.bcast(&dout,1,0);
comm.bcast(&timestepMax,1,0);
comm.bcast(&interval,1,0);
comm.bcast(&tol,1,0);
comm.bcast(&nprocx,1,0);
comm.bcast(&nprocy,1,0);
comm.bcast(&nprocz,1,0);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&das,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&interval,1,MPI_INT,0,comm);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
@ -225,7 +231,7 @@ int main(int argc, char **argv)
}
comm.barrier();
MPI_Barrier(comm);
// Decompose the linear MPI rank into process-grid coordinates (iproc,jproc,kproc).
// The divisions below imply the layout rank = iproc + nprocx*jproc + nprocx*nprocy*kproc.
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
// The x index is the remainder once the z-plane and y-row offsets are removed.
// The y-row stride is nprocx (the divisor used for jproc above), not nprocz;
// using nprocz only gives the right answer when nprocx == nprocz.
iproc = rank-nprocx*nprocy*kproc-nprocx*jproc;
@ -507,7 +513,7 @@ int main(int argc, char **argv)
PM.close();
// printf("File porosity = %f\n", double(sum)/N);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//...........................................................................
// Write the communication structure into a file for debugging
@ -644,7 +650,7 @@ int main(int argc, char **argv)
}
}
}
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("SendLists are ready on host\n");
//......................................................................................
// Use MPI to fill in the recvCounts from the associated processes
@ -655,46 +661,46 @@ int main(int argc, char **argv)
//**********************************************************************************
// Fill in the receive counts using MPI
sendtag = recvtag = 3;
comm.Send(&sendCount_x,1,rank_X,sendtag);
comm.Recv(&recvCount_X,1,rank_x,recvtag);
comm.Send(&sendCount_X,1,rank_x,sendtag);
comm.Recv(&recvCount_x,1,rank_X,recvtag);
comm.Send(&sendCount_y,1,rank_Y,sendtag);
comm.Recv(&recvCount_Y,1,rank_y,recvtag);
comm.Send(&sendCount_Y,1,rank_y,sendtag);
comm.Recv(&recvCount_y,1,rank_Y,recvtag);
comm.Send(&sendCount_z,1,rank_Z,sendtag);
comm.Recv(&recvCount_Z,1,rank_z,recvtag);
comm.Send(&sendCount_Z,1,rank_z,sendtag);
comm.Recv(&recvCount_z,1,rank_Z,recvtag);
MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm);
MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm);
MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm);
MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm);
MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm);
MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm);
MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
comm.Send(&sendCount_xy,1,rank_XY,sendtag);
comm.Recv(&recvCount_XY,1,rank_xy,recvtag);
comm.Send(&sendCount_XY,1,rank_xy,sendtag);
comm.Recv(&recvCount_xy,1,rank_XY,recvtag);
comm.Send(&sendCount_Xy,1,rank_xY,sendtag);
comm.Recv(&recvCount_xY,1,rank_Xy,recvtag);
comm.Send(&sendCount_xY,1,rank_Xy,sendtag);
comm.Recv(&recvCount_Xy,1,rank_xY,recvtag);
MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm);
MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm);
MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm);
MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm);
MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
comm.Send(&sendCount_xz,1,rank_XZ,sendtag);
comm.Recv(&recvCount_XZ,1,rank_xz,recvtag);
comm.Send(&sendCount_XZ,1,rank_xz,sendtag);
comm.Recv(&recvCount_xz,1,rank_XZ,recvtag);
comm.Send(&sendCount_Xz,1,rank_xZ,sendtag);
comm.Recv(&recvCount_xZ,1,rank_Xz,recvtag);
comm.Send(&sendCount_xZ,1,rank_Xz,sendtag);
comm.Recv(&recvCount_Xz,1,rank_xZ,recvtag);
MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm);
MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm);
MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm);
MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm);
MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
comm.Send(&sendCount_yz,1,rank_YZ,sendtag);
comm.Recv(&recvCount_YZ,1,rank_yz,recvtag);
comm.Send(&sendCount_YZ,1,rank_yz,sendtag);
comm.Recv(&recvCount_yz,1,rank_YZ,recvtag);
comm.Send(&sendCount_Yz,1,rank_yZ,sendtag);
comm.Recv(&recvCount_yZ,1,rank_Yz,recvtag);
comm.Send(&sendCount_yZ,1,rank_Yz,sendtag);
comm.Recv(&recvCount_Yz,1,rank_yZ,recvtag);
comm.barrier();
MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm);
MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm);
MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm);
MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm);
MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Barrier(comm);
//**********************************************************************************
//......................................................................................
int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z;
@ -725,48 +731,48 @@ int main(int argc, char **argv)
// Use MPI to fill in the appropriate values for recvList
// Fill in the receive lists using MPI
sendtag = recvtag = 4;
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag);
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag);
comm.waitAll(18,req1);
comm.waitAll(18,req2);
comm.barrier();
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(comm);
//......................................................................................
for (int idx=0; idx<recvCount_x; idx++) recvList_x[idx] -= (Nx-2);
for (int idx=0; idx<recvCount_X; idx++) recvList_X[idx] += (Nx-2);
@ -987,24 +993,42 @@ int main(int argc, char **argv)
PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id);
PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id);
//......................................................................................
comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag);
comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag);
comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag);
comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag);
comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag);
comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag);
comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag);
comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag);
comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag);
comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag);
comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag);
comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag);
comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag);
comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag);
comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag);
comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag);
comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag);
comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag,
recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag,
recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag,
recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag,
recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag,
recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag,
recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag,
recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag,
recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag,
recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag,
recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag,
recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag,
recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag,
recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag,
recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag,
recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag,
recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag,
recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag,
recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(recvList_x, recvCount_x ,recvID_x, id);
UnpackID(recvList_X, recvCount_X ,recvID_X, id);
@ -1037,7 +1061,7 @@ int main(int argc, char **argv)
free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz);
*/ //......................................................................................
if (rank==0) printf ("Devices are ready to communicate. \n");
comm.barrier();
MPI_Barrier(comm);
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
@ -1102,49 +1126,48 @@ int main(int argc, char **argv)
//...................................................................................
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = comm.Isend(sendbuf_x,sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X,recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X,sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x,recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y,sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z,sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
//...................................................................................
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
//...................................................................................
/* dvc_UnpackValues(faceGrid, packThreads, dvcSendList_x, sendCount_x,sendbuf_x, Phi, N);
@ -1184,8 +1207,8 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
starttime = Utilities::MPI::time();
MPI_Barrier(comm);
starttime = MPI_Wtime();
// Old cuda timer is below
// cudaEvent_t start, stop;
// float time;
@ -1286,42 +1309,42 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the distributions
req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//*************************************************************************
@ -1340,8 +1363,8 @@ int main(int argc, char **argv)
//...................................................................................
// Wait for completion of D3Q19 communication
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
// Unpack the distributions on the device
//...................................................................................
@ -1423,23 +1446,23 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the D3Q7 distributions
req1[0] = comm.Isend(recvbuf_x, 2*recvCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(sendbuf_X, 2*sendCount_X,rank_x,recvtag);
req1[1] = comm.Isend(recvbuf_X, 2*recvCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(sendbuf_x, 2*sendCount_x,rank_X,recvtag);
req1[2] = comm.Isend(recvbuf_y, 2*recvCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(sendbuf_Y, 2*sendCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(recvbuf_Y, 2*recvCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(sendbuf_y, 2*sendCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(recvbuf_z, 2*recvCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(sendbuf_Z, 2*sendCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(recvbuf_Z, 2*recvCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(sendbuf_z, 2*sendCount_z,rank_Z,recvtag);
MPI_Isend(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
//...................................................................................
//...................................................................................
// Wait for completion of D3Q7 communication
comm.waitAll(6,req1);
comm.waitAll(6,req2);
MPI_Waitall(6,req1,stat1);
MPI_Waitall(6,req2,stat2);
//...................................................................................
//...................................................................................
dvc_UnpackDenD3Q7(faceGrid,packThreads,dvcSendList_x,sendCount_x,sendbuf_x,2,Den,N);
@ -1484,48 +1507,48 @@ int main(int argc, char **argv)
//...................................................................................
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = comm.Isend(sendbuf_x, sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X, recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X, sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x, recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y, sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y, recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y, sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y, recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z, sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z, recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z, sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z, recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ, sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
//...................................................................................
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
//...................................................................................
/* dvc_UnpackValues(faceGrid, packThreads, dvcSendList_x, sendCount_x,sendbuf_x, Phi, N);
@ -1554,7 +1577,7 @@ int main(int argc, char **argv)
dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, Phi, N);
dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, Phi, N);
//...................................................................................
comm.barrier();
MPI_Barrier(comm);
// Iteration completed!
timestep++;
@ -1564,8 +1587,8 @@ int main(int argc, char **argv)
// cudaThreadSynchronize();
dvc_Barrier();
comm.barrier();
stoptime = Utilities::MPI::time();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
cputime = stoptime - starttime;
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl;
@ -1593,7 +1616,7 @@ int main(int argc, char **argv)
//************************************************************************/
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -2,7 +2,7 @@
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include "common/MPI.h"
#include <mpi.h>
#include "pmmc.h"
#include "Domain.h"
@ -101,11 +101,15 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
MPI_Init(&argc,&argv);
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&nprocs);
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
int iproc,jproc,kproc;
@ -119,6 +123,7 @@ int main(int argc, char **argv)
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//**********************************
MPI_Request req1[18],req2[18];
MPI_Status stat1[18],stat2[18];
if (rank == 0){
printf("********************************************************\n");
@ -198,35 +203,35 @@ int main(int argc, char **argv)
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
comm.barrier();
MPI_Barrier(comm);
//.................................................
comm.bcast(&tau,1,0);
comm.bcast(&alpha,1,0);
comm.bcast(&beta,1,0);
comm.bcast(&das,1,0);
comm.bcast(&dbs,1,0);
comm.bcast(&pBC,1,0);
comm.bcast(&din,1,0);
comm.bcast(&dout,1,0);
comm.bcast(&Fx,1,0);
comm.bcast(&Fy,1,0);
comm.bcast(&Fz,1,0);
comm.bcast(&timestepMax,1,0);
comm.bcast(&interval,1,0);
comm.bcast(&tol,1,0);
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&das,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&interval,1,MPI_INT,0,comm);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
// Computational domain
comm.bcast(&Nz,1,0);
comm.bcast(&nBlocks,1,0);
comm.bcast(&nthreads,1,0);
comm.bcast(&nprocx,1,0);
comm.bcast(&nprocy,1,0);
comm.bcast(&nprocz,1,0);
comm.bcast(&nspheres,1,0);
comm.bcast(&Lx,1,0);
comm.bcast(&Ly,1,0);
comm.bcast(&Lz,1,0);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
MPI_Bcast(&nspheres,1,MPI_INT,0,comm);
MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// **************************************************************
// **************************************************************
double Ps = -(das-dbs)/(das+dbs);
@ -258,7 +263,7 @@ int main(int argc, char **argv)
printf("********************************************************\n");
}
comm.barrier();
MPI_Barrier(comm);
kproc = rank/(nprocx*nprocy);
jproc = (rank-nprocx*nprocy*kproc)/nprocx;
iproc = rank-nprocx*nprocy*kproc-nprocz*jproc;
@ -556,14 +561,14 @@ int main(int argc, char **argv)
//.......................................................................
if (rank == 0) printf("Reading the sphere packing \n");
if (rank == 0) ReadSpherePacking(nspheres,cx,cy,cz,rad);
comm.barrier();
MPI_Barrier(comm);
// Broadcast the sphere packing to all processes
comm.bcast(cx,nspheres,0);
comm.bcast(cy,nspheres,0);
comm.bcast(cz,nspheres,0);
comm.bcast(rad,nspheres,0);
MPI_Bcast(cx,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cy,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(cz,nspheres,MPI_DOUBLE,0,comm);
MPI_Bcast(rad,nspheres,MPI_DOUBLE,0,comm);
//...........................................................................
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//.......................................................................
// sprintf(LocalRankString,"%05d",rank);
@ -713,7 +718,7 @@ int main(int argc, char **argv)
}
}
}
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf ("SendLists are ready on host\n");
//......................................................................................
// Use MPI to fill in the recvCounts from the associated processes
@ -724,49 +729,89 @@ int main(int argc, char **argv)
//**********************************************************************************
// Fill in the receive counts using MPI
sendtag = recvtag = 3;
req1[0] = comm.Isend(&sendCount_x,1,rank_X,sendtag);
req2[0] = comm.Irecv(&recvCount_X,1,rank_x,recvtag);
req1[1] = comm.Isend(&sendCount_X,1,rank_x,sendtag);
req2[1] = comm.Irecv(&recvCount_x,1,rank_X,recvtag);
req1[2] = comm.Isend(&sendCount_y,1,rank_Y,sendtag);
req2[2] = comm.Irecv(&recvCount_Y,1,rank_y,recvtag);
req1[3] = comm.Isend(&sendCount_Y,1,rank_y,sendtag);
req2[3] = comm.Irecv(&recvCount_y,1,rank_Y,recvtag);
req1[4] = comm.Isend(&sendCount_z,1,rank_Z,sendtag);
req2[4] = comm.Irecv(&recvCount_Z,1,rank_z,recvtag);
req1[5] = comm.Isend(&sendCount_Z,1,rank_z,sendtag);
req2[5] = comm.Irecv(&recvCount_z,1,rank_Z,recvtag);
MPI_Isend(&sendCount_x, 1,MPI_INT,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(&sendCount_X, 1,MPI_INT,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(&sendCount_y, 1,MPI_INT,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(&sendCount_z, 1,MPI_INT,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_Z,recvtag,comm,&req2[5]);
req1[6] = comm.Isend(&sendCount_xy,1,rank_XY,sendtag);
req2[6] = comm.Irecv(&recvCount_XY,1,rank_xy,recvtag);
req1[7] = comm.Isend(&sendCount_XY,1,rank_xy,sendtag);
req2[7] = comm.Irecv(&recvCount_xy,1,rank_XY,recvtag);
req1[8] = comm.Isend(&sendCount_Xy,1,rank_xY,sendtag);
req2[8] = comm.Irecv(&recvCount_xY,1,rank_Xy,recvtag);
req1[9] = comm.Isend(&sendCount_xY,1,rank_Xy,sendtag);
req2[9] = comm.Irecv(&recvCount_Xy,1,rank_xY,recvtag);
MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_xY,recvtag,comm,&req2[9]);
req1[10] = comm.Isend(&sendCount_xz,1,rank_XZ,sendtag);
req2[10] = comm.Irecv(&recvCount_XZ,1,rank_xz,recvtag);
req1[11] = comm.Isend(&sendCount_XZ,1,rank_xz,sendtag);
req2[11] = comm.Irecv(&recvCount_xz,1,rank_XZ,recvtag);
req1[12] = comm.Isend(&sendCount_Xz,1,rank_xZ,sendtag);
req2[12] = comm.Irecv(&recvCount_xZ,1,rank_Xz,recvtag);
req1[13] = comm.Isend(&sendCount_xZ,1,rank_Xz,sendtag);
req2[13] = comm.Irecv(&recvCount_Xz,1,rank_xZ,recvtag);
MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_xZ,recvtag,comm,&req2[13]);
req1[14] = comm.Isend(&sendCount_yz,1,rank_YZ,sendtag);
req2[14] = comm.Irecv(&recvCount_YZ,1,rank_yz,recvtag);
req1[15] = comm.Isend(&sendCount_YZ,1,rank_yz,sendtag);
req2[15] = comm.Irecv(&recvCount_yz,1,rank_YZ,recvtag);
req1[16] = comm.Isend(&sendCount_Yz,1,rank_yZ,sendtag);
req2[16] = comm.Irecv(&recvCount_yZ,1,rank_Yz,recvtag);
req1[17] = comm.Isend(&sendCount_yZ,1,rank_Yz,sendtag);
req2[17] = comm.Irecv(&recvCount_Yz,1,rank_yZ,recvtag);
comm.waitAll(18,req1);
comm.waitAll(18,req2);
comm.barrier();
//**********************************************************************************
MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_yZ,recvtag,comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(comm);
/* MPI_Send(&sendCount_x,1,MPI_INT,rank_X,sendtag,comm);
MPI_Recv(&recvCount_X,1,MPI_INT,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_X,1,MPI_INT,rank_x,sendtag,comm);
MPI_Recv(&recvCount_x,1,MPI_INT,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_y,1,MPI_INT,rank_Y,sendtag,comm);
MPI_Recv(&recvCount_Y,1,MPI_INT,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Y,1,MPI_INT,rank_y,sendtag,comm);
MPI_Recv(&recvCount_y,1,MPI_INT,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_z,1,MPI_INT,rank_Z,sendtag,comm);
MPI_Recv(&recvCount_Z,1,MPI_INT,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Z,1,MPI_INT,rank_z,sendtag,comm);
MPI_Recv(&recvCount_z,1,MPI_INT,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xy,1,MPI_INT,rank_XY,sendtag,comm);
MPI_Recv(&recvCount_XY,1,MPI_INT,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XY,1,MPI_INT,rank_xy,sendtag,comm);
MPI_Recv(&recvCount_xy,1,MPI_INT,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xy,1,MPI_INT,rank_xY,sendtag,comm);
MPI_Recv(&recvCount_xY,1,MPI_INT,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xY,1,MPI_INT,rank_Xy,sendtag,comm);
MPI_Recv(&recvCount_Xy,1,MPI_INT,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xz,1,MPI_INT,rank_XZ,sendtag,comm);
MPI_Recv(&recvCount_XZ,1,MPI_INT,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_XZ,1,MPI_INT,rank_xz,sendtag,comm);
MPI_Recv(&recvCount_xz,1,MPI_INT,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Xz,1,MPI_INT,rank_xZ,sendtag,comm);
MPI_Recv(&recvCount_xZ,1,MPI_INT,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_xZ,1,MPI_INT,rank_Xz,sendtag,comm);
MPI_Recv(&recvCount_Xz,1,MPI_INT,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_yz,1,MPI_INT,rank_YZ,sendtag,comm);
MPI_Recv(&recvCount_YZ,1,MPI_INT,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_YZ,1,MPI_INT,rank_yz,sendtag,comm);
MPI_Recv(&recvCount_yz,1,MPI_INT,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_Yz,1,MPI_INT,rank_yZ,sendtag,comm);
MPI_Recv(&recvCount_yZ,1,MPI_INT,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Send(&sendCount_yZ,1,MPI_INT,rank_Yz,sendtag,comm);
MPI_Recv(&recvCount_Yz,1,MPI_INT,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Barrier(comm);
*/ //**********************************************************************************
//......................................................................................
int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z;
int *recvList_xy, *recvList_yz, *recvList_xz, *recvList_Xy, *recvList_Yz, *recvList_xZ;
@ -796,48 +841,48 @@ int main(int argc, char **argv)
// Use MPI to fill in the appropriate values for recvList
// Fill in the receive lists using MPI
sendtag = recvtag = 4;
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_Z,recvtag);
MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_Z,recvtag,comm,&req2[5]);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_xY,recvtag);
MPI_Isend(sendList_xy, sendCount_xy,MPI_INT,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_xY,recvtag,comm,&req2[9]);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_xZ,recvtag);
MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_xZ,recvtag,comm,&req2[13]);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_yZ,recvtag);
comm.waitAll(18,req1);
comm.waitAll(18,req2);
comm.barrier();
MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_yZ,recvtag,comm,&req2[17]);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
MPI_Barrier(comm);
//......................................................................................
for (int idx=0; idx<recvCount_x; idx++) recvList_x[idx] -= (Nx-2);
for (int idx=0; idx<recvCount_X; idx++) recvList_X[idx] += (Nx-2);
@ -1058,24 +1103,42 @@ int main(int argc, char **argv)
PackID(sendList_yZ, sendCount_yZ ,sendID_yZ, id);
PackID(sendList_YZ, sendCount_YZ ,sendID_YZ, id);
//......................................................................................
comm.sendrecv(sendID_x,sendCount_x,rank_X,sendtag,recvID_X,recvCount_X,rank_x,recvtag);
comm.sendrecv(sendID_X,sendCount_X,rank_x,sendtag,recvID_x,recvCount_x,rank_X,recvtag);
comm.sendrecv(sendID_y,sendCount_y,rank_Y,sendtag,recvID_Y,recvCount_Y,rank_y,recvtag);
comm.sendrecv(sendID_Y,sendCount_Y,rank_y,sendtag,recvID_y,recvCount_y,rank_Y,recvtag);
comm.sendrecv(sendID_z,sendCount_z,rank_Z,sendtag,recvID_Z,recvCount_Z,rank_z,recvtag);
comm.sendrecv(sendID_Z,sendCount_Z,rank_z,sendtag,recvID_z,recvCount_z,rank_Z,recvtag);
comm.sendrecv(sendID_xy,sendCount_xy,rank_XY,sendtag,recvID_XY,recvCount_XY,rank_xy,recvtag);
comm.sendrecv(sendID_XY,sendCount_XY,rank_xy,sendtag,recvID_xy,recvCount_xy,rank_XY,recvtag);
comm.sendrecv(sendID_Xy,sendCount_Xy,rank_xY,sendtag,recvID_xY,recvCount_xY,rank_Xy,recvtag);
comm.sendrecv(sendID_xY,sendCount_xY,rank_Xy,sendtag,recvID_Xy,recvCount_Xy,rank_xY,recvtag);
comm.sendrecv(sendID_xz,sendCount_xz,rank_XZ,sendtag,recvID_XZ,recvCount_XZ,rank_xz,recvtag);
comm.sendrecv(sendID_XZ,sendCount_XZ,rank_xz,sendtag,recvID_xz,recvCount_xz,rank_XZ,recvtag);
comm.sendrecv(sendID_Xz,sendCount_Xz,rank_xZ,sendtag,recvID_xZ,recvCount_xZ,rank_Xz,recvtag);
comm.sendrecv(sendID_xZ,sendCount_xZ,rank_Xz,sendtag,recvID_Xz,recvCount_Xz,rank_xZ,recvtag);
comm.sendrecv(sendID_yz,sendCount_yz,rank_YZ,sendtag,recvID_YZ,recvCount_YZ,rank_yz,recvtag);
comm.sendrecv(sendID_YZ,sendCount_YZ,rank_yz,sendtag,recvID_yz,recvCount_yz,rank_YZ,recvtag);
comm.sendrecv(sendID_Yz,sendCount_Yz,rank_yZ,sendtag,recvID_yZ,recvCount_yZ,rank_Yz,recvtag);
comm.sendrecv(sendID_yZ,sendCount_yZ,rank_Yz,sendtag,recvID_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Sendrecv(sendID_x,sendCount_x,MPI_CHAR,rank_X,sendtag,
recvID_X,recvCount_X,MPI_CHAR,rank_x,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_X,sendCount_X,MPI_CHAR,rank_x,sendtag,
recvID_x,recvCount_x,MPI_CHAR,rank_X,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_y,sendCount_y,MPI_CHAR,rank_Y,sendtag,
recvID_Y,recvCount_Y,MPI_CHAR,rank_y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Y,sendCount_Y,MPI_CHAR,rank_y,sendtag,
recvID_y,recvCount_y,MPI_CHAR,rank_Y,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_z,sendCount_z,MPI_CHAR,rank_Z,sendtag,
recvID_Z,recvCount_Z,MPI_CHAR,rank_z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Z,sendCount_Z,MPI_CHAR,rank_z,sendtag,
recvID_z,recvCount_z,MPI_CHAR,rank_Z,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xy,sendCount_xy,MPI_CHAR,rank_XY,sendtag,
recvID_XY,recvCount_XY,MPI_CHAR,rank_xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XY,sendCount_XY,MPI_CHAR,rank_xy,sendtag,
recvID_xy,recvCount_xy,MPI_CHAR,rank_XY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xy,sendCount_Xy,MPI_CHAR,rank_xY,sendtag,
recvID_xY,recvCount_xY,MPI_CHAR,rank_Xy,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xY,sendCount_xY,MPI_CHAR,rank_Xy,sendtag,
recvID_Xy,recvCount_Xy,MPI_CHAR,rank_xY,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xz,sendCount_xz,MPI_CHAR,rank_XZ,sendtag,
recvID_XZ,recvCount_XZ,MPI_CHAR,rank_xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_XZ,sendCount_XZ,MPI_CHAR,rank_xz,sendtag,
recvID_xz,recvCount_xz,MPI_CHAR,rank_XZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Xz,sendCount_Xz,MPI_CHAR,rank_xZ,sendtag,
recvID_xZ,recvCount_xZ,MPI_CHAR,rank_Xz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_xZ,sendCount_xZ,MPI_CHAR,rank_Xz,sendtag,
recvID_Xz,recvCount_Xz,MPI_CHAR,rank_xZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yz,sendCount_yz,MPI_CHAR,rank_YZ,sendtag,
recvID_YZ,recvCount_YZ,MPI_CHAR,rank_yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_YZ,sendCount_YZ,MPI_CHAR,rank_yz,sendtag,
recvID_yz,recvCount_yz,MPI_CHAR,rank_YZ,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_Yz,sendCount_Yz,MPI_CHAR,rank_yZ,sendtag,
recvID_yZ,recvCount_yZ,MPI_CHAR,rank_Yz,recvtag,comm,MPI_STATUS_IGNORE);
MPI_Sendrecv(sendID_yZ,sendCount_yZ,MPI_CHAR,rank_Yz,sendtag,
recvID_Yz,recvCount_Yz,MPI_CHAR,rank_yZ,recvtag,comm,MPI_STATUS_IGNORE);
//......................................................................................
UnpackID(recvList_x, recvCount_x ,recvID_x, id);
UnpackID(recvList_X, recvCount_X ,recvID_X, id);
@ -1108,7 +1171,7 @@ int main(int argc, char **argv)
free(recvID_yz); free(recvID_YZ); free(recvID_yZ); free(recvID_Yz);
*/ //......................................................................................
if (rank==0) printf ("Devices are ready to communicate. \n");
comm.barrier();
MPI_Barrier(comm);
//...........device phase ID.................................................
if (rank==0) printf ("Copying phase ID to device \n");
@ -1259,48 +1322,48 @@ int main(int argc, char **argv)
//...................................................................................
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = comm.Isend(sendbuf_x,sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X,recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X,sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x,recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y,sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z,sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
//...................................................................................
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
//...................................................................................
/* dvc_UnpackValues(faceGrid, packThreads, dvcSendList_x, sendCount_x,sendbuf_x, Phi, N);
@ -1340,8 +1403,8 @@ int main(int argc, char **argv)
//.......create and start timer............
double starttime,stoptime,cputime;
comm.barrier();
starttime = Utilities::MPI::time();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
sendtag = recvtag = 5;
@ -1431,42 +1494,42 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the distributions
req1[0] = comm.Isend(sendbuf_x,5*sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X,5*recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X,5*sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x,5*recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y,5*sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y,5*recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y,5*sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y,5*recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z,5*sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z,5*recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z,5*sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z,5*recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy,sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY,recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY,sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy,recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy,sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY,recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY,sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy,recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz,sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ,recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ,sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz,recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz,sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ,recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ,sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz,recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz,sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ,recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ,sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz,recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz,sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ,recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ,sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz,recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//*************************************************************************
@ -1484,8 +1547,8 @@ int main(int argc, char **argv)
//...................................................................................
// Wait for completion of D3Q19 communication
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
// Unpack the distributions on the device
//...................................................................................
@ -1567,23 +1630,23 @@ int main(int argc, char **argv)
//...................................................................................
// Send all the D3Q7 distributions
req1[0] = comm.Isend(recvbuf_x,2*recvCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(sendbuf_X,2*sendCount_X,rank_x,recvtag);
req1[1] = comm.Isend(recvbuf_X,2*recvCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(sendbuf_x,2*sendCount_x,rank_X,recvtag);
req1[2] = comm.Isend(recvbuf_y,2*recvCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(sendbuf_Y,2*sendCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(recvbuf_Y,2*recvCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(sendbuf_y,2*sendCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(recvbuf_z,2*recvCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(sendbuf_Z,2*sendCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(recvbuf_Z,2*recvCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(sendbuf_z,2*sendCount_z,rank_Z,recvtag);
MPI_Isend(recvbuf_x, 2*recvCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(sendbuf_X, 2*sendCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(recvbuf_X, 2*recvCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(sendbuf_x, 2*sendCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(recvbuf_y, 2*recvCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(sendbuf_Y, 2*sendCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(recvbuf_z, 2*recvCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(sendbuf_Z, 2*sendCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(recvbuf_Z, 2*recvCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(sendbuf_z, 2*sendCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
//...................................................................................
//...................................................................................
// Wait for completion of D3Q7 communication
comm.waitAll(6,req1);
comm.waitAll(6,req2);
MPI_Waitall(6,req1,stat1);
MPI_Waitall(6,req2,stat2);
//...................................................................................
//...................................................................................
dvc_UnpackDenD3Q7(faceGrid,packThreads,dvcSendList_x,sendCount_x,sendbuf_x,2,Den,N);
@ -1622,48 +1685,48 @@ int main(int argc, char **argv)
//...................................................................................
// Send / Recv all the phase indicator field values
//...................................................................................
req1[0] = comm.Isend(sendbuf_x, sendCount_x,rank_X,sendtag);
req2[0] = comm.Irecv(recvbuf_X, recvCount_X,rank_x,recvtag);
req1[1] = comm.Isend(sendbuf_X, sendCount_X,rank_x,sendtag);
req2[1] = comm.Irecv(recvbuf_x, recvCount_x,rank_X,recvtag);
req1[2] = comm.Isend(sendbuf_y, sendCount_y,rank_Y,sendtag);
req2[2] = comm.Irecv(recvbuf_Y, recvCount_Y,rank_y,recvtag);
req1[3] = comm.Isend(sendbuf_Y, sendCount_Y,rank_y,sendtag);
req2[3] = comm.Irecv(recvbuf_y, recvCount_y,rank_Y,recvtag);
req1[4] = comm.Isend(sendbuf_z, sendCount_z,rank_Z,sendtag);
req2[4] = comm.Irecv(recvbuf_Z, recvCount_Z,rank_z,recvtag);
req1[5] = comm.Isend(sendbuf_Z, sendCount_Z,rank_z,sendtag);
req2[5] = comm.Irecv(recvbuf_z, recvCount_z,rank_Z,recvtag);
req1[6] = comm.Isend(sendbuf_xy, sendCount_xy,rank_XY,sendtag);
req2[6] = comm.Irecv(recvbuf_XY, recvCount_XY,rank_xy,recvtag);
req1[7] = comm.Isend(sendbuf_XY, sendCount_XY,rank_xy,sendtag);
req2[7] = comm.Irecv(recvbuf_xy, recvCount_xy,rank_XY,recvtag);
req1[8] = comm.Isend(sendbuf_Xy, sendCount_Xy,rank_xY,sendtag);
req2[8] = comm.Irecv(recvbuf_xY, recvCount_xY,rank_Xy,recvtag);
req1[9] = comm.Isend(sendbuf_xY, sendCount_xY,rank_Xy,sendtag);
req2[9] = comm.Irecv(recvbuf_Xy, recvCount_Xy,rank_xY,recvtag);
req1[10] = comm.Isend(sendbuf_xz, sendCount_xz,rank_XZ,sendtag);
req2[10] = comm.Irecv(recvbuf_XZ, recvCount_XZ,rank_xz,recvtag);
req1[11] = comm.Isend(sendbuf_XZ, sendCount_XZ,rank_xz,sendtag);
req2[11] = comm.Irecv(recvbuf_xz, recvCount_xz,rank_XZ,recvtag);
req1[12] = comm.Isend(sendbuf_Xz, sendCount_Xz,rank_xZ,sendtag);
req2[12] = comm.Irecv(recvbuf_xZ, recvCount_xZ,rank_Xz,recvtag);
req1[13] = comm.Isend(sendbuf_xZ, sendCount_xZ,rank_Xz,sendtag);
req2[13] = comm.Irecv(recvbuf_Xz, recvCount_Xz,rank_xZ,recvtag);
req1[14] = comm.Isend(sendbuf_yz, sendCount_yz,rank_YZ,sendtag);
req2[14] = comm.Irecv(recvbuf_YZ, recvCount_YZ,rank_yz,recvtag);
req1[15] = comm.Isend(sendbuf_YZ, sendCount_YZ,rank_yz,sendtag);
req2[15] = comm.Irecv(recvbuf_yz, recvCount_yz,rank_YZ,recvtag);
req1[16] = comm.Isend(sendbuf_Yz, sendCount_Yz,rank_yZ,sendtag);
req2[16] = comm.Irecv(recvbuf_yZ, recvCount_yZ,rank_Yz,recvtag);
req1[17] = comm.Isend(sendbuf_yZ, sendCount_yZ,rank_Yz,sendtag);
req2[17] = comm.Irecv(recvbuf_Yz, recvCount_Yz,rank_yZ,recvtag);
MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]);
MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]);
MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]);
MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]);
MPI_Isend(sendbuf_y, sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]);
MPI_Irecv(recvbuf_Y, recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]);
MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]);
MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]);
MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]);
MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]);
MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]);
MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]);
MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]);
MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]);
MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]);
MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]);
MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]);
MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]);
MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]);
MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]);
MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]);
MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]);
MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]);
MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]);
MPI_Isend(sendbuf_Xz, sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]);
MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]);
MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]);
MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]);
MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]);
MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]);
MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]);
MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]);
MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]);
MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]);
MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]);
MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]);
//...................................................................................
//...................................................................................
// Wait for completion of Indicator Field communication
//...................................................................................
comm.waitAll(18,req1);
comm.waitAll(18,req2);
MPI_Waitall(18,req1,stat1);
MPI_Waitall(18,req2,stat2);
//...................................................................................
//...................................................................................
/* dvc_UnpackValues(faceGrid, packThreads, dvcSendList_x, sendCount_x,sendbuf_x, Phi, N);
@ -1692,7 +1755,7 @@ int main(int argc, char **argv)
dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, Phi, N);
dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, Phi, N);
//...................................................................................
comm.barrier();
MPI_Barrier(comm);
// Iteration completed!
timestep++;
//...................................................................
@ -1703,7 +1766,7 @@ int main(int argc, char **argv)
//...........................................................................
dvc_Barrier();
dvc_CopyToHost(Phase.data,Phi,N*sizeof(double));
comm.barrier();
MPI_Barrier(comm);
//...........................................................................
// Compute areas using porous medium marching cubes algorithm
// McClure, Adalsteinsson, et al. (2007)
@ -1872,15 +1935,15 @@ int main(int argc, char **argv)
//*******************************************************************
}
//...........................................................................
comm.barrier();
nwp_volume_global = comm.sumReduce( nwp_volume );
awn_global = comm.sumReduce( awn );
ans_global = comm.sumReduce( ans );
aws_global = comm.sumReduce( aws );
lwns_global = comm.sumReduce( lwns );
As_global = comm.sumReduce( As );
MPI_Barrier(comm);
MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,comm);
MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,comm);
comm.barrier();
MPI_Barrier(comm);
//.........................................................................
// Compute the change in the total surface energy based on the defined interval
// See McClure, Prins and Miller (2013)
@ -1909,8 +1972,8 @@ int main(int argc, char **argv)
}
//************************************************************************/
dvc_Barrier();
comm.barrier();
stoptime = Utilities::MPI::time();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
cputime = stoptime - starttime;
@ -1946,7 +2009,7 @@ int main(int argc, char **argv)
*/ //************************************************************************/
// ****************************************************
comm.barrier();
MPI_Barrier(comm);
MPI_Finalize();
// ****************************************************
}

View File

@ -0,0 +1,37 @@
import sys
import numpy as np
import matplotlib.pylab as plt

# Usage: python <script> FILENAME Nx Ny Nz
# Reads a raw uint8 labelled image, prints its porosity and plots the water
# saturation profile along z. Voxels equal to 1 are counted as oil and voxels
# equal to 2 as water; every other value is treated as non-fluid.
FILENAME=sys.argv[1]
Nx=int(sys.argv[2])
Ny=int(sys.argv[3])
Nz=int(sys.argv[4])

# read the input image (the file must contain exactly Nx*Ny*Nz bytes)
Output = np.fromfile(FILENAME,dtype = np.uint8)
Output.shape = (Nz,Ny,Nx)

Oil=np.count_nonzero(Output==1)
Water=np.count_nonzero(Output==2)
# Guard the ratio: an image without any fluid voxel would raise ZeroDivisionError
Sw=Water/(Oil+Water) if (Oil+Water)>0 else float('nan')
# Porosity is the fluid (oil + water) fraction, consistent with PorosityProfile below
Porosity=(Oil+Water)/(Nx*Ny*Nz)
print(FILENAME,"Porosity=", Porosity)

SaturationProfile=np.zeros(Nz)
PorosityProfile=np.zeros(Nz)
# Compute saturation slice by slice
for idx in range(0, Nz):
    Slice = Output[idx,:,:]
    Oil=np.count_nonzero(Slice==1)
    Water=np.count_nonzero(Slice==2)
    Fluid=Oil+Water
    # A slice with no fluid has no defined saturation; NaN leaves a gap in the plot
    SaturationProfile[idx]=Water/Fluid if Fluid>0 else np.nan
    PorosityProfile[idx]=Fluid/(Nx*Ny)

plt.figure()
plt.plot(SaturationProfile)
plt.xlabel('Position (z)')
plt.ylabel('Water Saturation')
plt.show()

View File

@ -2,16 +2,57 @@ require("ggplot2")
# Shared ggplot2 theme: black-and-white base theme with major and minor grid lines removed
GG_THEME=theme_bw()+theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())
ReadDatabase<-function(FILE){
  # Parse selected "key = value" entries out of a text database file.
  # Returns a numeric vector: tauA, tauB, rhoA, rhoB, alpha, followed by the
  # ComponentAffinity values.
  LINES<-gsub(';','',readLines(FILE))

  # Text that follows "KEY = " on every line mentioning KEY, with whitespace
  # runs collapsed and anything from the first "/" onwards removed
  ValueText<-function(KEY){
    HITS<-grep(KEY,LINES,value=TRUE)
    HITS<-gsub("\\s+"," ",HITS)
    HITS<-gsub(paste0(KEY,' = '),'',HITS)
    gsub("/.*","",HITS)
  }

  TAU_A<-as.numeric(ValueText("tauA"))
  TAU_B<-as.numeric(ValueText("tauB"))
  RHO_A<-as.numeric(ValueText("rhoA"))
  RHO_B<-as.numeric(ValueText("rhoB"))
  ALPHA<-as.numeric(ValueText("alpha"))

  # Read the affinity (a ", "-separated list of numbers)
  AFFINITY<-as.numeric(unlist(strsplit(ValueText("ComponentAffinity"),", ")))

  return(c(TAU_A,TAU_B,RHO_A,RHO_B,ALPHA,AFFINITY))
}
# Read PATH/subphase.csv (space-separated, with header) and append derived columns.
# Columns ending in "c" and "d" are summed into one total per quantity for the
# "w" and "n" phases; the result is the data frame with a Case column set to PATH.
ReadSubphase<-function(PATH){
FILE=paste0(PATH,"/subphase.csv")
S<-read.csv(FILE,head=TRUE,sep=" ")
# Totals per phase: sum of the "c" and "d" parts
S$Vw<-S$Vwc+S$Vwd
S$Vn<-S$Vnc+S$Vnd
S$Aw<-S$Awc+S$Awd
S$An<-S$Anc+S$And
S$Hw<-S$Hwc+S$Hwd
S$Hn<-S$Hnc+S$Hnd
S$Xw<-S$Xwc+S$Xwd
S$Xn<-S$Xnc+S$Xnd
# Saturation of the "w" phase: Vw over the combined volume
S$Sw<-S$Vw/(S$Vn+S$Vw)
# Volume-weighted average pressures
S$pw<-(S$pwc*S$Vwc+S$pwd*S$Vwd) / (S$Vwc+S$Vwd)
S$pn<-(S$pnc*S$Vnc+S$pnd*S$Vnd) / (S$Vnc+S$Vnd)
# x-direction flow: volume times (summed P*_x over summed M*)
S$Qwx<-S$Vw*(S$Pwc_x+S$Pwd_x)/(S$Mwc+S$Mwd)
S$Qnx<-S$Vn*(S$Pnc_x+S$Pnd_x)/(S$Mnc+S$Mnd)
# NOTE(review): these x-based Krn/Krw values are overwritten by the z-based
# ones further down - confirm which direction is intended
S$Krn<-S$nun*S$Qnx/S$Fx
S$Krw<-S$nuw*S$Qwx/S$Fx
# y- and z-direction flow, same form as the x-direction
S$Qwy<-S$Vw*(S$Pwc_y+S$Pwd_y)/(S$Mwc+S$Mwd)
S$Qny<-S$Vn*(S$Pnc_y+S$Pnd_y)/(S$Mnc+S$Mnd)
S$Qwz<-S$Vw*(S$Pwc_z+S$Pwd_z)/(S$Mwc+S$Mwd)
S$Qnz<-S$Vn*(S$Pnc_z+S$Pnd_z)/(S$Mnc+S$Mnd)
# Final Krn/Krw: z-direction flow scaled by nun/nuw and divided by Fz
S$Krn<-S$nun*S$Qnz/S$Fz
S$Krw<-S$nuw*S$Qwz/S$Fz
# Tag every row with the directory it was read from
S$Case<-PATH
return(S)
}

537
gpu/D3Q7BC.cu Normal file
View File

@ -0,0 +1,537 @@
#include <math.h>
#include <stdio.h>
#include <cuda_profiler_api.h>
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    // One thread per entry of the two lists; threads past `count` do nothing
    const int link = blockIdx.x*blockDim.x + threadIdx.x;
    if (link >= count) return;

    const int distIndex  = BounceBackDist_list[link];   // distribution entry that is overwritten
    const int solidIndex = BounceBackSolid_list[link];  // entry of BoundaryValue to apply
    const double boundary = BoundaryValue[solidIndex];
    const double incoming = dist[distIndex];
    // Sign-flipped distribution plus the boundary value scaled by 0.25
    // (the original author's note attributes 0.25 to the D3Q7 speed of sound)
    dist[distIndex] = -1.0*incoming + boundary*0.25;
}
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    // One thread per entry of the two lists; threads past `count` do nothing
    const int link = blockIdx.x*blockDim.x + threadIdx.x;
    if (link >= count) return;

    const int distIndex  = BounceBackDist_list[link];   // distribution entry that is updated
    const int solidIndex = BounceBackSolid_list[link];  // entry of BoundaryValue to apply
    const double boundary = BoundaryValue[solidIndex];
    const double incoming = dist[distIndex];
    // The boundary value is added to the existing distribution
    dist[distIndex] = incoming + boundary;
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions; q=1..4 and q=6 are read from pair-swapped slots
    const double q0 = dist[site];
    const double q1 = dist[2*Np+site];
    const double q2 = dist[1*Np+site];
    const double q3 = dist[4*Np+site];
    const double q4 = dist[3*Np+site];
    const double q6 = dist[5*Np+site];
    // The unknown q=5 is set so that all seven distributions sum to Vin
    const double known = q0+q1+q2+q3+q4+q6;
    dist[6*Np+site] = Vin - known;
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions; q=1..5 are read from pair-swapped slots
    const double q0 = dist[site];
    const double q1 = dist[2*Np+site];
    const double q2 = dist[1*Np+site];
    const double q3 = dist[4*Np+site];
    const double q4 = dist[3*Np+site];
    const double q5 = dist[6*Np+site];
    // The unknown q=6 is set so that all seven distributions sum to Vout
    const double known = q0+q1+q2+q3+q4+q5;
    dist[5*Np+site] = Vout - known;
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions are gathered through the neighbor list
    const double q0 = dist[site];
    const double q1 = dist[d_neighborList[site]];
    const double q2 = dist[d_neighborList[site+Np]];
    const double q3 = dist[d_neighborList[site+2*Np]];
    const double q4 = dist[d_neighborList[site+3*Np]];
    const double q6 = dist[d_neighborList[site+5*Np]];
    // The unknown q=5 is stored at the slot named by the 4*Np neighbor entry,
    // chosen so that all seven distributions sum to Vin
    const int target = d_neighborList[site+4*Np];
    const double known = q0+q1+q2+q3+q4+q6;
    dist[target] = Vin - known;
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions are gathered through the neighbor list
    const double q0 = dist[site];
    const double q1 = dist[d_neighborList[site]];
    const double q2 = dist[d_neighborList[site+Np]];
    const double q3 = dist[d_neighborList[site+2*Np]];
    const double q4 = dist[d_neighborList[site+3*Np]];
    const double q5 = dist[d_neighborList[site+4*Np]];
    // The unknown q=6 is stored at the slot named by the 5*Np neighbor entry,
    // chosen so that all seven distributions sum to Vout
    const int target = d_neighborList[site+5*Np];
    const double known = q0+q1+q2+q3+q4+q5;
    dist[target] = Vout - known;
}
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    // list -> Map -> Psi: two-level lookup to the Psi entry that receives Vin
    const int psiIndex = Map[list[tid]];
    Psi[psiIndex] = Vin;
}
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    // list -> Map -> Psi: two-level lookup to the Psi entry that receives Vout
    const int psiIndex = Map[list[tid]];
    Psi[psiIndex] = Vout;
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions; q=1..4 and q=6 are read from pair-swapped slots
    const double q0 = dist[site];
    const double q1 = dist[2*Np+site];
    const double q2 = dist[1*Np+site];
    const double q3 = dist[4*Np+site];
    const double q4 = dist[3*Np+site];
    const double q6 = dist[5*Np+site];
    // The unknown q=5 is set so that all seven distributions sum to Cin
    const double known = q0+q1+q2+q3+q4+q6;
    dist[6*Np+site] = Cin - known;
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions; q=1..5 are read from pair-swapped slots
    const double q0 = dist[site];
    const double q1 = dist[2*Np+site];
    const double q2 = dist[1*Np+site];
    const double q3 = dist[4*Np+site];
    const double q4 = dist[3*Np+site];
    const double q5 = dist[6*Np+site];
    // The unknown q=6 is set so that all seven distributions sum to Cout
    const double known = q0+q1+q2+q3+q4+q5;
    dist[5*Np+site] = Cout - known;
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions are gathered through the neighbor list
    const double q0 = dist[site];
    const double q1 = dist[d_neighborList[site]];
    const double q2 = dist[d_neighborList[site+Np]];
    const double q3 = dist[d_neighborList[site+2*Np]];
    const double q4 = dist[d_neighborList[site+3*Np]];
    const double q6 = dist[d_neighborList[site+5*Np]];
    // The unknown q=5 is stored at the slot named by the 4*Np neighbor entry,
    // chosen so that all seven distributions sum to Cin
    const int target = d_neighborList[site+4*Np];
    const double known = q0+q1+q2+q3+q4+q6;
    dist[target] = Cin - known;
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
{
    // One thread per entry of `list`; surplus threads exit
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions are gathered through the neighbor list
    const double q0 = dist[site];
    const double q1 = dist[d_neighborList[site]];
    const double q2 = dist[d_neighborList[site+Np]];
    const double q3 = dist[d_neighborList[site+2*Np]];
    const double q4 = dist[d_neighborList[site+3*Np]];
    const double q5 = dist[d_neighborList[site+4*Np]];
    // The unknown q=6 is stored at the slot named by the 5*Np neighbor entry,
    // chosen so that all seven distributions sum to Cout
    const int target = d_neighborList[site+5*Np];
    const double known = q0+q1+q2+q3+q4+q5;
    dist[target] = Cout - known;
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    // FluxIn is the inward flux. One thread per entry of `list`.
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions; q=1..4 and q=6 are read from pair-swapped slots
    const double q0 = dist[site];
    const double q1 = dist[2*Np+site];
    const double q2 = dist[1*Np+site];
    const double q3 = dist[4*Np+site];
    const double q4 = dist[3*Np+site];
    const double q6 = dist[5*Np+site];
    const double partial = q0+q1+q2+q3+q4+q6;
    const double uz = VelocityZ[site];
    // Solve for the unknown q=5 from the flux relation
    const double numer = FluxIn+(1.0-0.5/tau)*q6-0.5*uz*partial/tau;
    const double denom = 1.0-0.5/tau+0.5*uz/tau;
    dist[6*Np+site] = numer/denom;
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    // FluxIn is the inward flux. One thread per entry of `list`.
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions; q=1..5 are read from pair-swapped slots
    const double q0 = dist[site];
    const double q1 = dist[2*Np+site];
    const double q2 = dist[1*Np+site];
    const double q3 = dist[4*Np+site];
    const double q4 = dist[3*Np+site];
    const double q5 = dist[6*Np+site];
    const double partial = q0+q1+q2+q3+q4+q5;
    const double uz = VelocityZ[site];
    // Solve for the unknown q=6 from the flux relation (velocity terms have
    // the opposite sign to the lower-case z variant)
    const double numer = FluxIn+(1.0-0.5/tau)*q5+0.5*uz*partial/tau;
    const double denom = 1.0-0.5/tau-0.5*uz/tau;
    dist[5*Np+site] = numer/denom;
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    // FluxIn is the inward flux. One thread per entry of `list`.
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions are gathered through the neighbor list
    const double q0 = dist[site];
    const double q1 = dist[d_neighborList[site]];
    const double q2 = dist[d_neighborList[site+Np]];
    const double q3 = dist[d_neighborList[site+2*Np]];
    const double q4 = dist[d_neighborList[site+3*Np]];
    const double q6 = dist[d_neighborList[site+5*Np]];
    const double partial = q0+q1+q2+q3+q4+q6;
    const double uz = VelocityZ[site];
    // Solve for the unknown q=5 from the flux relation
    const double numer = FluxIn+(1.0-0.5/tau)*q6-0.5*uz*partial/tau;
    const double denom = 1.0-0.5/tau+0.5*uz/tau;
    // The result is stored at the slot named by the 4*Np neighbor entry
    const int target = d_neighborList[site+4*Np];
    dist[target] = numer/denom;
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    // FluxIn is the inward flux. One thread per entry of `list`.
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known distributions are gathered through the neighbor list
    const double q0 = dist[site];
    const double q1 = dist[d_neighborList[site]];
    const double q2 = dist[d_neighborList[site+Np]];
    const double q3 = dist[d_neighborList[site+2*Np]];
    const double q4 = dist[d_neighborList[site+3*Np]];
    const double q5 = dist[d_neighborList[site+4*Np]];
    const double partial = q0+q1+q2+q3+q4+q5;
    const double uz = VelocityZ[site];
    // Solve for the unknown q=6 from the flux relation (velocity terms have
    // the opposite sign to the lower-case z variant)
    const double numer = FluxIn+(1.0-0.5/tau)*q5+0.5*uz*partial/tau;
    const double denom = 1.0-0.5/tau-0.5*uz/tau;
    // The result is stored at the slot named by the 5*Np neighbor entry
    const int target = d_neighborList[site+5*Np];
    dist[target] = numer/denom;
}
//*************************************************************************
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<numBlocks,threadsPerBlock>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Solid_Neumann_D3Q7<<<numBlocks,threadsPerBlock>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<numBlocks,threadsPerBlock>>>(list, dist, Vin, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<numBlocks,threadsPerBlock>>>(list, dist, Vout, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<numBlocks,threadsPerBlock>>>(d_neighborList, list, dist, Vin, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<numBlocks,threadsPerBlock>>>(d_neighborList, list, dist, Vout, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_z<<<numBlocks,threadsPerBlock>>>(list, Map, Psi, Vin, count);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<numBlocks,threadsPerBlock>>>(list, Map, Psi, Vout, count);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<numBlocks,threadsPerBlock>>>(list, dist, Cin, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<numBlocks,threadsPerBlock>>>(list, dist, Cout, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<numBlocks,threadsPerBlock>>>(d_neighborList, list, dist, Cin, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<numBlocks,threadsPerBlock>>>(d_neighborList, list, dist, Cout, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<<numBlocks,threadsPerBlock>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<<numBlocks,threadsPerBlock>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<<numBlocks,threadsPerBlock>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    // 512 threads per block; the extra block covers any remainder of `count`
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<<numBlocks,threadsPerBlock>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",cudaGetErrorString(status));
    }
}

2743
gpu/Greyscale.cu Normal file

File diff suppressed because it is too large Load Diff

3036
gpu/GreyscaleColor.cu Normal file

File diff suppressed because it is too large Load Diff

391
gpu/Ion.cu Normal file
View File

@ -0,0 +1,391 @@
#include <stdio.h>
#include <math.h>
//#include <cuda_profiler_api.h>
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
    // Each thread visits `passes` sites, one per loop trip, offset by `start`
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // Sum the seven distributions: q=0 is read in place, q=1..6 through
        // the neighbor list
        double total = dist[site];
        total += dist[neighborList[site]];
        total += dist[neighborList[site+Np]];
        total += dist[neighborList[site+2*Np]];
        total += dist[neighborList[site+3*Np]];
        total += dist[neighborList[site+4*Np]];
        total += dist[neighborList[site+5*Np]];
        Den[site] = total;
    }
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
    // Each thread visits `passes` sites, one per loop trip, offset by `start`
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // Sum the seven distributions; q=1..6 are read from pair-swapped slots
        double total = dist[site];   // q=0
        total += dist[2*Np+site];    // q=1
        total += dist[1*Np+site];    // q=2
        total += dist[4*Np+site];    // q=3
        total += dist[3*Np+site];    // q=4
        total += dist[6*Np+site];    // q=5
        total += dist[5*Np+site];    // q=6
        Den[site] = total;
    }
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    // Relaxes the seven distributions toward a first-order (linear in velocity)
    // equilibrium. The quadratic velocity terms are not applied; the original
    // kept them only as commented-out alternatives.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        const double conc = Den[site];
        // Electrochemically induced velocity: zi*Di/Vt times the electric field
        const double epx = zi*Di/Vt*ElectricField[site+0*Np];
        const double epy = zi*Di/Vt*ElectricField[site+1*Np];
        const double epz = zi*Di/Vt*ElectricField[site+2*Np];
        // Total velocity = fluid velocity + induced velocity
        const double vx = Velocity[site+0*Np]+epx;
        const double vy = Velocity[site+1*Np]+epy;
        const double vz = Velocity[site+2*Np]+epz;

        // Gather every distribution before any slot is overwritten
        const int nb1 = neighborList[site];
        const int nb2 = neighborList[site+Np];
        const int nb3 = neighborList[site+2*Np];
        const int nb4 = neighborList[site+3*Np];
        const int nb5 = neighborList[site+4*Np];
        const int nb6 = neighborList[site+5*Np];
        const double q0 = dist[site];
        const double q1 = dist[nb1];
        const double q2 = dist[nb2];
        const double q3 = dist[nb3];
        const double q4 = dist[nb4];
        const double q5 = dist[nb5];
        const double q6 = dist[nb6];

        // Each result is written to the slot its opposite direction was read from
        dist[site] = q0*(1.0-rlx)+rlx*0.25*conc;
        dist[nb2] = q1*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(vx));
        dist[nb1] = q2*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(vx));
        dist[nb4] = q3*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(vy));
        dist[nb3] = q4*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(vy));
        dist[nb6] = q5*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(vz));
        dist[nb5] = q6*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(vz));
    }
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    // Relaxes the seven distributions toward a first-order (linear in velocity)
    // equilibrium. The quadratic velocity terms are not applied; the original
    // kept them only as commented-out alternatives.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        const double conc = Den[site];
        // Electrochemically induced velocity: zi*Di/Vt times the electric field
        const double epx = zi*Di/Vt*ElectricField[site+0*Np];
        const double epy = zi*Di/Vt*ElectricField[site+1*Np];
        const double epz = zi*Di/Vt*ElectricField[site+2*Np];
        // Total velocity = fluid velocity + induced velocity
        const double vx = Velocity[site+0*Np]+epx;
        const double vy = Velocity[site+1*Np]+epy;
        const double vz = Velocity[site+2*Np]+epz;

        // Read all distributions first; q=1..6 come from pair-swapped slots
        const double q0 = dist[site];
        const double q1 = dist[2*Np+site];
        const double q2 = dist[1*Np+site];
        const double q3 = dist[4*Np+site];
        const double q4 = dist[3*Np+site];
        const double q5 = dist[6*Np+site];
        const double q6 = dist[5*Np+site];

        // Write each relaxed distribution to its own slot
        dist[site] = q0*(1.0-rlx)+rlx*0.25*conc;
        dist[1*Np+site] = q1*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(vx));
        dist[2*Np+site] = q2*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(vx));
        dist[3*Np+site] = q3*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(vy));
        dist[4*Np+site] = q4*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(vy));
        dist[5*Np+site] = q5*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(vz));
        dist[6*Np+site] = q6*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(vz));
    }
}
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
    // Fill every site with the same concentration: weight 0.25 for q=0 and
    // 0.125 for each of q=1..6
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    const double restPart = 0.25*DenInit;
    const double movingPart = 0.125*DenInit;
    for (int pass=0; pass<passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (site >= Np) continue;

        dist[0*Np+site] = restPart;
        for (int q=1; q<=6; q++){
            dist[q*Np+site] = movingPart;
        }
        Den[site] = DenInit;
    }
}
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
    // Initialise the distributions from the concentration already stored in
    // Den: weight 0.25 for q=0 and 0.125 for each of q=1..6. Den is only read.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (site >= Np) continue;

        const double conc = Den[site];
        dist[0*Np+site] = 0.25*conc;
        for (int q=1; q<=6; q++){
            dist[q*Np+site] = 0.125*conc;
        }
    }
}
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
    // Adds the contribution F*IonValence*Ci of one ion species to ChargeDensity
    // for the sites in [start, finish).
    //   Den            concentrations, species `ion_component` stored at offset ion_component*Np
    //   ChargeDensity  accumulated for ion_component > 0, overwritten otherwise
    int n;
    double Ci;//ion concentration of species i
    double CD_tmp;
    double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
    int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n<finish) {
            Ci = Den[n+ion_component*Np];
            CD_tmp = F*IonValence*Ci;
            if (ion_component>0){
                // Later species accumulate onto the running total
                ChargeDensity[n] = ChargeDensity[n] + CD_tmp;
            }
            else{
                // First species overwrites the buffer. The previous content is
                // not read: multiplying it by 0 would still propagate a NaN/Inf
                // left in memory that was never initialised.
                ChargeDensity[n] = CD_tmp;
            }
        }
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
    // Fixed launch shape; the kernel loops so that all sites are covered
    dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);

    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
    // Fixed launch shape; the kernel loops so that all sites are covered
    dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);

    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    // Fixed launch shape; the kernel loops so that all sites are covered
    dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);

    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion: %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    // Fixed launch shape; the kernel loops so that all sites are covered
    dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);

    // Launch errors are printed, not returned to the caller
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion: %s \n",cudaGetErrorString(status));
    }
}
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_Ion_Init: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}

330
gpu/Poisson.cu Normal file
View File

@ -0,0 +1,330 @@
#include <stdio.h>
#include <math.h>
//#include <cuda_profiler_api.h>
#define NBLOCKS 1024
#define NTHREADS 256
// Recover the electric potential from the D3Q7 distributions (odd AA step).
// For each site n in [start, finish) the kernel sums dist[n] and the six values
// dist[neighborList[n + k*Np]], k = 0..5, and stores the total at Psi[Map[n]].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Sites handled per thread so that the fixed launch grid covers Np sites
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;

    for (int pass=0; pass<sweeps; pass++){
        const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // q=0 is stored at the site itself
        double potential = dist[n];

        // q=1..6 are fetched through the neighbor list, in list order
        for (int k=0; k<6; k++){
            const int src = neighborList[n+k*Np];
            potential += dist[src];
        }

        Psi[Map[n]] = potential;
    }
}
// Recover the electric potential from the D3Q7 distributions (even AA step).
// For each site n in [start, finish) the kernel sums the seven values stored at
// dist[q*Np + n], q = 0..6, and writes the total to Psi[Map[n]].
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Sites handled per thread so that the fixed launch grid covers Np sites
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;

    for (int pass=0; pass<sweeps; pass++){
        const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // q=0
        double potential = dist[n];

        // Opposite directions are stored in swapped slots: direction q=1 is read
        // from slot 2 and q=2 from slot 1, and likewise for (3,4) and (5,6).
        for (int slot=1; slot<6; slot+=2){
            potential += dist[(slot+1)*Np+n];
            potential += dist[slot*Np+n];
        }

        Psi[Map[n]] = potential;
    }
}
// D3Q7 update for the Poisson solver (odd AA step).
// For each site n in [start, finish):
//   * loads the charge density (divided by epsilon_LB) and the potential Psi[Map[n]],
//   * gathers the seven distributions, six of them through neighborList,
//   * writes the electric field, computed from differences of opposite
//     distributions, to ElectricField[n + {0,1,2}*Np],
//   * relaxes every distribution with rate 1/tau and writes each one back to the
//     location its opposite direction was read from.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
    const double omega = 1.0/tau; // relaxation rate
    // Sites handled per thread so that the fixed launch grid covers Np sites
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;

    for (int pass=0; pass<sweeps; pass++){
        const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // Source term: local charge density scaled by the permittivity
        double source = Den_charge[n];
        source = source/epsilon_LB;
        const double potential = Psi[Map[n]];

        // Gather the distributions (q=0 locally, q=1..6 via the neighbor list)
        const double g0 = dist[n];
        const int loc1 = neighborList[n];
        const double g1 = dist[loc1];
        const int loc2 = neighborList[n+Np];
        const double g2 = dist[loc2];
        const int loc3 = neighborList[n+2*Np];
        const double g3 = dist[loc3];
        const int loc4 = neighborList[n+3*Np];
        const double g4 = dist[loc4];
        const int loc5 = neighborList[n+4*Np];
        const double g5 = dist[loc5];
        const int loc6 = neighborList[n+5*Np];
        const double g6 = dist[loc6];

        // Electric field; NOTE the unit here is V/lu, and the factor 4.0 is the
        // D3Q7 lattice speed of sound (as stated by the original author)
        ElectricField[n]      = (g1-g2)*omega*4.0;
        ElectricField[n+Np]   = (g3-g4)*omega*4.0;
        ElectricField[n+2*Np] = (g5-g6)*omega*4.0;

        // Relax and scatter: weight 0.25 for the rest direction, 0.125 otherwise.
        // Each direction is written to the slot its opposite was gathered from.
        dist[n]    = g0*(1.0-omega) + 0.25*(omega*potential+source);
        dist[loc2] = g1*(1.0-omega) + 0.125*(omega*potential+source);
        dist[loc1] = g2*(1.0-omega) + 0.125*(omega*potential+source);
        dist[loc4] = g3*(1.0-omega) + 0.125*(omega*potential+source);
        dist[loc3] = g4*(1.0-omega) + 0.125*(omega*potential+source);
        dist[loc6] = g5*(1.0-omega) + 0.125*(omega*potential+source);
        dist[loc5] = g6*(1.0-omega) + 0.125*(omega*potential+source);
    }
}
// D3Q7 update for the Poisson solver (even AA step).
// For each site n in [start, finish):
//   * loads the charge density (divided by epsilon_LB) and the potential Psi[Map[n]],
//   * reads the seven distributions from dist[slot*Np + n]; opposite directions are
//     read from swapped slots (q=1 from slot 2, q=2 from slot 1, ...),
//   * writes the electric field to ElectricField[n + {0,1,2}*Np],
//   * relaxes every distribution with rate 1/tau and stores direction q in slot q.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
    const double omega = 1.0/tau; // relaxation rate
    // Sites handled per thread so that the fixed launch grid covers Np sites
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;

    for (int pass=0; pass<sweeps; pass++){
        const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // Source term: local charge density scaled by the permittivity
        double source = Den_charge[n];
        source = source/epsilon_LB;
        const double potential = Psi[Map[n]];

        // Load the distributions (pairs read from swapped slots)
        const double g0 = dist[n];
        const double g1 = dist[2*Np+n];
        const double g2 = dist[1*Np+n];
        const double g3 = dist[4*Np+n];
        const double g4 = dist[3*Np+n];
        const double g5 = dist[6*Np+n];
        const double g6 = dist[5*Np+n];

        // Electric field; NOTE the unit here is V/lu, and the factor 4.0 is the
        // D3Q7 lattice speed of sound (as stated by the original author)
        ElectricField[n]      = (g1-g2)*omega*4.0;
        ElectricField[n+Np]   = (g3-g4)*omega*4.0;
        ElectricField[n+2*Np] = (g5-g6)*omega*4.0;

        // Relax and store: weight 0.25 for the rest direction, 0.125 otherwise
        dist[n]      = g0*(1.0-omega) + 0.25*(omega*potential+source);
        dist[1*Np+n] = g1*(1.0-omega) + 0.125*(omega*potential+source);
        dist[2*Np+n] = g2*(1.0-omega) + 0.125*(omega*potential+source);
        dist[3*Np+n] = g3*(1.0-omega) + 0.125*(omega*potential+source);
        dist[4*Np+n] = g4*(1.0-omega) + 0.125*(omega*potential+source);
        dist[5*Np+n] = g5*(1.0-omega) + 0.125*(omega*potential+source);
        dist[6*Np+n] = g6*(1.0-omega) + 0.125*(omega*potential+source);
    }
}
// Initialise the D3Q7 Poisson distributions from the potential field.
// For each site n in [start, finish) the value Psi[Map[n]] is distributed with
// weight 0.25 to slot 0 and weight 0.125 to each of slots 1..6 (dist[q*Np + n]).
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Sites handled per thread so that the fixed launch grid covers Np sites
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;

    for (int pass=0; pass<sweeps; pass++){
        const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        const int cell = Map[n]; // index of this site in the Psi array

        dist[0*Np+n] = 0.25*Psi[cell];
        for (int q=1; q<=6; q++){
            dist[q*Np+n] = 0.125*Psi[cell];
        }
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_Poisson_Init: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}

995
gpu/Stokes.cu Normal file
View File

@ -0,0 +1,995 @@
#include <stdio.h>
#include <math.h>
//#include <cuda_profiler_api.h>
#define NBLOCKS 1024
#define NTHREADS 256
// D3Q19 MRT collision with electric body force (odd AA step).
//
// For each site n in [start, finish) the kernel
//   1. builds the body force F = G + rhoE*E*(time_conv^2)/(h^2*1e-12)/den_scale from the
//      charge density and the electric field stored at ElectricField[n + {0,1,2}*Np],
//   2. gathers the 19 distributions (q=0 from dist[n], q=1..18 through
//      neighborList[n + (q-1)*Np]) and accumulates the moments rho, j and m1..m18,
//   3. writes the velocity j/rho0 to Velocity[n + {0,1,2}*Np],
//   4. relaxes the non-conserved moments towards equilibrium: rlx_setA is applied to
//      m1, m2 and m9..m15, rlx_setB to m4, m6, m8 and m16..m18,
//   5. transforms back, adds the force contribution and writes every direction to the
//      location its opposite direction was gathered from.
//
// All quadratic equilibrium terms are normalised by the reference density rho0; the
// local density rho enters only linearly.
__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){
    int n;
    double fq;
    // conserved moments
    double rho,jx,jy,jz;
    double ux,uy,uz;
    // non-conserved moments
    double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
    int nread;
    // body force due to electric field
    double rhoE;//charge density
    double Ex,Ey,Ez;
    // total body force
    double Fx,Fy,Fz;

    // coefficients of the inverse moment transformation
    const double mrt_V1=0.05263157894736842;
    const double mrt_V2=0.012531328320802;
    const double mrt_V3=0.04761904761904762;
    const double mrt_V4=0.004594820384294068;
    const double mrt_V5=0.01587301587301587;
    const double mrt_V6=0.0555555555555555555555555;
    const double mrt_V7=0.02777777777777778;
    const double mrt_V8=0.08333333333333333;
    const double mrt_V9=0.003341687552213868;
    const double mrt_V10=0.003968253968253968;
    const double mrt_V11=0.01388888888888889;
    const double mrt_V12=0.04166666666666666;

    // Number of sites each thread visits so that the fixed launch grid covers Np sites
    int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n<finish) {
            //Load data
            rhoE = ChargeDensity[n];
            Ex = ElectricField[n+0*Np];
            Ey = ElectricField[n+1*Np];
            Ez = ElectricField[n+2*Np];
            //compute total body force, including input body force (Gx,Gy,Gz)
            //NOTE(review): the trailing factors presumably convert the electric force
            //from physical to lattice units - confirm against the caller
            Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
            Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
            Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;

            //........................................................................
            // Gather the distributions and accumulate the moments
            //........................................................................
            // q=0
            fq = dist[n];
            rho = fq;
            m1  = -30.0*fq;
            m2  = 12.0*fq;

            // q=1
            nread = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
            fq = dist[nread]; // reading the f1 data into register fq
            rho += fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jx = fq;
            m4 = -4.0*fq;
            m9 = 2.0*fq;
            m10 = -4.0*fq;

            // q=2
            nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
            fq = dist[nread]; // reading the f2 data into register fq
            rho += fq;
            m1 -= 11.0*(fq);
            m2 -= 4.0*(fq);
            jx -= fq;
            m4 += 4.0*(fq);
            m9 += 2.0*(fq);
            m10 -= 4.0*(fq);

            // q=3
            nread = neighborList[n+2*Np]; // neighbor 4
            fq = dist[nread];
            rho += fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jy = fq;
            m6 = -4.0*fq;
            m9 -= fq;
            m10 += 2.0*fq;
            m11 = fq;
            m12 = -2.0*fq;

            // q = 4
            nread = neighborList[n+3*Np]; // neighbor 3
            fq = dist[nread];
            rho+= fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jy -= fq;
            m6 += 4.0*fq;
            m9 -= fq;
            m10 += 2.0*fq;
            m11 += fq;
            m12 -= 2.0*fq;

            // q=5
            nread = neighborList[n+4*Np];
            fq = dist[nread];
            rho += fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jz = fq;
            m8 = -4.0*fq;
            m9 -= fq;
            m10 += 2.0*fq;
            m11 -= fq;
            m12 += 2.0*fq;

            // q = 6
            nread = neighborList[n+5*Np];
            fq = dist[nread];
            rho+= fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jz -= fq;
            m8 += 4.0*fq;
            m9 -= fq;
            m10 += 2.0*fq;
            m11 -= fq;
            m12 += 2.0*fq;

            // q=7
            nread = neighborList[n+6*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx += fq;
            m4 += fq;
            jy += fq;
            m6 += fq;
            m9  += fq;
            m10 += fq;
            m11 += fq;
            m12 += fq;
            m13 = fq;
            m16 = fq;
            m17 = -fq;

            // q = 8
            nread = neighborList[n+7*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx -= fq;
            m4 -= fq;
            jy -= fq;
            m6 -= fq;
            m9 += fq;
            m10 += fq;
            m11 += fq;
            m12 += fq;
            m13 += fq;
            m16 -= fq;
            m17 += fq;

            // q=9
            nread = neighborList[n+8*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx += fq;
            m4 += fq;
            jy -= fq;
            m6 -= fq;
            m9 += fq;
            m10 += fq;
            m11 += fq;
            m12 += fq;
            m13 -= fq;
            m16 += fq;
            m17 += fq;

            // q = 10
            nread = neighborList[n+9*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx -= fq;
            m4 -= fq;
            jy += fq;
            m6 += fq;
            m9 += fq;
            m10 += fq;
            m11 += fq;
            m12 += fq;
            m13 -= fq;
            m16 -= fq;
            m17 -= fq;

            // q=11
            nread = neighborList[n+10*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx += fq;
            m4 += fq;
            jz += fq;
            m8 += fq;
            m9 += fq;
            m10 += fq;
            m11 -= fq;
            m12 -= fq;
            m15 = fq;
            m16 -= fq;
            m18 = fq;

            // q=12
            nread = neighborList[n+11*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx -= fq;
            m4 -= fq;
            jz -= fq;
            m8 -= fq;
            m9 += fq;
            m10 += fq;
            m11 -= fq;
            m12 -= fq;
            m15 += fq;
            m16 += fq;
            m18 -= fq;

            // q=13
            nread = neighborList[n+12*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx += fq;
            m4 += fq;
            jz -= fq;
            m8 -= fq;
            m9 += fq;
            m10 += fq;
            m11 -= fq;
            m12 -= fq;
            m15 -= fq;
            m16 -= fq;
            m18 -= fq;

            // q=14
            nread = neighborList[n+13*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx -= fq;
            m4 -= fq;
            jz += fq;
            m8 += fq;
            m9 += fq;
            m10 += fq;
            m11 -= fq;
            m12 -= fq;
            m15 -= fq;
            m16 += fq;
            m18 += fq;

            // q=15
            nread = neighborList[n+14*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jy += fq;
            m6 += fq;
            jz += fq;
            m8 += fq;
            m9 -= 2.0*fq;
            m10 -= 2.0*fq;
            m14 = fq;
            m17 += fq;
            m18 -= fq;

            // q=16
            nread = neighborList[n+15*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jy -= fq;
            m6 -= fq;
            jz -= fq;
            m8 -= fq;
            m9 -= 2.0*fq;
            m10 -= 2.0*fq;
            m14 += fq;
            m17 -= fq;
            m18 += fq;

            // q=17
            nread = neighborList[n+16*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jy += fq;
            m6 += fq;
            jz -= fq;
            m8 -= fq;
            m9 -= 2.0*fq;
            m10 -= 2.0*fq;
            m14 -= fq;
            m17 += fq;
            m18 += fq;

            // q=18
            nread = neighborList[n+17*Np];
            fq = dist[nread];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jy -= fq;
            m6 -= fq;
            jz += fq;
            m8 += fq;
            m9 -= 2.0*fq;
            m10 -= 2.0*fq;
            m14 -= fq;
            m17 -= fq;
            m18 -= fq;

            // write the velocity (momentum divided by the reference density)
            ux = jx / rho0;
            uy = jy / rho0;
            uz = jz / rho0;
            Velocity[n] = ux;
            Velocity[Np+n] = uy;
            Velocity[2*Np+n] = uz;

            //..............carry out relaxation process...............................................
            m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
            m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
            m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
            m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
            m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
            m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
            // m10 equilibrium is -0.5 times the m9 equilibrium, so it carries the same
            // rho0 normalisation as every other quadratic term
            m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho0) - m10);
            m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
            m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
            m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
            m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
            m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
            m16 = m16 + rlx_setB*( - m16);
            m17 = m17 + rlx_setB*( - m17);
            m18 = m18 + rlx_setB*( - m18);
            //.......................................................................................................

            //.................inverse transformation (with body force)....................................
            // Each direction q is written to the location its opposite was read from.
            // q=0
            fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
            dist[n] = fq;

            // q = 1
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx;
            nread = neighborList[n+Np];
            dist[nread] = fq;

            // q=2
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) -  0.16666666*Fx;
            nread = neighborList[n];
            dist[nread] = fq;

            // q = 3
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
            nread = neighborList[n+3*Np];
            dist[nread] = fq;

            // q = 4
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
            nread = neighborList[n+2*Np];
            dist[nread] = fq;

            // q = 5
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
            nread = neighborList[n+5*Np];
            dist[nread] = fq;

            // q = 6
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
            nread = neighborList[n+4*Np];
            dist[nread] = fq;

            // q = 7
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
                    +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                    +mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
            nread = neighborList[n+7*Np];
            dist[nread] = fq;

            // q = 8
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                    +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
            nread = neighborList[n+6*Np];
            dist[nread] = fq;

            // q = 9
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
                    +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                    +mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
            nread = neighborList[n+9*Np];
            dist[nread] = fq;

            // q = 10
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
                    +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                    +mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
            nread = neighborList[n+8*Np];
            dist[nread] = fq;

            // q = 11
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
                    +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                    -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
            nread = neighborList[n+11*Np];
            dist[nread] = fq;

            // q = 12
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
                    +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                    -mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
            nread = neighborList[n+10*Np];
            dist[nread]= fq;

            // q = 13
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
                    +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                    -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
            nread = neighborList[n+13*Np];
            dist[nread] = fq;

            // q= 14
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
                    +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                    -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
            nread = neighborList[n+12*Np];
            dist[nread] = fq;

            // q = 15
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
                    -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
            nread = neighborList[n+15*Np];
            dist[nread] = fq;

            // q = 16
            fq =  mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
                    -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
            nread = neighborList[n+14*Np];
            dist[nread] = fq;

            // q = 17
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
                    -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
            nread = neighborList[n+17*Np];
            dist[nread] = fq;

            // q = 18
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
                    -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
            nread = neighborList[n+16*Np];
            dist[nread] = fq;
        }
    }
}
// D3Q19 MRT collision with electric body force (even AA step).
//
// For each site n in [start, finish) the kernel
//   1. builds the body force F = G + rhoE*E*(time_conv^2)/(h^2*1e-12)/den_scale from the
//      charge density and the electric field stored at ElectricField[n + {0,1,2}*Np],
//   2. reads the 19 distributions from dist[slot*Np + n]; opposite directions are read
//      from swapped slots (q=1 from slot 2, q=2 from slot 1, ...), and accumulates the
//      moments rho, j and m1..m18,
//   3. writes the velocity j/rho0 to Velocity[n + {0,1,2}*Np],
//   4. relaxes the non-conserved moments towards equilibrium: rlx_setA is applied to
//      m1, m2 and m9..m15, rlx_setB to m4, m6, m8 and m16..m18,
//   5. transforms back, adds the force contribution and stores direction q in slot q.
//
// All quadratic equilibrium terms are normalised by the reference density rho0; the
// local density rho enters only linearly.
__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
    int n;
    double fq;
    // conserved moments
    double rho,jx,jy,jz;
    double ux,uy,uz;
    // non-conserved moments
    double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
    // body force due to electric field
    double rhoE;//charge density
    double Ex,Ey,Ez;
    // total body force
    double Fx,Fy,Fz;

    // coefficients of the inverse moment transformation
    const double mrt_V1=0.05263157894736842;
    const double mrt_V2=0.012531328320802;
    const double mrt_V3=0.04761904761904762;
    const double mrt_V4=0.004594820384294068;
    const double mrt_V5=0.01587301587301587;
    const double mrt_V6=0.0555555555555555555555555;
    const double mrt_V7=0.02777777777777778;
    const double mrt_V8=0.08333333333333333;
    const double mrt_V9=0.003341687552213868;
    const double mrt_V10=0.003968253968253968;
    const double mrt_V11=0.01388888888888889;
    const double mrt_V12=0.04166666666666666;

    // Number of sites each thread visits so that the fixed launch grid covers Np sites
    int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n<finish) {
            //Load data
            rhoE = ChargeDensity[n];
            Ex = ElectricField[n+0*Np];
            Ey = ElectricField[n+1*Np];
            Ez = ElectricField[n+2*Np];
            //compute total body force, including input body force (Gx,Gy,Gz)
            Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;//the extra factors at the end necessarily convert unit from phys to LB
            Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
            Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;

            //........................................................................
            // Read the distributions (opposite directions come from swapped slots)
            // and accumulate the moments
            //........................................................................
            // q=0
            fq = dist[n];
            rho = fq;
            m1  = -30.0*fq;
            m2  = 12.0*fq;

            // q=1
            fq = dist[2*Np+n];
            rho += fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jx = fq;
            m4 = -4.0*fq;
            m9 = 2.0*fq;
            m10 = -4.0*fq;

            // q=2
            fq = dist[1*Np+n];
            rho += fq;
            m1 -= 11.0*(fq);
            m2 -= 4.0*(fq);
            jx -= fq;
            m4 += 4.0*(fq);
            m9 += 2.0*(fq);
            m10 -= 4.0*(fq);

            // q=3
            fq = dist[4*Np+n];
            rho += fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jy = fq;
            m6 = -4.0*fq;
            m9 -= fq;
            m10 += 2.0*fq;
            m11 = fq;
            m12 = -2.0*fq;

            // q = 4
            fq = dist[3*Np+n];
            rho+= fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jy -= fq;
            m6 += 4.0*fq;
            m9 -= fq;
            m10 += 2.0*fq;
            m11 += fq;
            m12 -= 2.0*fq;

            // q=5
            fq = dist[6*Np+n];
            rho += fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jz = fq;
            m8 = -4.0*fq;
            m9 -= fq;
            m10 += 2.0*fq;
            m11 -= fq;
            m12 += 2.0*fq;

            // q = 6
            fq = dist[5*Np+n];
            rho+= fq;
            m1 -= 11.0*fq;
            m2 -= 4.0*fq;
            jz -= fq;
            m8 += 4.0*fq;
            m9 -= fq;
            m10 += 2.0*fq;
            m11 -= fq;
            m12 += 2.0*fq;

            // q=7
            fq = dist[8*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx += fq;
            m4 += fq;
            jy += fq;
            m6 += fq;
            m9  += fq;
            m10 += fq;
            m11 += fq;
            m12 += fq;
            m13 = fq;
            m16 = fq;
            m17 = -fq;

            // q = 8
            fq = dist[7*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx -= fq;
            m4 -= fq;
            jy -= fq;
            m6 -= fq;
            m9 += fq;
            m10 += fq;
            m11 += fq;
            m12 += fq;
            m13 += fq;
            m16 -= fq;
            m17 += fq;

            // q=9
            fq = dist[10*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx += fq;
            m4 += fq;
            jy -= fq;
            m6 -= fq;
            m9 += fq;
            m10 += fq;
            m11 += fq;
            m12 += fq;
            m13 -= fq;
            m16 += fq;
            m17 += fq;

            // q = 10
            fq = dist[9*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx -= fq;
            m4 -= fq;
            jy += fq;
            m6 += fq;
            m9 += fq;
            m10 += fq;
            m11 += fq;
            m12 += fq;
            m13 -= fq;
            m16 -= fq;
            m17 -= fq;

            // q=11
            fq = dist[12*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx += fq;
            m4 += fq;
            jz += fq;
            m8 += fq;
            m9 += fq;
            m10 += fq;
            m11 -= fq;
            m12 -= fq;
            m15 = fq;
            m16 -= fq;
            m18 = fq;

            // q=12
            fq = dist[11*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx -= fq;
            m4 -= fq;
            jz -= fq;
            m8 -= fq;
            m9 += fq;
            m10 += fq;
            m11 -= fq;
            m12 -= fq;
            m15 += fq;
            m16 += fq;
            m18 -= fq;

            // q=13
            fq = dist[14*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx += fq;
            m4 += fq;
            jz -= fq;
            m8 -= fq;
            m9 += fq;
            m10 += fq;
            m11 -= fq;
            m12 -= fq;
            m15 -= fq;
            m16 -= fq;
            m18 -= fq;

            // q=14
            fq = dist[13*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jx -= fq;
            m4 -= fq;
            jz += fq;
            m8 += fq;
            m9 += fq;
            m10 += fq;
            m11 -= fq;
            m12 -= fq;
            m15 -= fq;
            m16 += fq;
            m18 += fq;

            // q=15
            fq = dist[16*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jy += fq;
            m6 += fq;
            jz += fq;
            m8 += fq;
            m9 -= 2.0*fq;
            m10 -= 2.0*fq;
            m14 = fq;
            m17 += fq;
            m18 -= fq;

            // q=16
            fq = dist[15*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jy -= fq;
            m6 -= fq;
            jz -= fq;
            m8 -= fq;
            m9 -= 2.0*fq;
            m10 -= 2.0*fq;
            m14 += fq;
            m17 -= fq;
            m18 += fq;

            // q=17
            fq = dist[18*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jy += fq;
            m6 += fq;
            jz -= fq;
            m8 -= fq;
            m9 -= 2.0*fq;
            m10 -= 2.0*fq;
            m14 -= fq;
            m17 += fq;
            m18 += fq;

            // q=18
            fq = dist[17*Np+n];
            rho += fq;
            m1 += 8.0*fq;
            m2 += fq;
            jy -= fq;
            m6 -= fq;
            jz += fq;
            m8 += fq;
            m9 -= 2.0*fq;
            m10 -= 2.0*fq;
            m14 -= fq;
            m17 -= fq;
            m18 -= fq;

            // write the velocity (momentum divided by the reference density)
            ux = jx / rho0;
            uy = jy / rho0;
            uz = jz / rho0;
            Velocity[n] = ux;
            Velocity[Np+n] = uy;
            Velocity[2*Np+n] = uz;

            //..............carry out relaxation process...............................................
            m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
            m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
            m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
            m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
            m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
            m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
            // m10 equilibrium is -0.5 times the m9 equilibrium, so it carries the same
            // rho0 normalisation as every other quadratic term
            m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho0) - m10);
            m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
            m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
            m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
            m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
            m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
            m16 = m16 + rlx_setB*( - m16);
            m17 = m17 + rlx_setB*( - m17);
            m18 = m18 + rlx_setB*( - m18);
            //.......................................................................................................

            //.................inverse transformation (with body force)....................................
            // q=0
            fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
            dist[n] = fq;

            // q = 1
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx;
            dist[1*Np+n] = fq;

            // q=2
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) -  0.16666666*Fx;
            dist[2*Np+n] = fq;

            // q = 3
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
            dist[3*Np+n] = fq;

            // q = 4
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
            dist[4*Np+n] = fq;

            // q = 5
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
            dist[5*Np+n] = fq;

            // q = 6
            fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
            dist[6*Np+n] = fq;

            // q = 7
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
                    +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                    +mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
            dist[7*Np+n] = fq;

            // q = 8
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                    +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
            dist[8*Np+n] = fq;

            // q = 9
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
                    +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                    +mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
            dist[9*Np+n] = fq;

            // q = 10
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
                    +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
                    +mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
            dist[10*Np+n] = fq;

            // q = 11
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
                    +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                    -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
            dist[11*Np+n] = fq;

            // q = 12
            fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
                    +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                    -mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
            dist[12*Np+n] = fq;

            // q = 13
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
                    +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                    -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
            dist[13*Np+n] = fq;

            // q= 14
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
                    +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
                    -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
            dist[14*Np+n] = fq;

            // q = 15
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
                    -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
            dist[15*Np+n] = fq;

            // q = 16
            fq =  mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
                    -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
            dist[16*Np+n] = fq;

            // q = 17
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
                    -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
            dist[17*Np+n] = fq;

            // q = 18
            fq = mrt_V1*rho+mrt_V9*m1
                    +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
                    -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
            dist[18*Np+n] = fq;
            //........................................................................
        }
    }
}
extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q19_AAodd_StokesMRT<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<<NBLOCKS,NTHREADS >>>(dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",cudaGetErrorString(err));
}
//cudaProfilerStop();
}

View File

@ -1,4 +1,4 @@
/*
/*
color lattice boltzmann model
*/
#include "models/ColorModel.h"
@ -9,7 +9,7 @@ color lattice boltzmann model
#include <stdlib.h>
#include <time.h>
ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM):
ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM):
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0),
Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0),
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM)
@ -56,8 +56,6 @@ void ScaLBL_ColorModel::ReadCheckpoint(char *FILENAME, double *cPhi, double *cfq
File.close();
}
*/
void ScaLBL_ColorModel::ReadParams(string filename){
// read the input database
db = std::make_shared<Database>( filename );
@ -123,28 +121,31 @@ void ScaLBL_ColorModel::ReadParams(string filename){
//if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details)
BoundaryCondition = 0;
if (domain_db->keyExists( "BC" )){
if (color_db->keyExists( "BC" )){
BoundaryCondition = color_db->getScalar<int>( "BC" );
}
else if (domain_db->keyExists( "BC" )){
BoundaryCondition = domain_db->getScalar<int>( "BC" );
}
// Override user-specified boundary condition for specific protocols
auto protocol = color_db->getWithDefault<std::string>( "protocol", "none" );
if (protocol == "seed water"){
if (BoundaryCondition != 0 ){
if (BoundaryCondition != 0 && BoundaryCondition != 5){
BoundaryCondition = 0;
if (rank==0) printf("WARNING: protocol (seed water) supports only full periodic boundary condition \n");
}
domain_db->putScalar<int>( "BC", BoundaryCondition );
}
else if (protocol == "open connected oil"){
if (BoundaryCondition != 0 ){
if (BoundaryCondition != 0 && BoundaryCondition != 5){
BoundaryCondition = 0;
if (rank==0) printf("WARNING: protocol (open connected oil) supports only full periodic boundary condition \n");
}
domain_db->putScalar<int>( "BC", BoundaryCondition );
}
else if (protocol == "shell aggregation"){
if (BoundaryCondition != 0 ){
if (BoundaryCondition != 0 && BoundaryCondition != 5){
BoundaryCondition = 0;
if (rank==0) printf("WARNING: protocol (shell aggregation) supports only full periodic boundary condition \n");
}
@ -167,9 +168,9 @@ void ScaLBL_ColorModel::SetDomain(){
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
Averages = std::shared_ptr<SubPhase> ( new SubPhase(Dm) ); // TwoPhase analysis object
comm.barrier();
MPI_Barrier(comm);
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
// Read domain parameters
rank = Dm->rank();
nprocx = Dm->nprocx();
@ -192,12 +193,12 @@ void ScaLBL_ColorModel::ReadInput(){
}
else if (domain_db->keyExists( "GridFile" )){
// Read the local domain data
auto input_id = readMicroCT( *domain_db, comm );
auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD );
// Fill the halo (assuming GCW of 1)
array<int,3> size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) };
ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz };
ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 );
fillHalo<signed char> fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 );
fillHalo<signed char> fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 );
Array<signed char> id_view;
id_view.viewRaw( size1, Mask->id.data() );
fill.copy( input_id, id_view );
@ -292,7 +293,7 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase)
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
for (size_t idx=0; idx<NLABELS; idx++)
label_count_global[idx] = Dm->Comm.sumReduce( label_count[idx] );
label_count_global[idx]=sumReduce( Dm->Comm, label_count[idx]);
if (rank==0){
printf("Component labels: %lu \n",NLABELS);
@ -465,14 +466,15 @@ void ScaLBL_ColorModel::Initialize(){
ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double));
ScaLBL_DeviceBarrier();
comm.barrier();
MPI_Barrier(comm);
}
if (rank==0) printf ("Initializing phase field \n");
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
if (BoundaryCondition >0 ){
// establish reservoirs for external bC
if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){
if (Dm->kproc()==0){
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0);
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1);
@ -495,6 +497,7 @@ void ScaLBL_ColorModel::Run(){
int IMAGE_COUNT = 0;
std::vector<std::string> ImageList;
bool SET_CAPILLARY_NUMBER = false;
bool RESCALE_FORCE = false;
bool MORPH_ADAPT = false;
bool USE_MORPH = false;
bool USE_SEED = false;
@ -503,6 +506,7 @@ void ScaLBL_ColorModel::Run(){
int MAX_MORPH_TIMESTEPS = 50000; // maximum number of LBM timesteps to spend in morphological adaptation routine
int MIN_STEADY_TIMESTEPS = 100000;
int MAX_STEADY_TIMESTEPS = 200000;
int RESCALE_FORCE_AFTER_TIMESTEP = 0;
int RAMP_TIMESTEPS = 0;//50000; // number of timesteps to run initially (to get a reasonable velocity field before other pieces kick in)
int CURRENT_MORPH_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time)
int CURRENT_STEADY_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time)
@ -517,13 +521,25 @@ void ScaLBL_ColorModel::Run(){
double initial_volume = 0.0;
double delta_volume = 0.0;
double delta_volume_target = 0.0;
double RESIDUAL_ENDPOINT_THRESHOLD = 0.04;
double NOISE_THRESHOLD = 0.0;
double BUMP_RATE = 2.0;
bool USE_BUMP_RATE = false;
int RESCALE_FORCE_COUNT = 0;
int RESCALE_FORCE_MAX = 0;
/* history for morphological algorithm */
double KRA_MORPH_FACTOR=0.5;
double volA_prev = 0.0;
double log_krA_prev = 1.0;
double log_krA_target = 1.0;
double log_krA = 1.0;
double slope_krA_volume = 0.0;
if (color_db->keyExists( "vol_A_previous" )){
volA_prev = color_db->getScalar<double>( "vol_A_previous" );
}
if (color_db->keyExists( "log_krA_previous" )){
log_krA_prev = color_db->getScalar<double>( "log_krA_previous" );
}
if (color_db->keyExists( "krA_morph_factor" )){
KRA_MORPH_FACTOR = color_db->getScalar<double>( "krA_morph_factor" );
}
/* defaults for simulation protocols */
auto protocol = color_db->getWithDefault<std::string>( "protocol", "none" );
if (protocol == "image sequence"){
// Get the list of images
@ -535,46 +551,33 @@ void ScaLBL_ColorModel::Run(){
USE_MORPH = true;
}
else if (protocol == "seed water"){
morph_delta = 0.05;
morph_delta = -0.05;
seed_water = 0.01;
USE_SEED = true;
USE_MORPH = true;
}
else if (protocol == "open connected oil"){
morph_delta = 0.05;
morph_delta = -0.05;
USE_MORPH = true;
USE_MORPHOPEN_OIL = true;
}
else if (protocol == "shell aggregation"){
morph_delta = 0.05;
morph_delta = -0.05;
USE_MORPH = true;
}
if (color_db->keyExists( "residual_endpoint_threshold" )){
RESIDUAL_ENDPOINT_THRESHOLD = color_db->getScalar<double>( "residual_endpoint_threshold" );
}
NULL_USE( RESIDUAL_ENDPOINT_THRESHOLD );
if (color_db->keyExists( "noise_threshold" )){
NOISE_THRESHOLD = color_db->getScalar<double>( "noise_threshold" );
USE_BUMP_RATE = true;
}
if (color_db->keyExists( "bump_rate" )){
BUMP_RATE = color_db->getScalar<double>( "bump_rate" );
USE_BUMP_RATE = true;
}
if (color_db->keyExists( "capillary_number" )){
capillary_number = color_db->getScalar<double>( "capillary_number" );
SET_CAPILLARY_NUMBER=true;
//RESCALE_FORCE_MAX = 1;
}
if (analysis_db->keyExists( "rescale_force_count" )){
RESCALE_FORCE_MAX = analysis_db->getScalar<int>( "rescale_force_count" );
if (color_db->keyExists( "rescale_force_after_timestep" )){
RESCALE_FORCE_AFTER_TIMESTEP = color_db->getScalar<int>( "rescale_force_after_timestep" );
RESCALE_FORCE = true;
}
if (color_db->keyExists( "timestep" )){
timestep = color_db->getScalar<int>( "timestep" );
}
if (BoundaryCondition != 0 && SET_CAPILLARY_NUMBER==true){
if (rank == 0) printf("WARINING: capillary number target only supported for BC = 0 \n");
if (BoundaryCondition != 0 && BoundaryCondition != 5 && SET_CAPILLARY_NUMBER==true){
if (rank == 0) printf("WARINING: capillary number target only supported for BC = 0 or 5 \n");
SET_CAPILLARY_NUMBER=false;
}
if (analysis_db->keyExists( "seed_water" )){
@ -651,8 +654,8 @@ void ScaLBL_ColorModel::Run(){
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
starttime = Utilities::MPI::time();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
//************ MAIN ITERATION LOOP ***************************************/
@ -677,7 +680,7 @@ void ScaLBL_ColorModel::Run(){
// Perform the collision operation
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
if (BoundaryCondition > 0){
if (BoundaryCondition > 0 && BoundaryCondition < 5){
ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB);
ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB);
}
@ -698,10 +701,14 @@ void ScaLBL_ColorModel::Run(){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier();
MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL);
// *************EVEN TIMESTEP*************
timestep++;
@ -715,7 +722,7 @@ void ScaLBL_ColorModel::Run(){
// Perform the collision operation
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
// Halo exchange for phase field
if (BoundaryCondition > 0){
if (BoundaryCondition > 0 && BoundaryCondition < 5){
ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB);
ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB);
}
@ -734,21 +741,23 @@ void ScaLBL_ColorModel::Run(){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier();
MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL);
//************************************************************************
PROFILE_STOP("Update");
if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition > 0){
if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition == 4){
printf("%i %f \n",timestep,din);
}
// Run the analysis
analysis.basic(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den );
// allow initial ramp-up to get closer to steady state
if (timestep > RAMP_TIMESTEPS && timestep%analysis_interval == 0 && USE_MORPH){
analysis.finish();
@ -758,7 +767,7 @@ void ScaLBL_ColorModel::Run(){
double volA = Averages->gnb.V;
volA /= Dm->Volume;
volB /= Dm->Volume;;
initial_volume = volA*Dm->Volume;
//initial_volume = volA*Dm->Volume;
double vA_x = Averages->gnb.Px/Averages->gnb.M;
double vA_y = Averages->gnb.Py/Averages->gnb.M;
double vA_z = Averages->gnb.Pz/Averages->gnb.M;
@ -790,28 +799,50 @@ void ScaLBL_ColorModel::Run(){
isSteady = true;
if (CURRENT_STEADY_TIMESTEPS > MAX_STEADY_TIMESTEPS)
isSteady = true;
if (SET_CAPILLARY_NUMBER && RESCALE_FORCE_COUNT < RESCALE_FORCE_MAX){
RESCALE_FORCE_COUNT++;
Fx *= capillary_number / Ca;
Fy *= capillary_number / Ca;
Fz *= capillary_number / Ca;
if (force_mag > 1e-3){
Fx *= 1e-3/force_mag; // impose ceiling for stability
Fy *= 1e-3/force_mag;
Fz *= 1e-3/force_mag;
}
if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca);
Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta);
color_db->putVector<double>("F",{Fx,Fy,Fz});
if (RESCALE_FORCE == true && SET_CAPILLARY_NUMBER == true && CURRENT_STEADY_TIMESTEPS > RESCALE_FORCE_AFTER_TIMESTEP){
RESCALE_FORCE = false;
double RESCALE_FORCE_FACTOR = capillary_number / Ca;
if (RESCALE_FORCE_FACTOR > 2.0) RESCALE_FORCE_FACTOR = 2.0;
if (RESCALE_FORCE_FACTOR < 0.5) RESCALE_FORCE_FACTOR = 0.5;
Fx *= RESCALE_FORCE_FACTOR;
Fy *= RESCALE_FORCE_FACTOR;
Fz *= RESCALE_FORCE_FACTOR;
force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz);
if (force_mag > 1e-3){
Fx *= 1e-3/force_mag; // impose ceiling for stability
Fy *= 1e-3/force_mag;
Fz *= 1e-3/force_mag;
}
if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca);
Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta);
color_db->putVector<double>("F",{Fx,Fy,Fz});
}
if ( isSteady ){
MORPH_ADAPT = true;
CURRENT_MORPH_TIMESTEPS=0;
delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change
//****** ENDPOINT ADAPTATION ********/
double krA_TMP= fabs(muA*flow_rate_A / force_mag);
double krB_TMP= fabs(muB*flow_rate_B / force_mag);
log_krA = log(krA_TMP);
if (krA_TMP < 0.0){
// cannot do endpoint adaptation if kr is negative
log_krA = log_krA_prev;
}
else if (krA_TMP < krB_TMP && morph_delta > 0.0){
/** morphological target based on relative permeability for A **/
log_krA_target = log(KRA_MORPH_FACTOR*(krA_TMP));
slope_krA_volume = (log_krA - log_krA_prev)/(Dm->Volume*(volA - volA_prev));
delta_volume_target=min(delta_volume_target,Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume));
if (rank==0){
printf(" Enabling endpoint adaptation: krA = %f, krB = %f \n",krA_TMP,krB_TMP);
printf(" log(kr)=%f, volume=%f, TARGET log(kr)=%f, volume change=%f \n",log_krA, volA, log_krA_target, delta_volume_target/(volA*Dm->Volume));
}
}
log_krA_prev = log_krA;
volA_prev = volA;
//******************************** **/
/** compute averages & write data **/
Averages->Full();
Averages->Write(timestep);
analysis.WriteVisData(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den );
@ -826,8 +857,8 @@ void ScaLBL_ColorModel::Run(){
double pB = Averages->gwb.p;
double pAc = Averages->gnc.p;
double pBc = Averages->gwc.p;
double pAB = (pA-pB)/(h*5.796*alpha);
double pAB_connected = (pAc-pBc)/(h*5.796*alpha);
double pAB = (pA-pB)/(h*6.0*alpha);
double pAB_connected = (pAc-pBc)/(h*6.0*alpha);
// connected contribution
double Vol_nc = Averages->gnc.V/Dm->Volume;
double Vol_wc = Averages->gwc.V/Dm->Volume;
@ -885,26 +916,16 @@ void ScaLBL_ColorModel::Run(){
Fx *= capillary_number / Ca;
Fy *= capillary_number / Ca;
Fz *= capillary_number / Ca;
RESCALE_FORCE_COUNT = 1;
if (force_mag > 1e-3){
Fx *= 1e-3/force_mag; // impose ceiling for stability
Fy *= 1e-3/force_mag;
Fz *= 1e-3/force_mag;
}
if (flow_rate_A < NOISE_THRESHOLD && USE_BUMP_RATE){
if (rank==0) printf("Hit noise threshold (%f): bumping capillary number by %f X \n",NOISE_THRESHOLD,BUMP_RATE);
Fx *= BUMP_RATE; // impose bump condition
Fy *= BUMP_RATE;
Fz *= BUMP_RATE;
capillary_number *= BUMP_RATE;
color_db->putScalar<int>("capillary_number",capillary_number);
current_db->putDatabase("Color", color_db);
MORPH_ADAPT = false; // re-run current point if below noise threshold
}
if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca);
Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta);
color_db->putVector<double>("F",{Fx,Fy,Fz});
}
CURRENT_STEADY_TIMESTEPS = 0;
}
else{
@ -937,8 +958,8 @@ void ScaLBL_ColorModel::Run(){
else if (USE_SEED){
delta_volume = volA*Dm->Volume - initial_volume;
CURRENT_MORPH_TIMESTEPS += analysis_interval;
//double massChange = SeedPhaseField(seed_water);
if (rank==0) printf("***Seed water in oil %f, volume change %f / %f ***\n", seed_water, delta_volume, delta_volume_target);
double massChange = SeedPhaseField(seed_water);
if (rank==0) printf("***Seed water in oil %f, volume change %f / %f ***\n", massChange, delta_volume, delta_volume_target);
}
else if (USE_MORPHOPEN_OIL){
delta_volume = volA*Dm->Volume - initial_volume;
@ -956,42 +977,30 @@ void ScaLBL_ColorModel::Run(){
CURRENT_STEADY_TIMESTEPS=0;
initial_volume = volA*Dm->Volume;
delta_volume = 0.0;
if (USE_DIRECT){
//BoundaryCondition = 0;
//ScaLBL_Comm->BoundaryCondition = 0;
//ScaLBL_Comm_Regular->BoundaryCondition = 0;
//Fx = capillary_number*dir_x*force_mag / Ca;
//Fy = capillary_number*dir_y*force_mag / Ca;
//Fz = capillary_number*dir_z*force_mag / Ca;
}
if (RESCALE_FORCE_AFTER_TIMESTEP > 0)
RESCALE_FORCE = true;
}
else if (!(USE_DIRECT) && CURRENT_MORPH_TIMESTEPS > MAX_MORPH_TIMESTEPS) {
MORPH_ADAPT = false;
CURRENT_STEADY_TIMESTEPS=0;
initial_volume = volA*Dm->Volume;
delta_volume = 0.0;
RESCALE_FORCE = true;
if (RESCALE_FORCE_AFTER_TIMESTEP > 0)
RESCALE_FORCE = true;
}
if ( REVERSE_FLOW_DIRECTION ){
//if (rank==0) printf("*****REVERSE FLOW DIRECTION***** \n");
delta_volume = 0.0;
// flow direction will reverse after next steady point
MORPH_ADAPT = false;
CURRENT_STEADY_TIMESTEPS=0;
//morph_delta *= (-1.0);
REVERSE_FLOW_DIRECTION = false;
}
comm.barrier();
}
morph_timesteps += analysis_interval;
}
MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL);
}
analysis.finish();
PROFILE_STOP("Loop");
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
stoptime = Utilities::MPI::time();
MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
@ -1035,17 +1044,17 @@ double ScaLBL_ColorModel::ImageInit(std::string Filename){
}
}
Count = Dm->Comm.sumReduce( Count );
PoreCount = Dm->Comm.sumReduce( PoreCount );
Count=sumReduce( Dm->Comm, Count);
PoreCount=sumReduce( Dm->Comm, PoreCount);
if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount);
ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double));
comm.barrier();
MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL);
ScaLBL_D3Q19_Init(fq, Np);
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
comm.barrier();
MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL);
ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double));
@ -1077,7 +1086,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
BlobIDstruct new_index;
double vF=0.0; double vS=0.0;
ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm);
Dm->Comm.barrier();
MPI_Barrier(Dm->Comm);
long long count_connected=0;
long long count_porespace=0;
@ -1099,9 +1108,9 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
}
}
}
count_connected = Dm->Comm.sumReduce( count_connected);
count_porespace = Dm->Comm.sumReduce( count_porespace);
count_water = Dm->Comm.sumReduce( count_water);
count_connected=sumReduce( Dm->Comm, count_connected);
count_porespace=sumReduce( Dm->Comm, count_porespace);
count_water=sumReduce( Dm->Comm, count_water);
for (int k=0; k<nz; k++){
for (int j=0; j<ny; j++){
@ -1173,7 +1182,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
}
}
}
count_morphopen = Dm->Comm.sumReduce( count_morphopen);
count_morphopen=sumReduce( Dm->Comm, count_morphopen);
volume_change = double(count_morphopen - count_connected);
if (rank==0) printf(" opening of connected oil %f \n",volume_change/count_connected);
@ -1181,7 +1190,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
ScaLBL_CopyToDevice(Phi,phase.data(),N*sizeof(double));
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
if (BoundaryCondition >0 ){
if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){
if (Dm->kproc()==0){
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0);
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1);
@ -1196,99 +1205,79 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
}
return(volume_change);
}
double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){
srand(time(NULL));
double mass_loss =0.f;
double count =0.f;
double *Aq_tmp, *Bq_tmp;
Aq_tmp = new double [7*Np];
Bq_tmp = new double [7*Np];
srand(time(NULL));
double mass_loss =0.f;
double count =0.f;
double *Aq_tmp, *Bq_tmp;
Aq_tmp = new double [7*Np];
Bq_tmp = new double [7*Np];
ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double));
ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double));
/* for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
double random_value = double(rand())/ RAND_MAX;
ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double));
ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double));
for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){
double random_value = seed_water_in_oil*double(rand())/ RAND_MAX;
double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np];
double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np];
double phase_id = (dA - dB) / (dA + dB);
if (phase_id > 0.0){
Aq_tmp[n] -= 0.3333333333333333*random_value;
Aq_tmp[n+Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value;
Bq_tmp[n] += 0.3333333333333333*random_value;
Bq_tmp[n+Np] += 0.1111111111111111*random_value;
Bq_tmp[n+2*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+3*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+4*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+5*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+6*Np] += 0.1111111111111111*random_value;
}
mass_loss += random_value*seed_water_in_oil;
}
if (Averages->SDs(i,j,k) < 0.f){
// skip
}
else if (phase(i,j,k) > 0.f ){
phase(i,j,k) -= random_value*seed_water_in_oil;
mass_loss += random_value*seed_water_in_oil;
count++;
}
else {
for (int n=ScaLBL_Comm->FirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){
double random_value = seed_water_in_oil*double(rand())/ RAND_MAX;
double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np];
double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np];
double phase_id = (dA - dB) / (dA + dB);
if (phase_id > 0.0){
Aq_tmp[n] -= 0.3333333333333333*random_value;
Aq_tmp[n+Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value;
Bq_tmp[n] += 0.3333333333333333*random_value;
Bq_tmp[n+Np] += 0.1111111111111111*random_value;
Bq_tmp[n+2*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+3*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+4*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+5*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+6*Np] += 0.1111111111111111*random_value;
}
mass_loss += random_value*seed_water_in_oil;
}
}
}
}
}
*/
for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){
double random_value = seed_water_in_oil*double(rand())/ RAND_MAX;
double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np];
double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np];
double phase_id = (dA - dB) / (dA + dB);
if (phase_id > 0.0){
Aq_tmp[n] -= 0.3333333333333333*random_value;
Aq_tmp[n+Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value;
Bq_tmp[n] += 0.3333333333333333*random_value;
Bq_tmp[n+Np] += 0.1111111111111111*random_value;
Bq_tmp[n+2*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+3*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+4*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+5*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+6*Np] += 0.1111111111111111*random_value;
}
mass_loss += random_value*seed_water_in_oil;
}
count= sumReduce( Dm->Comm, count);
mass_loss= sumReduce( Dm->Comm, mass_loss);
if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count);
for (int n=ScaLBL_Comm->FirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){
double random_value = seed_water_in_oil*double(rand())/ RAND_MAX;
double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np];
double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np];
double phase_id = (dA - dB) / (dA + dB);
if (phase_id > 0.0){
Aq_tmp[n] -= 0.3333333333333333*random_value;
Aq_tmp[n+Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value;
Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value;
Bq_tmp[n] += 0.3333333333333333*random_value;
Bq_tmp[n+Np] += 0.1111111111111111*random_value;
Bq_tmp[n+2*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+3*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+4*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+5*Np] += 0.1111111111111111*random_value;
Bq_tmp[n+6*Np] += 0.1111111111111111*random_value;
}
mass_loss += random_value*seed_water_in_oil;
}
// Need to initialize Aq, Bq, Den, Phi directly
//ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double));
ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double));
ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double));
count = Dm->Comm.sumReduce( count );
mass_loss = Dm->Comm.sumReduce( mass_loss );
if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count);
// Need to initialize Aq, Bq, Den, Phi directly
//ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double));
ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double));
ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double));
return(mass_loss);
return(mass_loss);
}
double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta_volume){
@ -1297,6 +1286,8 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
double vF = 0.f;
double vS = 0.f;
double delta_volume;
double WallFactor = 0.0;
bool USE_CONNECTED_NWP = false;
DoubleArray phase(Nx,Ny,Nz);
IntArray phase_label(Nx,Ny,Nz);;
@ -1317,7 +1308,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
}
}
}
double volume_initial = Dm->Comm.sumReduce( count);
double volume_initial = sumReduce( Dm->Comm, count);
/*
sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank);
FILE *INPUT = fopen(LocalRankFilename,"wb");
@ -1325,32 +1316,56 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
fclose(INPUT);
*/
// 2. Identify connected components of phase field -> phase_label
BlobIDstruct new_index;
ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm);
comm.barrier();
// only operate on component "0"
count = 0.0;
double second_biggest = 0.0;
double volume_connected = 0.0;
double second_biggest = 0.0;
if (USE_CONNECTED_NWP){
BlobIDstruct new_index;
ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm);
MPI_Barrier(Dm->Comm);
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
int label = phase_label(i,j,k);
if (label == 0 ){
phase_id(i,j,k) = 0;
count += 1.0;
// only operate on component "0"
count = 0.0;
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
int label = phase_label(i,j,k);
if (label == 0 ){
phase_id(i,j,k) = 0;
count += 1.0;
}
else
phase_id(i,j,k) = 1;
if (label == 1 ){
second_biggest += 1.0;
}
}
else
phase_id(i,j,k) = 1;
if (label == 1 ){
second_biggest += 1.0;
}
}
volume_connected = sumReduce( Dm->Comm, count);
second_biggest = sumReduce( Dm->Comm, second_biggest);
}
else {
// use the whole NWP
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
if (Averages->SDs(i,j,k) > 0.f){
if (phase(i,j,k) > 0.f ){
phase_id(i,j,k) = 0;
}
else {
phase_id(i,j,k) = 1;
}
}
else {
phase_id(i,j,k) = 1;
}
}
}
}
}
double volume_connected = Dm->Comm.sumReduce( count );
second_biggest = Dm->Comm.sumReduce( second_biggest );
}
/*int reach_x, reach_y, reach_z;
for (int k=0; k<Nz; k++){
@ -1385,18 +1400,21 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
}
}
if (USE_CONNECTED_NWP){
if (volume_connected - second_biggest < 2.0*fabs(target_delta_volume) && target_delta_volume < 0.0){
// if connected volume is less than 2% just delete the whole thing
if (rank==0) printf("Connected region has shrunk! \n");
REVERSE_FLOW_DIRECTION = true;
}
/* else{*/
if (rank==0) printf("Pathway volume / next largest ganglion %f \n",volume_connected/second_biggest );
}
if (rank==0) printf("MorphGrow with target volume fraction change %f \n", target_delta_volume/volume_initial);
double target_delta_volume_incremental = target_delta_volume;
if (fabs(target_delta_volume) > 0.01*volume_initial)
target_delta_volume_incremental = 0.01*volume_initial*target_delta_volume/fabs(target_delta_volume);
delta_volume = MorphGrow(Averages->SDs,phase_distance,phase_id,Averages->Dm, target_delta_volume_incremental);
delta_volume = MorphGrow(Averages->SDs,phase_distance,phase_id,Averages->Dm, target_delta_volume_incremental, WallFactor);
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
@ -1437,7 +1455,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
}
}
}
double volume_final = Dm->Comm.sumReduce( count );
double volume_final= sumReduce( Dm->Comm, count);
delta_volume = (volume_final-volume_initial);
if (rank == 0) printf("MorphInit: change fluid volume fraction by %f \n", delta_volume/volume_initial);
@ -1460,7 +1478,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta
// 7. Re-initialize phase field and density
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
if (BoundaryCondition >0 ){
if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){
if (Dm->kproc()==0){
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0);
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1);
@ -1529,25 +1547,25 @@ void ScaLBL_ColorModel::WriteDebug(){
fwrite(PhaseField.data(),8,N,VELZ_FILE);
fclose(VELZ_FILE);
// ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField);
// FILE *CGX_FILE;
// sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank);
// CGX_FILE = fopen(LocalRankFilename,"wb");
// fwrite(PhaseField.data(),8,N,CGX_FILE);
// fclose(CGX_FILE);
//
// ScaLBL_Comm->RegularLayout(Map,&ColorGrad[Np],PhaseField);
// FILE *CGY_FILE;
// sprintf(LocalRankFilename,"Gradient_Y.%05i.raw",rank);
// CGY_FILE = fopen(LocalRankFilename,"wb");
// fwrite(PhaseField.data(),8,N,CGY_FILE);
// fclose(CGY_FILE);
//
// ScaLBL_Comm->RegularLayout(Map,&ColorGrad[2*Np],PhaseField);
// FILE *CGZ_FILE;
// sprintf(LocalRankFilename,"Gradient_Z.%05i.raw",rank);
// CGZ_FILE = fopen(LocalRankFilename,"wb");
// fwrite(PhaseField.data(),8,N,CGZ_FILE);
// fclose(CGZ_FILE);
/* ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField);
FILE *CGX_FILE;
sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank);
CGX_FILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,CGX_FILE);
fclose(CGX_FILE);
ScaLBL_Comm->RegularLayout(Map,&ColorGrad[Np],PhaseField);
FILE *CGY_FILE;
sprintf(LocalRankFilename,"Gradient_Y.%05i.raw",rank);
CGY_FILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,CGY_FILE);
fclose(CGY_FILE);
ScaLBL_Comm->RegularLayout(Map,&ColorGrad[2*Np],PhaseField);
FILE *CGZ_FILE;
sprintf(LocalRankFilename,"Gradient_Z.%05i.raw",rank);
CGZ_FILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,CGZ_FILE);
fclose(CGZ_FILE);
*/
}

View File

@ -12,13 +12,13 @@ Implementation of color lattice boltzmann model
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "analysis/runAnalysis.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
class ScaLBL_ColorModel{
public:
ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM);
ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_ColorModel();
// functions in they should be run
@ -68,7 +68,7 @@ public:
double *Pressure;
private:
Utilities::MPI comm;
MPI_Comm comm;
int dist_mem_size;
int neighborSize;

View File

@ -3,7 +3,7 @@ color lattice boltzmann model
*/
#include "models/DFHModel.h"
ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM):
ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM):
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0),
Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0),
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM)
@ -81,13 +81,18 @@ void ScaLBL_DFHModel::ReadParams(string filename){
outletA=0.f;
outletB=1.f;
if (BoundaryCondition==4) flux = din*rhoA; // mass flux must adjust for density (see formulation for details)
BoundaryCondition = domain_db->getScalar<int>( "BC" );
if (color_db->keyExists( "BC" )){
BoundaryCondition = color_db->getScalar<int>( "BC" );
}
else if (domain_db->keyExists( "BC" )){
BoundaryCondition = domain_db->getScalar<int>( "BC" );
}
// Read domain parameters
auto L = domain_db->getVector<double>( "L" );
auto size = domain_db->getVector<int>( "n" );
auto nproc = domain_db->getVector<int>( "nproc" );
BoundaryCondition = domain_db->getScalar<int>( "BC" );
Nx = size[0];
Ny = size[1];
Nz = size[2];
@ -97,19 +102,21 @@ void ScaLBL_DFHModel::ReadParams(string filename){
nprocx = nproc[0];
nprocy = nproc[1];
nprocz = nproc[2];
if (BoundaryCondition==4) flux = din*rhoA; // mass flux must adjust for density (see formulation for details)
}
void ScaLBL_DFHModel::SetDomain(){
Dm = std::make_shared<Domain>(domain_db,comm); // full domain for analysis
Mask = std::make_shared<Domain>(domain_db,comm); // mask domain removes immobile phases
Dm = std::shared_ptr<Domain>(new Domain(domain_db,comm)); // full domain for analysis
Mask = std::shared_ptr<Domain>(new Domain(domain_db,comm)); // mask domain removes immobile phases
Nx+=2; Ny+=2; Nz += 2;
N = Nx*Ny*Nz;
id = new char [N];
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
Averages = std::make_shared<TwoPhase>( Dm ); // TwoPhase analysis object
comm.barrier();
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
MPI_Barrier(comm);
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
rank = Dm->rank();
}
@ -131,7 +138,7 @@ void ScaLBL_DFHModel::ReadInput(){
sprintf(LocalRankString,"%05d",rank);
sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString);
ReadBinaryFile(LocalRankFilename, Averages->SDs.data(), N);
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
}
@ -207,7 +214,6 @@ void ScaLBL_DFHModel::Create(){
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np);
comm.barrier();
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
@ -424,7 +430,7 @@ void ScaLBL_DFHModel::Initialize(){
}
}
}
count_wet_global = comm.sumReduce( count_wet );
MPI_Allreduce(&count_wet,&count_wet_global,1,MPI_DOUBLE,MPI_SUM,comm);
if (rank==0) printf("Wetting phase volume fraction =%f \n",count_wet_global/double(Nx*Ny*Nz*nprocs));
// initialize phi based on PhaseLabel (include solid component labels)
ScaLBL_CopyToDevice(Phi, PhaseLabel, Np*sizeof(double));
@ -446,7 +452,7 @@ void ScaLBL_DFHModel::Initialize(){
timestep=0;
}
}
comm.bcast(&timestep,1,0);
MPI_Bcast(&timestep,1,MPI_INT,0,comm);
// Read in the restart file to CPU buffers
double *cPhi = new double[Np];
double *cDist = new double[19*Np];
@ -468,7 +474,7 @@ void ScaLBL_DFHModel::Initialize(){
ScaLBL_DeviceBarrier();
delete [] cPhi;
delete [] cDist;
comm.barrier();
MPI_Barrier(comm);
}
if (rank==0) printf ("Initializing phase field \n");
@ -486,8 +492,8 @@ void ScaLBL_DFHModel::Run(){
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
starttime = Utilities::MPI::time();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
//************ MAIN ITERATION LOOP ***************************************/
@ -532,8 +538,7 @@ void ScaLBL_DFHModel::Run(){
}
ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
// *************EVEN TIMESTEP*************
timestep++;
@ -569,9 +574,9 @@ void ScaLBL_DFHModel::Run(){
}
ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************
MPI_Barrier(comm);
PROFILE_STOP("Update");
// Run the analysis
@ -582,8 +587,8 @@ void ScaLBL_DFHModel::Run(){
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
stoptime = Utilities::MPI::time();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;

View File

@ -12,13 +12,13 @@ Implementation of color lattice boltzmann model
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "analysis/runAnalysis.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
class ScaLBL_DFHModel{
public:
ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM);
ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_DFHModel();
// functions in they should be run
@ -66,7 +66,7 @@ public:
double *Pressure;
private:
Utilities::MPI comm;
MPI_Comm comm;
int dist_mem_size;
int neighborSize;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,95 @@
/*
Implementation of two-fluid greyscale color lattice boltzmann model
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include "common/Communication.h"
#include "analysis/GreyPhase.h"
#include "common/MPI_Helpers.h"
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
/*
 * Declaration of the two-fluid greyscale color lattice Boltzmann model.
 * The object is constructed from an MPI rank, a process count and a communicator;
 * the public methods below drive the set-up and the run.
 * NOTE(review): most state is public; presumably drivers and analysis code read it
 * directly -- confirm before tightening access.
 */
class ScaLBL_GreyscaleColorModel{
public:
ScaLBL_GreyscaleColorModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_GreyscaleColorModel();
// functions, listed in the order in which they should be run
// ReadParams is overloaded: one version takes an input file name, the other an existing database
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run();
void WriteDebug();
// run-control flags and time stepping
bool Restart,pBC;
bool REVERSE_FLOW_DIRECTION;
int timestep,timestepMax;
int BoundaryCondition;
// model parameters for the two fluid components (A and B)
double tauA,tauB,rhoA,rhoB,alpha,beta;
double tauA_eff,tauB_eff;
// body force components and flux
double Fx,Fy,Fz,flux;
// inlet / outlet values (presumably used by the boundary conditions -- TODO confirm)
double din,dout,inletA,inletB,outletA,outletB;
double GreyPorosity;
// grid dimensions, total size N and the count Np
int Nx,Ny,Nz,N,Np;
// MPI rank and process grid
int rank,nprocx,nprocy,nprocz,nprocs;
// domain lengths
double Lx,Ly,Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
std::shared_ptr<GreyPhaseAnalysis> Averages;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
std::shared_ptr<Database> greyscaleColor_db;
std::shared_ptr<Database> analysis_db;
std::shared_ptr<Database> vis_db;
IntArray Map;
// raw buffers; NOTE(review): ownership and release are not visible in this header -- confirm in the .cpp
signed char *id;
int *NeighborList;
int *dvcMap;
double *fq, *Aq, *Bq;
double *Den, *Phi;
//double *GreySolidPhi; //Model 2 & 3
double *GreySolidGrad;//Model 1 & 4
//double *ColorGrad;
double *Velocity;
double *Pressure;
double *Porosity_dvc;
double *Permeability_dvc;
private:
MPI_Comm comm;
int dist_mem_size;
int neighborSize;
// filenames (fixed-size character buffers)
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
// internal helpers
void LoadParams(std::shared_ptr<Database> db0);
void AssignComponentLabels();
void AssignGreySolidLabels();
void AssignGreyPoroPermLabels();
void ImageInit(std::string filename);
double MorphInit(const double beta, const double morph_delta);
double SeedPhaseField(const double seed_water_in_oil);
double MorphOpenConnected(double target_volume_change);
void WriteVisFiles();
};

947
models/GreyscaleModel.cpp Normal file
View File

@ -0,0 +1,947 @@
/*
Greyscale lattice boltzmann model
*/
#include "models/GreyscaleModel.h"
#include "analysis/distance.h"
#include "analysis/morphology.h"
#include <stdlib.h>
#include <time.h>
/*
 * Array deleter: releases a buffer that was allocated with new[].
 * It has the signature required of a custom deleter, so it can be handed to
 * std::shared_ptr when the managed object is an array. Passing a null pointer is a no-op.
 */
template<typename TYPE>
void DeleteArray( const TYPE *p )
{
    delete[] p;
}
/*
 * Constructor: records the MPI rank, the number of processes and the communicator,
 * and sets every scalar listed in the initializer list to zero. The actual parameter
 * values and the grid dimensions are filled in later by ReadParams() and SetDomain().
 */
ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM):
    rank(RANK), nprocs(NP),
    Restart(0), timestep(0), timestepMax(0),
    tau(0), tau_eff(0), Den(0),
    Fx(0), Fy(0), Fz(0),
    flux(0), din(0), dout(0), GreyPorosity(0),
    Nx(0), Ny(0), Nz(0), N(0), Np(0),
    nprocx(0), nprocy(0), nprocz(0),
    BoundaryCondition(0),
    Lx(0), Ly(0), Lz(0),
    comm(COMM)
{
    // Nx, Ny and Nz are all zero here, so the signed distance array is resized to 0x0x0
    SignDist.resize(Nx,Ny,Nz);
    SignDist.fill(0);
}
// Destructor: the body is empty, so nothing is released explicitly here.
// NOTE(review): SetDomain() allocates `id` with new[] and Create() obtains buffers through
// ScaLBL_AllocateDeviceMemory; no matching release is visible in this file -- confirm
// whether that is intentional before adding clean-up.
ScaLBL_GreyscaleModel::~ScaLBL_GreyscaleModel(){
}
/*
 * Read the simulation parameters from an input database file.
 *
 * filename: path of the input database; it must contain the sub-databases
 *           "Domain", "Greyscale", "Analysis" and "Visualization".
 *
 * Every model parameter is first set to a default and then overridden when the
 * corresponding key exists in the "Greyscale" database. The boundary condition "BC"
 * is taken from "Greyscale" when present, otherwise from "Domain", otherwise 0.
 */
void ScaLBL_GreyscaleModel::ReadParams(string filename){
    // read the input database
    db = std::make_shared<Database>( filename );
    domain_db = db->getDatabase( "Domain" );
    greyscale_db = db->getDatabase( "Greyscale" );
    analysis_db = db->getDatabase( "Analysis" );
    vis_db = db->getDatabase( "Visualization" );

    // set defaults
    timestepMax = 100000;
    tau = 1.0;
    tau_eff = tau;
    Den = 1.0;//constant density
    tolerance = 0.01;
    Fx = Fy = Fz = 0.0;
    Restart=false;
    din=dout=1.0;
    flux=0.0;
    dp = 10.0; //unit of 'dp': voxel
    CollisionType = 1; //1: IMRT; 2: BGK; 3: MRT

    // ---------------------- Greyscale Model parameters -----------------------//
    if (greyscale_db->keyExists( "timestepMax" )){
        timestepMax = greyscale_db->getScalar<int>( "timestepMax" );
    }
    if (greyscale_db->keyExists( "tau" )){
        tau = greyscale_db->getScalar<double>( "tau" );
    }
    // tau_eff falls back to the (possibly just updated) value of tau
    tau_eff = greyscale_db->getWithDefault<double>( "tau_eff", tau );
    if (greyscale_db->keyExists( "Den" )){
        Den = greyscale_db->getScalar<double>( "Den" );
    }
    if (greyscale_db->keyExists( "dp" )){
        dp = greyscale_db->getScalar<double>( "dp" );
    }
    if (greyscale_db->keyExists( "F" )){
        // fetch the force vector once and validate its length before indexing it
        auto F = greyscale_db->getVector<double>( "F" );
        if (F.size() < 3){
            ERROR("Error: F must provide three components (Fx, Fy, Fz)! \n");
        }
        else{
            Fx = F[0];
            Fy = F[1];
            Fz = F[2];
        }
    }
    if (greyscale_db->keyExists( "Restart" )){
        Restart = greyscale_db->getScalar<bool>( "Restart" );
    }
    if (greyscale_db->keyExists( "din" )){
        din = greyscale_db->getScalar<double>( "din" );
    }
    if (greyscale_db->keyExists( "dout" )){
        dout = greyscale_db->getScalar<double>( "dout" );
    }
    if (greyscale_db->keyExists( "flux" )){
        flux = greyscale_db->getScalar<double>( "flux" );
    }
    if (greyscale_db->keyExists( "tolerance" )){
        tolerance = greyscale_db->getScalar<double>( "tolerance" );
    }
    // any string other than "BGK" or "MRT" leaves the IMRT default (CollisionType=1) in place
    auto collision = greyscale_db->getWithDefault<std::string>( "collision", "IMRT" );
    if (collision == "BGK"){
        CollisionType=2;
    }
    else if (collision == "MRT"){
        CollisionType=3;
    }
    // ------------------------------------------------------------------------//

    //------------------------ Other Domain parameters ------------------------//
    BoundaryCondition = 0;
    if (greyscale_db->keyExists( "BC" )){
        BoundaryCondition = greyscale_db->getScalar<int>( "BC" );
    }
    else if (domain_db->keyExists( "BC" )){
        BoundaryCondition = domain_db->getScalar<int>( "BC" );
    }
    // ------------------------------------------------------------------------//
}
/*
 * Build the two Domain objects from the "Domain" database, size the host arrays
 * to the local grid and initialize the domain communication.
 * After this call Nx,Ny,Nz,Lx,Ly,Lz,N as well as rank and nprocx/y/z mirror the values held by Dm.
 */
void ScaLBL_GreyscaleModel::SetDomain(){
    Dm.reset(new Domain(domain_db,comm));      // full domain for analysis
    Mask.reset(new Domain(domain_db,comm));    // mask domain removes immobile phases

    // domain parameters
    Nx = Dm->Nx;   Ny = Dm->Ny;   Nz = Dm->Nz;
    Lx = Dm->Lx;   Ly = Dm->Ly;   Lz = Dm->Lz;
    N = Nx*Ny*Nz;

    // host-side fields all share the local grid dimensions
    SignDist.resize(Nx,Ny,Nz);
    Velocity_x.resize(Nx,Ny,Nz);
    Velocity_y.resize(Nx,Ny,Nz);
    Velocity_z.resize(Nx,Ny,Nz);
    PorosityMap.resize(Nx,Ny,Nz);
    Pressure.resize(Nx,Ny,Nz);

    id = new signed char [N];
    // initialize this way: every label of the analysis domain starts at 1
    for (int n=0; n<N; n++){
        Dm->id[n] = 1;
    }

    MPI_Barrier(comm);
    Dm->CommInit();
    MPI_Barrier(comm);

    // Read domain parameters
    rank = Dm->rank();
    nprocx = Dm->nprocx();
    nprocy = Dm->nprocy();
    nprocz = Dm->nprocz();
}
void ScaLBL_GreyscaleModel::ReadInput(){
sprintf(LocalRankString,"%05d",rank);
sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString);
sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString);
if (domain_db->keyExists( "Filename" )){
auto Filename = domain_db->getScalar<std::string>( "Filename" );
Mask->Decomp(Filename);
}
else{
if (rank==0) printf("Filename of input image is not found, reading ID.0* instead.");
Mask->ReadIDs();
}
for (int i=0; i<Nx*Ny*Nz; i++) id[i] = Mask->id[i]; // save what was read
// Generate the signed distance map
// Initialize the domain and communication
Array<char> id_solid(Nx,Ny,Nz);
int count = 0;
// Solve for the position of the solid phase
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
int n = k*Nx*Ny+j*Nx+i;
// Initialize the solid phase
signed char label = Mask->id[n];
if (label > 0) id_solid(i,j,k) = 1;
else id_solid(i,j,k) = 0;
}
}
}
// Initialize the signed distance function
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
int n=k*Nx*Ny+j*Nx+i;
// Initialize distance to +/- 1
SignDist(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0;
}
}
}
// MeanFilter(SignDist);
if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n");
CalcDist(SignDist,id_solid,*Mask);
if (rank == 0) cout << "Domain set." << endl;
}
/********************************************************
* AssignComponentLabels *
********************************************************/
/*
 * Assign a porosity and a permeability to every active site from per-label lists.
 *
 * Porosity, Permeability: host output arrays indexed by the compact index stored in Map;
 *                         only entries with Map(i,j,k) >= 0 are written.
 *
 * The lists "ComponentLabels", "PorosityList" and "PermeabilityList" are read from the
 * Greyscale database and must have equal length. Each permeability is divided by
 * voxel_length^2 before it is stored. The routine also copies Mask->id into Dm->id,
 * reduces the per-label voxel counts over Dm->Comm, and sets GreyPorosity to the
 * volume-fraction weighted sum of the listed porosities. Rank 0 prints a summary.
 *
 * NOTE(review): POROSITY / PERMEABILITY are not reset per voxel, so a voxel whose label is
 * missing from ComponentLabels silently reuses the value of the last matched voxel -- confirm
 * whether that is intended.
 */
void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Permeability)
{
    signed char VALUE=0;
    double POROSITY=0.f;
    double PERMEABILITY=0.f;

    auto LabelList = greyscale_db->getVector<int>( "ComponentLabels" );
    auto PorosityList = greyscale_db->getVector<double>( "PorosityList" );
    auto PermeabilityList = greyscale_db->getVector<double>( "PermeabilityList" );

    const size_t NLABELS = LabelList.size();
    if (NLABELS != PorosityList.size()){
        ERROR("Error: ComponentLabels and PorosityList must be the same length! \n");
    }

    // per-label voxel counts (std::vector replaces the non-standard variable length arrays)
    std::vector<double> label_count(NLABELS, 0.0);
    std::vector<double> label_count_global(NLABELS, 0.0);

    // Assign the porosity
    for (int k=0;k<Nz;k++){
        for (int j=0;j<Ny;j++){
            for (int i=0;i<Nx;i++){
                int n = k*Nx*Ny+j*Nx+i;
                VALUE=id[n];
                // Look the label up in the paired list; the first match wins
                for (size_t idx=0; idx < NLABELS; idx++){
                    if (VALUE == LabelList[idx]){
                        POROSITY=PorosityList[idx];
                        label_count[idx] += 1.0;
                        break;
                    }
                }
                const int site = Map(i,j,k);
                if (!(site < 0)){
                    if (POROSITY<=0.0){
                        ERROR("Error: Porosity for grey voxels must be 0.0 < Porosity <= 1.0 !\n");
                    }
                    else{
                        Porosity[site] = POROSITY;
                    }
                }
            }
        }
    }

    if (NLABELS != PermeabilityList.size()){
        ERROR("Error: ComponentLabels and PermeabilityList must be the same length! \n");
    }

    // Assign the permeability
    for (int k=0;k<Nz;k++){
        for (int j=0;j<Ny;j++){
            for (int i=0;i<Nx;i++){
                int n = k*Nx*Ny+j*Nx+i;
                VALUE=id[n];
                for (size_t idx=0; idx < NLABELS; idx++){
                    if (VALUE == LabelList[idx]){
                        PERMEABILITY=PermeabilityList[idx];
                        break;
                    }
                }
                const int site = Map(i,j,k);
                if (!(site < 0)){
                    if (PERMEABILITY<=0.0){
                        ERROR("Error: Permeability for grey voxel must be > 0.0 ! \n");
                    }
                    else{
                        // stored in voxel units
                        Permeability[site] = PERMEABILITY/Dm->voxel_length/Dm->voxel_length;
                    }
                }
            }
        }
    }

    // Set Dm to match Mask
    for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];

    for (size_t idx=0; idx<NLABELS; idx++){
        label_count_global[idx]=sumReduce( Dm->Comm, label_count[idx]);
    }

    // Global number of interior voxels, evaluated in double precision because the
    // integer product (Nx-2)*(Ny-2)*(Nz-2)*nprocs can exceed the range of int
    const double global_voxels = double(Nx-2)*double(Ny-2)*double(Nz-2)*double(nprocs);

    //Initialize a weighted porosity after considering grey voxels
    GreyPorosity=0.0;
    for (size_t idx=0; idx<NLABELS; idx++){
        double volume_fraction = label_count_global[idx]/global_voxels;
        GreyPorosity+=volume_fraction*PorosityList[idx];
    }

    if (rank==0){
        printf("Image resolution: %.5g [um/voxel]\n",Dm->voxel_length);
        printf("Number of component labels: %zu \n",NLABELS);
        for (size_t idx=0; idx<NLABELS; idx++){
            VALUE=LabelList[idx];
            POROSITY=PorosityList[idx];
            PERMEABILITY=PermeabilityList[idx];
            double volume_fraction = label_count_global[idx]/global_voxels;
            printf(" label=%d: porosity=%.3g, permeability=%.3g [um^2] (=%.3g [voxel^2]), volume fraction=%.3g\n",
                   VALUE,POROSITY,PERMEABILITY,PERMEABILITY/Dm->voxel_length/Dm->voxel_length,volume_fraction);
            printf(" effective porosity=%.3g\n",volume_fraction*POROSITY);
        }
        printf("The weighted porosity, considering both open and grey voxels, is %.3g\n",GreyPorosity);
    }
}
void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity,double *Permeability,const vector<std::string> &File_poro,const vector<std::string> &File_perm)
{
double *Porosity_host, *Permeability_host;
Porosity_host = new double[N];
Permeability_host = new double[N];
double POROSITY=0.f;
double PERMEABILITY=0.f;
//Initialize a weighted porosity after considering grey voxels
double GreyPorosity_loc=0.0;
GreyPorosity=0.0;
//double label_count_loc = 0.0;
//double label_count_glb = 0.0;
Mask->ReadFromFile(File_poro[0],File_poro[1],Porosity_host);
Mask->ReadFromFile(File_perm[0],File_perm[1],Permeability_host);
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
int idx = Map(i,j,k);
if (!(idx < 0)){
int n = k*Nx*Ny+j*Nx+i;
POROSITY = Porosity_host[n];
PERMEABILITY = Permeability_host[n];
if (POROSITY<=0.0){
ERROR("Error: Porosity for grey voxels must be 0.0 < Porosity <= 1.0 !\n");
}
else if (PERMEABILITY<=0.0){
ERROR("Error: Permeability for grey voxel must be > 0.0 ! \n");
}
else{
Porosity[idx] = POROSITY;
Permeability[idx] = PERMEABILITY;
GreyPorosity_loc += POROSITY;
//label_count_loc += 1.0;
}
}
}
}
}
GreyPorosity = sumReduce( Dm->Comm, GreyPorosity_loc);
GreyPorosity = GreyPorosity/double((Nx-2)*(Ny-2)*(Nz-2)*nprocs);
if (rank==0){
printf("Image resolution: %.5g [um/voxel]\n",Dm->voxel_length);
printf("The weighted porosity, considering both open and grey voxels, is %.3g\n",GreyPorosity);
}
delete [] Porosity_host;
delete [] Permeability_host;
}
void ScaLBL_GreyscaleModel::Create(){
/*
* This function creates the variables needed to run a LBM
*/
//.........................................................
// don't perform computations at the eight corners
//id[0] = id[Nx-1] = id[(Ny-1)*Nx] = id[(Ny-1)*Nx + Nx-1] = 0;
//id[(Nz-1)*Nx*Ny] = id[(Nz-1)*Nx*Ny+Nx-1] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx + Nx-1] = 0;
//.........................................................
// Initialize communication structures in averaging domain
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
Mask->CommInit();
Np=Mask->PoreCount();
//...........................................................................
if (rank==0) printf ("Create ScaLBL_Communicator \n");
// Create a communicator for the device (will use optimized layout)
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
ScaLBL_Comm = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
int Npad=(Np/16 + 2)*16;
if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N);
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np);
MPI_Barrier(comm);
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
// LBM variables
if (rank==0) printf ("Allocating distributions \n");
//......................device distributions.................................
dist_mem_size = Np*sizeof(double);
neighborSize=18*(Np*sizeof(int));
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &Permeability, sizeof(double)*Np);
ScaLBL_AllocateDeviceMemory((void **) &Porosity, sizeof(double)*Np);
ScaLBL_AllocateDeviceMemory((void **) &Pressure_dvc, sizeof(double)*Np);
ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np);
//...........................................................................
// Update GPU data structures
if (rank==0) printf ("Setting up device neighbor list \n");
fflush(stdout);
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
// initialize phi based on PhaseLabel (include solid component labels)
double *Poros, *Perm;
Poros = new double[Np];
Perm = new double[Np];
if (greyscale_db->keyExists("FileVoxelPorosityMap")){
//NOTE: FileVoxel**Map is a vector, including "file_name, datatype"
auto File_poro = greyscale_db->getVector<std::string>( "FileVoxelPorosityMap" );
auto File_perm = greyscale_db->getVector<std::string>( "FileVoxelPermeabilityMap" );
AssignComponentLabels(Poros,Perm,File_poro,File_perm);
}
else if (greyscale_db->keyExists("PorosityList")){
//initialize voxel porosity and perm from the input list
AssignComponentLabels(Poros,Perm);
}
else {
ERROR("Error: PorosityList or FilenameVoxelPorosityMap cannot be found! \n");
}
ScaLBL_CopyToDevice(Porosity, Poros, Np*sizeof(double));
ScaLBL_CopyToDevice(Permeability, Perm, Np*sizeof(double));
delete [] Poros;
delete [] Perm;
}
void ScaLBL_GreyscaleModel::Initialize(){
if (rank==0) printf ("Initializing distributions \n");
//TODO: for BGK, you need to consider voxel porosity
// for IMRT, the whole set of feq is different
// if in the future you have different collison mode, need to write two set of initialization functions
if (CollisionType==1){
ScaLBL_D3Q19_GreyIMRT_Init(fq, Np, Den);
if (rank==0) printf("Collision model: Incompressible MRT.\n");
}
else if (CollisionType==2){
ScaLBL_D3Q19_Init(fq, Np);
if (rank==0) printf("Collision model: BGK.\n");
}
else if (CollisionType==3){
ScaLBL_D3Q19_Init(fq, Np);
if (rank==0) printf("Collision model: MRT.\n");
}
else{
if (rank==0) printf("Unknown collison type! IMRT collision is used.\n");
ScaLBL_D3Q19_GreyIMRT_Init(fq, Np, Den);
CollisionType=1;
greyscale_db->putScalar<std::string>( "collision", "IMRT" );
}
if (Restart == true){
if (rank==0){
printf("Initializing distributions from Restart! \n");
}
// Read in the restart file to CPU buffers
std::shared_ptr<double> cfq;
cfq = std::shared_ptr<double>(new double[19*Np],DeleteArray<double>);
FILE *File;
File=fopen(LocalRestartFile,"rb");
fread(cfq.get(),sizeof(double),19*Np,File);
fclose(File);
// Copy the restart data to the GPU
ScaLBL_CopyToDevice(fq,cfq.get(),19*Np*sizeof(double));
ScaLBL_DeviceBarrier();
MPI_Barrier(comm);
}
}
void ScaLBL_GreyscaleModel::Run(){
int nprocs=nprocx*nprocy*nprocz;
const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz);
int analysis_interval = 1000; // number of timesteps in between in situ analysis
int visualization_interval = 1000;
int restart_interval = 10000; // number of timesteps in between in saving distributions for restart
if (analysis_db->keyExists( "analysis_interval" )){
analysis_interval = analysis_db->getScalar<int>( "analysis_interval" );
}
if (analysis_db->keyExists( "visualization_interval" )){
visualization_interval = analysis_db->getScalar<int>( "visualization_interval" );
}
if (analysis_db->keyExists( "restart_interval" )){
restart_interval = analysis_db->getScalar<int>( "restart_interval" );
}
if (greyscale_db->keyExists( "timestep" )){
timestep = greyscale_db->getScalar<int>( "timestep" );
}
if (rank==0){
printf("********************************************************\n");
printf("No. of timesteps: %i \n", timestepMax);
fflush(stdout);
}
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
Minkowski Morphology(Mask);
//************ MAIN ITERATION LOOP ***************************************/
PROFILE_START("Loop");
auto current_db = db->cloneDatabase();
double rlx = 1.0/tau;
double rlx_eff = 1.0/tau_eff;
double error = 1.0;
double flow_rate_previous = 0.0;
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
// *************ODD TIMESTEP*************//
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
switch (CollisionType){
case 1:
ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
case 2:
ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc);
break;
case 3:
ScaLBL_D3Q19_AAodd_Greyscale_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
default:
ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
}
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_DeviceBarrier();
// Set BCs
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
switch (CollisionType){
case 1:
ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
case 2:
ScaLBL_D3Q19_AAodd_Greyscale(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc);
break;
case 3:
ScaLBL_D3Q19_AAodd_Greyscale_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
default:
ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
}
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
// *************EVEN TIMESTEP*************//
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
switch (CollisionType){
case 1:
ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
case 2:
ScaLBL_D3Q19_AAeven_Greyscale(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc);
break;
case 3:
ScaLBL_D3Q19_AAeven_Greyscale_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
default:
ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
}
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_DeviceBarrier();
// Set BCs
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
switch (CollisionType){
case 1:
ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
case 2:
ScaLBL_D3Q19_AAeven_Greyscale(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Pressure_dvc);
break;
case 3:
ScaLBL_D3Q19_AAeven_Greyscale_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
default:
ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc);
break;
}
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************/
if (timestep%analysis_interval==0){
ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x);
ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y);
ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z);
//ScaLBL_Comm->RegularLayout(Map,Porosity,PorosityMap);
//ScaLBL_Comm->RegularLayout(Map,Pressure_dvc,Pressure);
double count_loc=0;
double count;
double vax,vay,vaz;
double vax_loc,vay_loc,vaz_loc;
//double px_loc,py_loc,pz_loc;
//double px,py,pz;
//double mass_loc,mass_glb;
//parameters for domain average
int64_t i,j,k,n,imin,jmin,kmin,kmax;
// If external boundary conditions are set, do not average over the inlet and outlet
kmin=1; kmax=Nz-1;
//In case user forgets to specify the inlet/outlet buffer layers for BC>0
if (BoundaryCondition > 0 && Dm->kproc() == 0) kmin=4;
if (BoundaryCondition > 0 && Dm->kproc() == Dm->nprocz()-1) kmax=Nz-4;
imin=jmin=1;
// If inlet/outlet layers exist use these as default
//if (Dm->inlet_layers_x > 0) imin = Dm->inlet_layers_x;
//if (Dm->inlet_layers_y > 0) jmin = Dm->inlet_layers_y;
if (BoundaryCondition > 0 && Dm->inlet_layers_z > 0 && Dm->kproc() == 0) kmin = 1 + Dm->inlet_layers_z;//"1" indicates the halo layer
if (BoundaryCondition > 0 && Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1) kmax = Nz-1 - Dm->outlet_layers_z;
// px_loc = py_loc = pz_loc = 0.f;
// mass_loc = 0.f;
// for (int k=kmin; k<kmax; k++){
// for (int j=jmin; j<Ny-1; j++){
// for (int i=imin; i<Nx-1; i++){
// if (SignDist(i,j,k) > 0){
// px_loc += Velocity_x(i,j,k)*Den*PorosityMap(i,j,k);
// py_loc += Velocity_y(i,j,k)*Den*PorosityMap(i,j,k);
// pz_loc += Velocity_z(i,j,k)*Den*PorosityMap(i,j,k);
// mass_loc += Den*PorosityMap(i,j,k);
// }
// }
// }
// }
// MPI_Allreduce(&px_loc, &px, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
// MPI_Allreduce(&py_loc, &py, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
// MPI_Allreduce(&pz_loc, &pz, 1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
// MPI_Allreduce(&mass_loc,&mass_glb,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
//
// vax = px/mass_glb;
// vay = py/mass_glb;
// vaz = pz/mass_glb;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int k=kmin; k<kmax; k++){
for (int j=jmin; j<Ny-1; j++){
for (int i=imin; i<Nx-1; i++){
if (SignDist(i,j,k) > 0){
vax_loc += Velocity_x(i,j,k);
vay_loc += Velocity_y(i,j,k);
vaz_loc += Velocity_z(i,j,k);
count_loc+=1.0;
}
}
}
}
vax = sumReduce( Mask->Comm, vax_loc);
vay = sumReduce( Mask->Comm, vay_loc);
vaz = sumReduce( Mask->Comm, vaz_loc);
count = sumReduce( Mask->Comm, count_loc);
vax /= count;
vay /= count;
vaz /= count;
double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz);
double dir_x = Fx/force_mag;
double dir_y = Fy/force_mag;
double dir_z = Fz/force_mag;
if (force_mag == 0.0){
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
}
//double flow_rate = (px*dir_x + py*dir_y + pz*dir_z)/mass_glb;
double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z);
error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate);
flow_rate_previous = flow_rate;
//if (rank==0) printf("Computing Minkowski functionals \n");
Morphology.ComputeScalar(SignDist,0.f);
//Morphology.PrintAll();
double mu = (tau-0.5)/3.f;
double Vs = Morphology.V();
double As = Morphology.A();
double Hs = Morphology.H();
double Xs = Morphology.X();
Vs = sumReduce( Dm->Comm, Vs);
As = sumReduce( Dm->Comm, As);
Hs = sumReduce( Dm->Comm, Hs);
Xs = sumReduce( Dm->Comm, Xs);
double h = Dm->voxel_length;
//double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag;
double absperm = h*h*mu*GreyPorosity*flow_rate / force_mag;
if (rank==0){
printf(" AbsPerm = %.5g [micron^2]\n",absperm);
bool WriteHeader=false;
FILE * log_file = fopen("Permeability.csv","r");
if (log_file != NULL)
fclose(log_file);
else
WriteHeader=true;
log_file = fopen("Permeability.csv","a");
if (WriteHeader)
fprintf(log_file,"timestep Fx Fy Fz mu Vs As Hs Xs vax vay vaz AbsPerm \n",
timestep,Fx,Fy,Fz,mu,h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz,absperm);
fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu,
h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz, absperm);
fclose(log_file);
}
}
if (timestep%visualization_interval==0){
VelocityField();
}
if (timestep%restart_interval==0){
//Use rank=0 write out Restart.db
if (rank==0) {
greyscale_db->putScalar<int>("timestep",timestep);
greyscale_db->putScalar<bool>( "Restart", true );
current_db->putDatabase("Greyscale", greyscale_db);
std::ofstream OutStream("Restart.db");
current_db->print(OutStream, "");
OutStream.close();
}
//Write out Restart data.
std::shared_ptr<double> cfq;
cfq = std::shared_ptr<double>(new double[19*Np],DeleteArray<double>);
ScaLBL_CopyToHost(cfq.get(),fq,19*Np*sizeof(double));// Copy restart data to the CPU
FILE *RESTARTFILE;
RESTARTFILE=fopen(LocalRestartFile,"wb");
fwrite(cfq.get(),sizeof(double),19*Np,RESTARTFILE);
fclose(RESTARTFILE);
MPI_Barrier(comm);
}
}
PROFILE_STOP("Loop");
PROFILE_SAVE("lbpm_greyscale_simulator",1);
//************************************************************************
ScaLBL_DeviceBarrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank==0) printf("********************************************************\n");
// ************************************************************************
}
void ScaLBL_GreyscaleModel::VelocityField(){
/* Minkowski Morphology(Mask);
int SIZE=Np*sizeof(double);
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE);
memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double));
Morphology.Initialize();
Morphology.UpdateMeshValues();
Morphology.ComputeLocal();
Morphology.Reduce();
double count_loc=0;
double count;
double vax,vay,vaz;
double vax_loc,vay_loc,vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int n=0; n<ScaLBL_Comm->LastExterior(); n++){
vax_loc += VELOCITY[n];
vay_loc += VELOCITY[Np+n];
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
for (int n=ScaLBL_Comm->FirstInterior(); n<ScaLBL_Comm->LastInterior(); n++){
vax_loc += VELOCITY[n];
vay_loc += VELOCITY[Np+n];
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
vaz /= count;
double mu = (tau-0.5)/3.f;
if (rank==0) printf("Fx Fy Fz mu Vs As Js Xs vx vy vz\n");
if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu,
Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz);
*/
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1);
auto VxVar = std::make_shared<IO::Variable>();
auto VyVar = std::make_shared<IO::Variable>();
auto VzVar = std::make_shared<IO::Variable>();
auto SignDistVar = std::make_shared<IO::Variable>();
auto PressureVar = std::make_shared<IO::Variable>();
IO::initialize("","silo","false");
// Create the MeshDataStruct
visData.resize(1);
visData[0].meshName = "domain";
visData[0].mesh = std::make_shared<IO::DomainMesh>( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz );
SignDistVar->name = "SignDist";
SignDistVar->type = IO::VariableType::VolumeVariable;
SignDistVar->dim = 1;
SignDistVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(SignDistVar);
VxVar->name = "Velocity_x";
VxVar->type = IO::VariableType::VolumeVariable;
VxVar->dim = 1;
VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(VxVar);
VyVar->name = "Velocity_y";
VyVar->type = IO::VariableType::VolumeVariable;
VyVar->dim = 1;
VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(VyVar);
VzVar->name = "Velocity_z";
VzVar->type = IO::VariableType::VolumeVariable;
VzVar->dim = 1;
VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(VzVar);
PressureVar->name = "Pressure";
PressureVar->type = IO::VariableType::VolumeVariable;
PressureVar->dim = 1;
PressureVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(PressureVar);
Array<double>& SignData = visData[0].vars[0]->data;
Array<double>& VelxData = visData[0].vars[1]->data;
Array<double>& VelyData = visData[0].vars[2]->data;
Array<double>& VelzData = visData[0].vars[3]->data;
Array<double>& PressureData = visData[0].vars[4]->data;
ASSERT(visData[0].vars[0]->name=="SignDist");
ASSERT(visData[0].vars[1]->name=="Velocity_x");
ASSERT(visData[0].vars[2]->name=="Velocity_y");
ASSERT(visData[0].vars[3]->name=="Velocity_z");
ASSERT(visData[0].vars[4]->name=="Pressure");
ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x);
ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y);
ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z);
ScaLBL_Comm->RegularLayout(Map,Pressure_dvc,Pressure);
fillData.copy(SignDist,SignData);
fillData.copy(Velocity_x,VelxData);
fillData.copy(Velocity_y,VelyData);
fillData.copy(Velocity_z,VelzData);
fillData.copy(Pressure,PressureData);
IO::writeData( timestep, visData, Dm->Comm );
}
// Dump the velocity components, porosity and permeability fields to per-rank
// binary files for debugging.
//
// Each field is converted with ScaLBL_Comm->RegularLayout into an Nx*Ny*Nz
// array and written as N raw doubles to "<Field>.<rank, 5 digits>.raw" in the
// current working directory. A file that cannot be opened or fully written is
// reported on stderr and skipped; the remaining fields are still written.
void ScaLBL_GreyscaleModel::WriteDebug(){
    // Scratch array reused for every field
    DoubleArray PhaseField(Nx,Ny,Nz);

    auto dumpField = [&]( double *field, const char *label ){
        ScaLBL_Comm->RegularLayout(Map,field,PhaseField);
        // snprintf keeps the name inside the fixed-size member buffer
        snprintf(LocalRankFilename,sizeof(LocalRankFilename),"%s.%05i.raw",label,rank);
        FILE *out = fopen(LocalRankFilename,"wb");
        if (out == NULL){
            // Previously a failed open passed NULL on to fwrite/fclose
            fprintf(stderr,"WriteDebug: unable to open %s for writing (rank %i)\n",LocalRankFilename,rank);
            return;
        }
        const size_t expected = static_cast<size_t>(N);
        const size_t written = fwrite(PhaseField.data(),sizeof(double),expected,out);
        if (written != expected){
            fprintf(stderr,"WriteDebug: short write to %s (%zu of %zu values, rank %i)\n",
                    LocalRankFilename,written,expected,rank);
        }
        fclose(out);
    };

    // Velocity stores its three components back to back, Np values each
    dumpField(Velocity,"Velocity_X");
    dumpField(Velocity+Np,"Velocity_Y");
    dumpField(Velocity+2*Np,"Velocity_Z");
    dumpField(Porosity,"Porosity");
    dumpField(Permeability,"Permeability");
}

91
models/GreyscaleModel.h Normal file
View File

@ -0,0 +1,91 @@
/*
Implementation of greyscale lattice Boltzmann model
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include "common/Communication.h"
#include "common/MPI_Helpers.h"
#include "common/Database.h"
#include "common/ScaLBL.h"
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
// Driver object for the greyscale lattice Boltzmann simulator. It holds the
// run parameters, the domain and communication objects, the solver buffers
// and the host-side field arrays, and exposes the setup, run and output steps
// as methods.
class ScaLBL_GreyscaleModel{
public:
ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_GreyscaleModel();
// functions listed in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run();
// Writes the velocity components, porosity and permeability to per-rank .raw files
void WriteDebug();
// Writes SignDist, the velocity components and the pressure through IO::writeData
void VelocityField();
bool Restart,pBC;
int timestep,timestepMax;
int BoundaryCondition;
int CollisionType;
// Relaxation time; Run() derives the viscosity as mu = (tau-0.5)/3
double tau;
double tau_eff;
double Den;//constant density
double tolerance;
// Fx,Fy,Fz: force components; Run() projects the mean velocity onto their
// direction and falls back to the z direction when the magnitude is zero
double Fx,Fy,Fz,flux;
double din,dout;
double dp;//solid particle diameter, unit in voxel
// Porosity factor used by Run() in the absolute permeability estimate
double GreyPorosity;
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
std::shared_ptr<Database> greyscale_db;
std::shared_ptr<Database> analysis_db;
std::shared_ptr<Database> vis_db;
signed char *id;
int *NeighborList;
// Distributions; Run() copies 19*Np doubles from here to the host for restart files
double *fq;
double *Permeability;//grey voxel permeability
double *Porosity;
// Three components stored back to back, Np values each (read at offsets 0, Np, 2*Np)
double *Velocity;
// Pressure in the solver layout; VelocityField() converts it into `Pressure`
double *Pressure_dvc;
// Passed to ScaLBL_Comm->RegularLayout when converting solver buffers to Nx*Ny*Nz arrays
IntArray Map;
DoubleArray SignDist;
DoubleArray Velocity_x;
DoubleArray Velocity_y;
DoubleArray Velocity_z;
DoubleArray PorosityMap;
DoubleArray Pressure;
private:
MPI_Comm comm;
int dist_mem_size;
int neighborSize;
// filenames
char LocalRankString[8];
// Scratch buffer for per-rank file names (filled by WriteDebug())
char LocalRankFilename[40];
// Path that Run() opens when writing the restart data
char LocalRestartFile[40];
void AssignComponentLabels(double *Porosity, double *Permeablity);
void AssignComponentLabels(double *Porosity,double *Permeability,const vector<std::string> &File_poro,const vector<std::string> &File_perm);
};

1041
models/IonModel.cpp Normal file

File diff suppressed because it is too large Load Diff

102
models/IonModel.h Normal file
View File

@ -0,0 +1,102 @@
/*
* Ion transport LB Model
*/
#ifndef ScaLBL_IonModel_INC
#define ScaLBL_IonModel_INC
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include <vector>
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/MPI_Helpers.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
// Lattice Boltzmann model for ion transport. This class declares the run
// parameters, the per-species settings, the domain and communication objects
// and the data buffers used by the model.
class ScaLBL_IonModel{
public:
ScaLBL_IonModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_IonModel();
// functions listed in the order they should be run
void ReadParams(string filename,vector<int> &num_iter);
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
// NOTE(review): Velocity and ElectricField are presumably supplied by the
// flow and Poisson solvers -- confirm against the callers
void Run(double *Velocity, double *ElectricField);
void getIonConcentration(DoubleArray &IonConcentration, const int ic);
void getIonConcentration_debug(int timestep);
void DummyFluidVelocity();
void DummyElectricField();
double CalIonDenConvergence(vector<double> &ci_avg_previous);
//bool Restart,pBC;
int timestep;
vector<int> timestepMax;
int BoundaryConditionSolid;
double h;//domain resolution, unit [um/lu]
double kb,electron_charge,T,Vt;
double k2_inv;
double tolerance;
double fluidVelx_dummy,fluidVely_dummy,fluidVelz_dummy;
double Ex_dummy,Ey_dummy,Ez_dummy;
int number_ion_species;
// NOTE(review): the vectors below presumably hold one entry per ion species
// (number_ion_species) -- confirm against IonModel.cpp
vector<int> BoundaryConditionInlet;
vector<int> BoundaryConditionOutlet;
vector<double> IonDiffusivity;//User input unit [m^2/sec]
vector<int> IonValence;
vector<double> IonConcentration;//unit [mol/m^3]
vector<double> Cin;//inlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> Cout;//outlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> tau;
vector<double> time_conv;
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
std::shared_ptr<Database> ion_db;
IntArray Map;
DoubleArray Distance;
int *NeighborList;
double *fq;
double *Ci;
double *ChargeDensity;
double *IonSolid;
double *FluidVelocityDummy;
double *ElectricFieldDummy;
private:
MPI_Comm comm;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
char OutputFilename[200];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
void AssignSolidBoundary(double *ion_solid);
void AssignIonConcentration_FromFile(double *Ci,const vector<std::string> &File_ion);
void IonConcentration_LB_to_Phys(DoubleArray &Den_reg);
};
#endif

View File

@ -3,8 +3,9 @@
*/
#include "models/MRTModel.h"
#include "analysis/distance.h"
#include "common/ReadMicroCT.h"
ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM):
ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM):
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),
Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0),
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM)
@ -56,7 +57,10 @@ void ScaLBL_MRTModel::ReadParams(string filename){
}
// Read domain parameters
if (domain_db->keyExists( "BC" )){
if (mrt_db->keyExists( "BoundaryCondition" )){
BoundaryCondition = mrt_db->getScalar<int>( "BC" );
}
else if (domain_db->keyExists( "BC" )){
BoundaryCondition = domain_db->getScalar<int>( "BC" );
}
@ -82,9 +86,9 @@ void ScaLBL_MRTModel::SetDomain(){
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
comm.barrier();
MPI_Barrier(comm);
Dm->CommInit();
comm.barrier();
MPI_Barrier(comm);
rank = Dm->rank();
nprocx = Dm->nprocx();
@ -93,16 +97,34 @@ void ScaLBL_MRTModel::SetDomain(){
}
void ScaLBL_MRTModel::ReadInput(){
int rank=Dm->rank();
//.......................................................................
//.......................................................................
Mask->ReadIDs();
sprintf(LocalRankString,"%05d",Dm->rank());
sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString);
sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString);
// Generate the signed distance map
if (domain_db->keyExists( "Filename" )){
auto Filename = domain_db->getScalar<std::string>( "Filename" );
Mask->Decomp(Filename);
}
else if (domain_db->keyExists( "GridFile" )){
// Read the local domain data
auto input_id = readMicroCT( *domain_db, comm );
// Fill the halo (assuming GCW of 1)
array<int,3> size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) };
ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz };
ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 );
fillHalo<signed char> fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 );
Array<signed char> id_view;
id_view.viewRaw( size1, Mask->id );
fill.copy( input_id, id_view );
fill.fill( id_view );
}
else{
Mask->ReadIDs();
}
// Generate the signed distance map
// Initialize the domain and communication
Array<char> id_solid(Nx,Ny,Nz);
// Solve for the position of the solid phase
@ -171,7 +193,7 @@ void ScaLBL_MRTModel::Create(){
if (rank==0) printf ("Setting up device map and neighbor list \n");
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
comm.barrier();
MPI_Barrier(comm);
}
@ -206,9 +228,8 @@ void ScaLBL_MRTModel::Run(){
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
starttime = Utilities::MPI::time();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
starttime = MPI_Wtime();
if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
if (rank==0) printf("********************************************************\n");
timestep=0;
@ -220,22 +241,45 @@ void ScaLBL_MRTModel::Run(){
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************/
if (timestep%1000==0){
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x);
ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y);
ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z);
@ -257,10 +301,10 @@ void ScaLBL_MRTModel::Run(){
}
}
}
vax = Mask->Comm.sumReduce( vax_loc );
vay = Mask->Comm.sumReduce( vay_loc );
vaz = Mask->Comm.sumReduce( vaz_loc );
count = Mask->Comm.sumReduce( count_loc );
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
@ -290,10 +334,10 @@ void ScaLBL_MRTModel::Run(){
double As = Morphology.A();
double Hs = Morphology.H();
double Xs = Morphology.X();
Vs = Dm->Comm.sumReduce( Vs);
As = Dm->Comm.sumReduce( As);
Hs = Dm->Comm.sumReduce( Hs);
Xs = Dm->Comm.sumReduce( Xs);
Vs=sumReduce( Dm->Comm, Vs);
As=sumReduce( Dm->Comm, As);
Hs=sumReduce( Dm->Comm, Hs);
Xs=sumReduce( Dm->Comm, Xs);
double h = Dm->voxel_length;
double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag;
if (rank==0) {
@ -306,7 +350,7 @@ void ScaLBL_MRTModel::Run(){
}
}
//************************************************************************/
stoptime = Utilities::MPI::time();
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
@ -327,8 +371,7 @@ void ScaLBL_MRTModel::VelocityField(){
/* Minkowski Morphology(Mask);
int SIZE=Np*sizeof(double);
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier();.
comm.barrier();
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE);
memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double));
@ -355,10 +398,10 @@ void ScaLBL_MRTModel::VelocityField(){
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
vax = Mask->Comm.sumReduce( vax_loc );
vay = Mask->Comm.sumReduce( vay_loc );
vaz = Mask->Comm.sumReduce( vaz_loc );
count = Mask->Comm.sumReduce( count_loc );
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;

View File

@ -11,13 +11,13 @@
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/MPI.h"
#include "common/MPI_Helpers.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
class ScaLBL_MRTModel{
public:
ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM);
ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_MRTModel();
// functions in they should be run
@ -63,7 +63,7 @@ public:
DoubleArray Velocity_y;
DoubleArray Velocity_z;
private:
Utilities::MPI comm;
MPI_Comm comm;
// filenames
char LocalRankString[8];

View File

@ -0,0 +1,137 @@
#include "models/MultiPhysController.h"
// Store the rank, process count and communicator, and zero-initialize the
// controller settings; ReadParams() assigns the actual defaults.
// Initializers are listed in member declaration order, which is the order in
// which they run.
ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM):
    Restart(false),
    timestepMax(0),
    num_iter_Stokes(0),
    num_iter_Ion(),
    analysis_interval(0),
    visualization_interval(0),
    tolerance(0.0),
    rank(RANK),
    nprocs(NP),
    comm(COMM)
{
}
// Nothing to release by hand: every member cleans up after itself.
ScaLBL_Multiphys_Controller::~ScaLBL_Multiphys_Controller() = default;
// Load the controller settings from the "MultiphysController" section of the
// input database stored in `filename`.
//
// Every setting is first reset to its default and then overridden by the
// matching key when present:
//   timestepMax (10000), analysis_interval (500),
//   visualization_interval (10000), tolerance (1.0e-6),
//   num_iter_Stokes (1), num_iter_Ion_List ({1}).
// Restart is reset to false and is not read from the database here.
void ScaLBL_Multiphys_Controller::ReadParams(string filename){
    // read the input database
    db = std::make_shared<Database>( filename );
    study_db = db->getDatabase( "MultiphysController" );

    // Default parameters
    timestepMax = 10000;
    Restart = false;
    num_iter_Stokes = 1;
    // Assign instead of push_back so that calling ReadParams more than once
    // does not accumulate extra default entries.
    num_iter_Ion.assign(1,1);
    analysis_interval = 500;
    visualization_interval = 10000;
    tolerance = 1.0e-6;

    // load input parameters
    if (study_db->keyExists( "timestepMax" )){
        timestepMax = study_db->getScalar<int>( "timestepMax" );
    }
    if (study_db->keyExists( "analysis_interval" )){
        analysis_interval = study_db->getScalar<int>( "analysis_interval" );
    }
    if (study_db->keyExists( "visualization_interval" )){
        visualization_interval = study_db->getScalar<int>( "visualization_interval" );
    }
    if (study_db->keyExists( "tolerance" )){
        tolerance = study_db->getScalar<double>( "tolerance" );
    }
    //if (study_db->keyExists( "time_conv" )){
    //    time_conv = study_db->getScalar<double>( "time_conv" );
    //}
    //if (study_db->keyExists( "Schmidt_Number" )){
    //    SchmidtNum = study_db->getScalar<double>( "Schmidt_Number" );
    //}
    // recalculate relevant parameters
    //if (SchmidtNum>1){
    //    num_iter_Stokes = int(round(SchmidtNum/2)*2);
    //    num_iter_Ion = 1;
    //}
    //else if (SchmidtNum>0 && SchmidtNum<1){
    //    num_iter_Ion = int(round((1.0/SchmidtNum)/2)*2);
    //    num_iter_Stokes = 1;
    //}
    //else if (SchmidtNum==1){
    //    num_iter_Stokes = 1;
    //    num_iter_Ion = 1;
    //}
    //else{
    //    ERROR("Error: SchmidtNum (Schmidt number) must be a positive number! \n");
    //}

    // load input parameters
    // in case the user wants absolute control over the internal iterations
    if (study_db->keyExists( "num_iter_Ion_List" )){
        num_iter_Ion = study_db->getVector<int>( "num_iter_Ion_List" );
    }
    if (study_db->keyExists( "num_iter_Stokes" )){
        num_iter_Stokes = study_db->getScalar<int>( "num_iter_Stokes" );
    }
}
// Return the number of internal iterations for the Stokes solver.
//
// The solver with the largest time conversion factor (Stokes or any ion
// species) runs 2 iterations. When an ion species holds the maximum, the
// Stokes count is 2*max/StokesTimeConv rounded to the nearest even integer.
// If StokesTimeConv ties with the largest ion value, Stokes is treated as the
// maximum and 2 is returned.
int ScaLBL_Multiphys_Controller::getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv){
    // Scan for the largest factor, starting from the Stokes value; the strict
    // '<' keeps the earliest entry on ties.
    double largest = StokesTimeConv;
    bool stokesIsLargest = true;
    for (size_t ion=0; ion<IonTimeConv.size(); ion++){
        if (largest < IonTimeConv[ion]){
            largest = IonTimeConv[ion];
            stokesIsLargest = false;
        }
    }
    if (stokesIsLargest)
        return 2;
    // the factor 2 is the iteration count given to the solver with the max time_conv
    const double scaled = 2*largest/StokesTimeConv;
    return int(round(scaled/2)*2);
}
// Return the number of internal iterations for each ion transport solver, in
// the same order as IonTimeConv.
//
// The solver with the largest time conversion factor (Stokes or any ion
// species) is the reference and runs 2 iterations. Every other ion species
// gets 2*max/IonTimeConv[i] rounded to the nearest even integer. On ties the
// earliest entry is the reference, with Stokes ahead of all ion species.
vector<int> ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv){
    const size_t numIons = IonTimeConv.size();

    // Locate the reference solver; refIon == numIons means Stokes is the reference.
    double largest = StokesTimeConv;
    size_t refIon = numIons;
    for (size_t ion=0; ion<numIons; ion++){
        if (largest < IonTimeConv[ion]){
            largest = IonTimeConv[ion];
            refIon = ion;
        }
    }

    vector<int> num_iter_ion;
    num_iter_ion.reserve(numIons);
    for (size_t ion=0; ion<numIons; ion++){
        if (ion == refIon){
            // the reference species itself runs exactly 2 iterations
            num_iter_ion.push_back(2);
            continue;
        }
        // the factor 2 is the iteration count given to the solver with the max time_conv
        const double scaled = 2*largest/IonTimeConv[ion];
        num_iter_ion.push_back(int(round(scaled/2)*2));
    }
    return num_iter_ion;
}

View File

@ -0,0 +1,56 @@
/*
* Multiphysics controller that coordinates the coupling between different models
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include <vector>
#include <algorithm>
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/MPI_Helpers.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
// Holds the settings that coordinate the coupled solvers (iteration counts,
// analysis and visualization intervals, tolerance) and computes how many
// internal iterations the Stokes and ion solvers should take from their time
// conversion factors.
class ScaLBL_Multiphys_Controller{
public:
ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_Multiphys_Controller();
// Loads the "MultiphysController" section of the input file, applying defaults first
void ReadParams(string filename);
// NOTE(review): no definition of this overload appears in the accompanying
// MultiPhysController.cpp -- confirm it is implemented before calling it
void ReadParams(std::shared_ptr<Database> db0);
// Both functions give 2 iterations to the solver with the largest time
// conversion factor and scale the others by the ratio to it, rounded to an even count
int getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
vector<int> getIonNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
//void getIonNumIter_PNP_coupling(double StokesTimeConv,vector<double> &IonTimeConv,vector<int> &IonTimeMax);
// ReadParams() resets this to false; it is not read from the database there
bool Restart;
// Default 10000 (key "timestepMax")
int timestepMax;
// Default 1 (key "num_iter_Stokes")
int num_iter_Stokes;
// Default {1} (key "num_iter_Ion_List")
vector<int> num_iter_Ion;
// Default 500 (key "analysis_interval")
int analysis_interval;
// Default 10000 (key "visualization_interval")
int visualization_interval;
// Default 1.0e-6 (key "tolerance")
double tolerance;
//double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity
int rank,nprocs;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> study_db;
private:
MPI_Comm comm;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
// NOTE(review): no definition appears in the accompanying MultiPhysController.cpp -- confirm
void LoadParams(std::shared_ptr<Database> db0);
};

787
models/PoissonSolver.cpp Normal file
View File

@ -0,0 +1,787 @@
/*
* Lattice Boltzmann solver for the Poisson equation (electric potential)
*/
#include "models/PoissonSolver.h"
#include "analysis/distance.h"
#include "common/ReadMicroCT.h"
// Store the rank, process count and communicator, and zero-initialize every
// solver parameter; ReadParams() assigns the actual defaults.
// The initializer order is kept exactly as written.
ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM):
    rank(RANK), nprocs(NP),
    timestep(0), timestepMax(0),
    tau(0), k2_inv(0), tolerance(0), h(0),
    epsilon0(0), epsilon0_LB(0), epsilonR(0), epsilon_LB(0),
    Vin(0), Vout(0),
    Nx(0), Ny(0), Nz(0), N(0), Np(0),
    analysis_interval(0),
    chargeDen_dummy(0), WriteLog(0),
    nprocx(0), nprocy(0), nprocz(0),
    BoundaryCondition(0), BoundaryConditionSolid(0),
    Lx(0), Ly(0), Lz(0),
    comm(COMM)
{
}
// Nothing is released by hand here.
ScaLBL_Poisson::~ScaLBL_Poisson() = default;
// Load the solver settings from the "Poisson" and "Domain" sections of the
// input database stored in `filename`.
//
// Defaults are assigned first and overridden by these keys when present:
//   Poisson: timestepMax, analysis_interval, tolerance, epsilonR,
//            DummyChargeDen, WriteLog, BC_Solid, BC
//   Domain : voxel_length
// The lattice permittivities are recomputed at the end so they reflect any
// user-supplied voxel_length / epsilonR. Rank 0 prints a summary.
void ScaLBL_Poisson::ReadParams(string filename){
    // read the input database
    db = std::make_shared<Database>( filename );
    domain_db = db->getDatabase( "Domain" );
    electric_db = db->getDatabase( "Poisson" );
    auto &poisson = *electric_db;

    // ---- defaults ----
    k2_inv = 4.0;                            // D3Q7 lattice constant (original note: speed of sound)
    tau = 0.5+k2_inv;
    timestepMax = 100000;
    tolerance = 1.0e-6;                      // stopping criterion for the steady-state electrical potential
    h = 1.0;                                 // resolution; unit: um/lu
    epsilon0 = 8.85e-12;                     // electric permittivity of vacuum; unit: [C/(V*m)]
    epsilon0_LB = epsilon0*(h*1.0e-6);       // unit: [C/(V*lu)]
    epsilonR = 78.4;                         // default dielectric constant of water
    epsilon_LB = epsilon0_LB*epsilonR;       // electric permittivity
    analysis_interval = 1000;
    Vin = 1.0;                               // Boundary-z (inlet) electric potential
    Vout = 1.0;                              // Boundary-Z (outlet) electric potential
    chargeDen_dummy = 1.0e-3;                // for debugging; unit: [C/m^3]
    WriteLog = false;

    // ---- LB-Poisson model parameters ----
    if (poisson.keyExists( "timestepMax" ))
        timestepMax = poisson.getScalar<int>( "timestepMax" );
    if (poisson.keyExists( "analysis_interval" ))
        analysis_interval = poisson.getScalar<int>( "analysis_interval" );
    if (poisson.keyExists( "tolerance" ))
        tolerance = poisson.getScalar<double>( "tolerance" );
    if (poisson.keyExists( "epsilonR" ))
        epsilonR = poisson.getScalar<double>( "epsilonR" );
    if (poisson.keyExists( "DummyChargeDen" ))
        chargeDen_dummy = poisson.getScalar<double>( "DummyChargeDen" );
    if (poisson.keyExists( "WriteLog" ))
        WriteLog = poisson.getScalar<bool>( "WriteLog" );

    // Solid boundary condition specific to the Poisson equation
    BoundaryConditionSolid = 1;
    if (poisson.keyExists( "BC_Solid" ))
        BoundaryConditionSolid = poisson.getScalar<int>( "BC_Solid" );

    // Boundary condition for the electric potential
    //   BC = 0: normal periodic BC
    //   BC = 1: fixed inlet and outlet potential
    BoundaryCondition = 0;
    if (poisson.keyExists( "BC" ))
        BoundaryCondition = poisson.getScalar<int>( "BC" );

    // ---- domain parameters ----
    if (domain_db->keyExists( "voxel_length" ))   // default unit: um/lu
        h = domain_db->getScalar<double>( "voxel_length" );

    // Recalculate the derived parameters in case the user changed the inputs
    epsilon0_LB = epsilon0*(h*1.0e-6);       // unit: [C/(V*lu)]
    epsilon_LB = epsilon0_LB*epsilonR;       // electric permittivity

    // ---- summary, printed by rank 0 only ----
    if (rank==0){
        printf("***********************************************************************************\n");
        printf("LB-Poisson Solver: steady-state MaxTimeStep = %i; steady-state tolerance = %.3g \n", timestepMax,tolerance);
        printf(" LB relaxation tau = %.5g \n", tau);
        printf("***********************************************************************************\n");
        // Any value other than 2 is reported as the Dirichlet-type condition
        if (BoundaryConditionSolid == 2)
            printf("LB-Poisson Solver: solid boundary: Neumann-type surfacen charge density is assigned\n");
        else
            printf("LB-Poisson Solver: solid boundary: Dirichlet-type surfacen potential is assigned\n");
    }
}
// Construct the analysis (Dm) and mask (Mask) domains from the "Domain"
// database, cache the local grid dimensions, size the host arrays, and
// initialize communication for the analysis domain.
void ScaLBL_Poisson::SetDomain(){
    Dm.reset(new Domain(domain_db,comm));   // full domain for analysis
    Mask.reset(new Domain(domain_db,comm)); // mask domain removes immobile phases

    // local grid size, as reported by the analysis domain
    Nx = Dm->Nx;
    Ny = Dm->Ny;
    Nz = Dm->Nz;
    N = Nx*Ny*Nz;

    // domain lengths, as reported by the analysis domain
    Lx = Dm->Lx;
    Ly = Dm->Ly;
    Lz = Dm->Lz;

    Distance.resize(Nx,Ny,Nz);
    Psi_host.resize(Nx,Ny,Nz);

    // every voxel of the analysis domain starts out with id 1
    for (int n=0; n<N; n++){
        Dm->id[n] = 1;
    }
    //Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
    MPI_Barrier(comm);

    // both domains use the boundary condition read from the input database
    Dm->BoundaryCondition = BoundaryCondition;
    Mask->BoundaryCondition = BoundaryCondition;
    Dm->CommInit();
    MPI_Barrier(comm);

    // cache this process' rank and the process-grid layout
    rank = Dm->rank();
    nprocx = Dm->nprocx();
    nprocy = Dm->nprocy();
    nprocz = Dm->nprocz();
}
void ScaLBL_Poisson::ReadInput(){
sprintf(LocalRankString,"%05d",Dm->rank());
sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString);
sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString);
if (domain_db->keyExists( "Filename" )){
auto Filename = domain_db->getScalar<std::string>( "Filename" );
Mask->Decomp(Filename);
}
else if (domain_db->keyExists( "GridFile" )){
// Read the local domain data
auto input_id = readMicroCT( *domain_db, comm );
// Fill the halo (assuming GCW of 1)
array<int,3> size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) };
ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz };
ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 );
fillHalo<signed char> fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 );
Array<signed char> id_view;
id_view.viewRaw( size1, Mask->id );
fill.copy( input_id, id_view );
fill.fill( id_view );
}
else{
Mask->ReadIDs();
}
// Generate the signed distance map
// Initialize the domain and communication
Array<char> id_solid(Nx,Ny,Nz);
// Solve for the position of the solid phase
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
int n = k*Nx*Ny+j*Nx+i;
// Initialize the solid phase
if (Mask->id[n] > 0) id_solid(i,j,k) = 1;
else id_solid(i,j,k) = 0;
}
}
}
// Initialize the signed distance function
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
// Initialize distance to +/- 1
Distance(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0;
}
}
}
// MeanFilter(Averages->SDs);
if (rank==0) printf("LB-Poisson Solver: Initialized solid phase & converting to Signed Distance function \n");
CalcDist(Distance,id_solid,*Dm);
if (rank == 0) cout << " Domain set." << endl;
}
// Fill poisson_solid (one value per voxel, Nx*Ny*Nz entries) with the boundary
// value associated with each voxel's label in Mask->id.
//
// The "SolidLabels" / "SolidValues" lists of the Poisson database pair a label
// with a value; voxels whose label is not listed receive 0. For
// BoundaryConditionSolid==2 the value is converted from physical to LB units
// before it is stored. A per-label volume fraction summary is printed on rank 0.
//
// @param poisson_solid  output array indexed as k*Nx*Ny + j*Nx + i
void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid)
{
    const auto LabelList = electric_db->getVector<int>( "SolidLabels" );
    const auto AffinityList = electric_db->getVector<double>( "SolidValues" );

    const size_t NLABELS = LabelList.size();
    if (NLABELS != AffinityList.size()){
        ERROR("Error: LB-Poisson Solver: SolidLabels and SolidValues must be the same length! \n");
    }

    // std::vector instead of variable-length arrays: VLAs are not standard C++
    // and a zero-length VLA (no labels supplied) is undefined behaviour.
    std::vector<double> label_count(NLABELS, 0.0);
    std::vector<double> label_count_global(NLABELS, 0.0);

    // Assign the labels
    for (int k=0;k<Nz;k++){
        for (int j=0;j<Ny;j++){
            for (int i=0;i<Nx;i++){
                const int n = k*Nx*Ny+j*Nx+i;
                const signed char VALUE = Mask->id[n];
                double AFFINITY = 0.0;
                // Assign the affinity from the paired list (first match wins)
                for (size_t idx=0; idx < NLABELS; idx++){
                    if (VALUE == LabelList[idx]){
                        AFFINITY = AffinityList[idx];
                        //NOTE need to convert the user input phys unit to LB unit
                        if (BoundaryConditionSolid==2){
                            //for BCS=1, i.e. Dirichlet-type, no need for unit conversion
                            AFFINITY = AFFINITY*(h*h*1.0e-12)/epsilon_LB;
                        }
                        label_count[idx] += 1.0;
                        break;
                        //Mask->id[n] = 0; // set mask to zero since this is an immobile component
                    }
                }
                poisson_solid[n] = AFFINITY;
            }
        }
    }

    for (size_t idx=0; idx<NLABELS; idx++)
        label_count_global[idx]=sumReduce( Dm->Comm, label_count[idx]);

    if (rank==0){
        printf("LB-Poisson Solver: number of Poisson solid labels: %zu \n",NLABELS);
        // Interior voxel count over all processes, accumulated in double so the
        // product cannot overflow int on large domains / process counts.
        const double total_voxels = double(Nx-2)*double(Ny-2)*double(Nz-2)*double(nprocs);
        for (size_t idx=0; idx<NLABELS; idx++){
            const int label = LabelList[idx];
            const double value = AffinityList[idx]; // reported in the user's (physical) units
            const double volume_fraction = label_count_global[idx]/total_voxels;
            switch (BoundaryConditionSolid){
            case 2:
                printf("   label=%d, surface charge density=%.3g [C/m^2], volume fraction=%.2g\n",label,value,volume_fraction);
                break;
            case 1:
            default:
                printf("   label=%d, surface potential=%.3g [V], volume fraction=%.2g\n",label,value,volume_fraction);
                break;
            }
        }
    }
}
void ScaLBL_Poisson::Create(){
/*
* This function creates the variables needed to run a LBM
*/
int rank=Mask->rank();
//.........................................................
// Initialize communication structures in averaging domain
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
Mask->CommInit();
Np=Mask->PoreCount();
//...........................................................................
if (rank==0) printf ("LB-Poisson Solver: Create ScaLBL_Communicator \n");
// Create a communicator for the device (will use optimized layout)
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
ScaLBL_Comm = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
ScaLBL_Comm_Regular = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
int Npad=(Np/16 + 2)*16;
if (rank==0) printf ("LB-Poisson Solver: Set up memory efficient layout \n");
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np);
MPI_Barrier(comm);
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
// LBM variables
if (rank==0) printf ("LB-Poisson Solver: Allocating distributions \n");
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
int neighborSize=18*(Np*sizeof(int));
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np);
//ScaLBL_AllocateDeviceMemory((void **) &dvcID, sizeof(signed char)*Nx*Ny*Nz);
ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Nx*Ny*Nz);
ScaLBL_AllocateDeviceMemory((void **) &ElectricField, 3*sizeof(double)*Np);
//...........................................................................
// Update GPU data structures
if (rank==0) printf ("LB-Poisson Solver: Setting up device map and neighbor list \n");
fflush(stdout);
int *TmpMap;
TmpMap=new int[Np];
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
int idx=Map(i,j,k);
if (!(idx < 0))
TmpMap[idx] = k*Nx*Ny+j*Nx+i;
}
}
}
// check that TmpMap is valid
for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
auto n = TmpMap[idx];
if (n > Nx*Ny*Nz){
printf("Bad value! idx=%i \n", n);
TmpMap[idx] = Nx*Ny*Nz-1;
}
}
for (int idx=ScaLBL_Comm->FirstInterior(); idx<ScaLBL_Comm->LastInterior(); idx++){
auto n = TmpMap[idx];
if ( n > Nx*Ny*Nz ){
printf("Bad value! idx=%i \n",n);
TmpMap[idx] = Nx*Ny*Nz-1;
}
}
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
ScaLBL_DeviceBarrier();
delete [] TmpMap;
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
ScaLBL_DeviceBarrier();
MPI_Barrier(comm);
delete [] neighborList;
// copy node ID
//ScaLBL_CopyToDevice(dvcID, Mask->id, sizeof(signed char)*Nx*Ny*Nz);
//ScaLBL_DeviceBarrier();
//Initialize solid boundary for electric potential
ScaLBL_Comm->SetupBounceBackList(Map, Mask->id, Np);
MPI_Barrier(comm);
}
// Write the initial electric potential into psi_init for every voxel whose
// mask label is positive; other voxels are left untouched.
//
// The value depends only on the z-layer: Vin on the first two layers, Vout on
// the last two, and a linear ramp in between.
//
// @param psi_init  array of Nx*Ny*Nz values indexed as k*Nx*Ny + j*Nx + i
void ScaLBL_Poisson::Potential_Init(double *psi_init){
    // Inlet/outlet potentials are only read from the database for BC==1
    if (BoundaryCondition==1){
        if (electric_db->keyExists( "Vin" ))
            Vin = electric_db->getScalar<double>( "Vin" );
        if (electric_db->keyExists( "Vout" ))
            Vout = electric_db->getScalar<double>( "Vout" );
    }
    //By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis
    const double slope = (Vout-Vin)/(Nz-2);

    int n = 0; // running linear index; visits voxels in k, j, i order
    for (int k=0;k<Nz;k++){
        double layer_value;
        if (k<=1)
            layer_value = Vin;
        else if (k>=Nz-2)
            layer_value = Vout;
        else
            layer_value = slope*(k-1)+Vin;

        for (int j=0;j<Ny;j++){
            for (int i=0;i<Nx;i++,n++){
                if (Mask->id[n]>0)
                    psi_init[n] = layer_value;
            }
        }
    }
}
// Build the initial potential field on the host, upload it to Psi, and
// initialize the D3Q7 distributions from it.
void ScaLBL_Poisson::Initialize(){
    /*
     * This function initializes model
     */
    if (rank==0)    printf ("LB-Poisson Solver: initializing D3Q7 distributions\n");
    //NOTE the initialization involves two steps:
    //1. assign solid boundary value (surface potential or surface change density)
    //2. Initialize electric potential for pore nodes
    double *initial_field = new double [Nx*Ny*Nz];
    AssignSolidBoundary(initial_field);  //step1
    Potential_Init(initial_field);       //step2

    ScaLBL_CopyToDevice(Psi, initial_field, Nx*Ny*Nz*sizeof(double));
    ScaLBL_DeviceBarrier();

    // interior sites first, then the sites up to LastExterior()
    const auto interior_begin = ScaLBL_Comm->FirstInterior();
    const auto interior_end = ScaLBL_Comm->LastInterior();
    ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, interior_begin, interior_end, Np);
    const auto exterior_end = ScaLBL_Comm->LastExterior();
    ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, 0, exterior_end, Np);

    delete [] initial_field;

    //extra treatment for halo layer
    //if (BoundaryCondition==1){
    //    if (Dm->kproc()==0){
    //        ScaLBL_SetSlice_z(Psi,Vin,Nx,Ny,Nz,0);
    //    }
    //    if (Dm->kproc() == nprocz-1){
    //        ScaLBL_SetSlice_z(Psi,Vout,Nx,Ny,Nz,Nz-1);
    //    }
    //}
}
// Iterate the solver in odd/even timestep pairs until either timestepMax is
// reached or the relative change of the averaged potential between two
// consecutive convergence checks is no larger than tolerance.
//
// @param ChargeDensity  charge density array forwarded to the collision kernels
void ScaLBL_Poisson::Run(double *ChargeDensity){

    //.......create and start timer............
    //double starttime,stoptime,cputime;
    //ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
    //starttime = MPI_Wtime();

    timestep=0;
    double error = 1.0;
    double previous_average = 0.0;
    while (timestep < timestepMax && error > tolerance) {
        // ---- odd timestep ----
        timestep++;
        SolveElectricPotentialAAodd();      //update electric potential
        SolvePoissonAAodd(ChargeDensity);   //perform collision
        ScaLBL_DeviceBarrier(); MPI_Barrier(comm);

        // ---- even timestep ----
        timestep++;
        SolveElectricPotentialAAeven();     //update electric potential
        SolvePoissonAAeven(ChargeDensity);  //perform collision
        ScaLBL_DeviceBarrier(); MPI_Barrier(comm);

        // Convergence is only evaluated every analysis_interval timesteps
        if (timestep%analysis_interval != 0) continue;

        //ScaLBL_Comm->RegularLayout(Map,Psi,Psi_host);
        ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);

        // Local sum and count of the potential over interior voxels with
        // positive Distance
        double local_count=0;
        double local_sum=0.f;
        for (int k=1; k<Nz-1; k++){
            for (int j=1; j<Ny-1; j++){
                for (int i=1; i<Nx-1; i++){
                    if (Distance(i,j,k) > 0){
                        local_sum += Psi_host(i,j,k);
                        local_count+=1.0;
                    }
                }
            }
        }

        // Global average over all processes of the mask communicator
        double global_count;
        double average;
        MPI_Allreduce(&local_sum,&average,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
        MPI_Allreduce(&local_count,&global_count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
        average /= global_count;

        // Relative change since the previous check; a zero average is
        // normalized by 1 instead
        const double normalizer = (average==0.0) ? 1.0 : average;
        error = fabs(average-previous_average)/fabs(normalizer);
        previous_average = average;
    }

    if(WriteLog==true){
        getConvergenceLog(timestep,error);
    }

    //************************************************************************/
    //stoptime = MPI_Wtime();
    ////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n");
    ////if (rank==0) printf("---------------------------------------------------------------------------\n");
    //// Compute the walltime per timestep
    //cputime = (stoptime - starttime)/timestep;
    //// Performance obtained from each node
    //double MLUPS = double(Np)/cputime/1000000;

    //if (rank==0) printf("******************* LB-Poisson Solver Statistics ********************\n");
    //if (rank==0) printf("CPU time = %f \n", cputime);
    //if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
    //MLUPS *= nprocs;
    //if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
    //if (rank==0) printf("*********************************************************************\n");
}
// Append one "timestep error" record to PoissonSolver_Convergence.csv (rank 0
// only). A header line is written first when the file does not exist yet.
//
// The log is opened and closed on every call: the previous version left the
// append handle open, leaking one FILE per call, and wrote through a NULL
// pointer when the file could not be opened.
//
// @param timestep  timestep count to record
// @param error     convergence error to record
void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){
    if (rank!=0) return;

    const char *log_name = "PoissonSolver_Convergence.csv";

    // Probe for an existing file to decide whether a header is needed
    bool WriteHeader = true;
    TIMELOG = fopen(log_name,"r");
    if (TIMELOG != NULL){
        fclose(TIMELOG);
        WriteHeader = false;
    }

    TIMELOG = fopen(log_name,"a+");
    if (TIMELOG == NULL){
        printf("LB-Poisson Solver: unable to open %s for writing \n",log_name);
        return;
    }
    if (WriteHeader){
        fprintf(TIMELOG,"Timestep Error\n");
    }
    fprintf(TIMELOG,"%i %.5g\n",timestep,error);

    // fclose flushes the record; reset the member so no stale handle remains
    fclose(TIMELOG);
    TIMELOG = NULL;
}
// Odd-timestep potential update: start the D3Q7 exchange, update the interior
// sites, finish the exchange, apply the z-boundary potentials when BC==1, then
// update the sites up to LastExterior().
void ScaLBL_Poisson::SolveElectricPotentialAAodd(){
    ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL
    {
        const auto interior_begin = ScaLBL_Comm->FirstInterior();
        const auto interior_end = ScaLBL_Comm->LastInterior();
        ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, interior_begin, interior_end, Np);
    }
    ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
    ScaLBL_DeviceBarrier();

    // Set boundary conditions
    const bool fixed_potential = (BoundaryCondition == 1);
    if (fixed_potential){
        ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq,  Vin, timestep);
        ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
    }
    //-------------------------//
    const auto exterior_end = ScaLBL_Comm->LastExterior();
    ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, exterior_end, Np);
}
// Even-timestep potential update: start the D3Q7 exchange, update the interior
// sites, finish the exchange, apply the z-boundary potentials when BC==1, then
// update the sites up to LastExterior().
void ScaLBL_Poisson::SolveElectricPotentialAAeven(){
    ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL
    {
        const auto interior_begin = ScaLBL_Comm->FirstInterior();
        const auto interior_end = ScaLBL_Comm->LastInterior();
        ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, interior_begin, interior_end, Np);
    }
    ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
    ScaLBL_DeviceBarrier();

    // Set boundary conditions
    const bool fixed_potential = (BoundaryCondition == 1);
    if (fixed_potential){
        ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq,  Vin, timestep);
        ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
    }
    //-------------------------//
    const auto exterior_end = ScaLBL_Comm->LastExterior();
    ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, exterior_end, Np);
}
// Odd-timestep collision over the interior sites and then the sites up to
// LastExterior(), followed by the solid boundary treatment selected by
// BoundaryConditionSolid (1: Dirichlet, 2: Neumann, anything else: none).
//
// @param ChargeDensity  charge density array forwarded to the kernel
void ScaLBL_Poisson::SolvePoissonAAodd(double *ChargeDensity){
    const auto interior_begin = ScaLBL_Comm->FirstInterior();
    const auto interior_end = ScaLBL_Comm->LastInterior();
    ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, interior_begin, interior_end, Np);

    const auto exterior_end = ScaLBL_Comm->LastExterior();
    ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, 0, exterior_end, Np);

    switch (BoundaryConditionSolid){
    case 1:
        ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
        break;
    case 2:
        ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
        break;
    default:
        break;
    }
}
// Even-timestep collision over the interior sites and then the sites up to
// LastExterior(), followed by the solid boundary treatment selected by
// BoundaryConditionSolid (1: Dirichlet, 2: Neumann, anything else: none).
//
// @param ChargeDensity  charge density array forwarded to the kernel
void ScaLBL_Poisson::SolvePoissonAAeven(double *ChargeDensity){
    const auto interior_begin = ScaLBL_Comm->FirstInterior();
    const auto interior_end = ScaLBL_Comm->LastInterior();
    ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, interior_begin, interior_end, Np);

    const auto exterior_end = ScaLBL_Comm->LastExterior();
    ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, 0, exterior_end, Np);

    switch (BoundaryConditionSolid){
    case 1:
        ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
        break;
    case 2:
        ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
        break;
    default:
        break;
    }
}
void ScaLBL_Poisson::DummyChargeDensity(){
double *ChargeDensity_host;
ChargeDensity_host = new double[Np];
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
int idx=Map(i,j,k);
if (!(idx < 0))
ChargeDensity_host[idx] = chargeDen_dummy*(h*h*h*1.0e-18);
}
}
}
ScaLBL_AllocateDeviceMemory((void **) &ChargeDensityDummy, sizeof(double)*Np);
ScaLBL_CopyToDevice(ChargeDensityDummy, ChargeDensity_host, sizeof(double)*Np);
ScaLBL_DeviceBarrier();
delete [] ChargeDensity_host;
}
// Write this rank's potential field (Nx*Ny*Nz doubles copied from Psi) to
// "Electric_Potential_Time_<timestep>.<rank>.raw".
//
// The file name is formatted with snprintf into a local buffer: the fixed part
// of the name is already 34 characters, so a timestep of six or more digits
// overflowed the 40-byte LocalRankFilename member used previously.
//
// @param timestep  timestep number embedded in the file name
void ScaLBL_Poisson::getElectricPotential_debug(int timestep){
    //This function write out decomposed data
    DoubleArray PhaseField(Nx,Ny,Nz);
    //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField);
    ScaLBL_CopyToHost(PhaseField.data(),Psi,sizeof(double)*Nx*Ny*Nz);
    //ScaLBL_DeviceBarrier(); MPI_Barrier(comm);

    char filename[128];
    snprintf(filename,sizeof(filename),"Electric_Potential_Time_%i.%05i.raw",timestep,rank);

    FILE *OUTFILE = fopen(filename,"wb");
    if (OUTFILE == NULL){
        // report and skip the dump instead of writing through a NULL handle
        printf("LB-Poisson Solver: unable to open %s for writing \n",filename);
        return;
    }
    fwrite(PhaseField.data(),sizeof(double),N,OUTFILE);
    fclose(OUTFILE);
}
// Copy the potential field Psi (Nx*Ny*Nz doubles) into the caller's array.
//
// @param ReturnValues  destination; its storage receives Nx*Ny*Nz doubles
void ScaLBL_Poisson::getElectricPotential(DoubleArray &ReturnValues){
    //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField);
    double *destination = ReturnValues.data();
    ScaLBL_CopyToHost(destination,Psi,sizeof(double)*Nx*Ny*Nz);
}
// Retrieve the three electric field components into regular-layout arrays and
// convert each from LB to physical units.
//
// @param Values_x  receives component 0 (ElectricField[0*Np ...])
// @param Values_y  receives component 1 (ElectricField[1*Np ...])
// @param Values_z  receives component 2 (ElectricField[2*Np ...])
void ScaLBL_Poisson::getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z){
    DoubleArray *targets[3] = { &Values_x, &Values_y, &Values_z };
    for (int component=0; component<3; component++){
        DoubleArray &field = *targets[component];
        ScaLBL_Comm->RegularLayout(Map,&ElectricField[component*Np],field);
        ElectricField_LB_to_Phys(field);
        ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
    }
}
// Write this rank's electric field, one file per component, to
// "ElectricField_<X|Y|Z>_Time_<timestep>.<rank>.raw" (N doubles each, converted
// to physical units).
//
// File names are built with snprintf in a local buffer so that long
// timestep/rank values cannot overflow the 40-byte LocalRankFilename member,
// and a file that cannot be opened is reported and skipped rather than written
// through a NULL handle.
//
// @param timestep  timestep number embedded in the file names
void ScaLBL_Poisson::getElectricField_debug(int timestep){
    //ScaLBL_D3Q7_Poisson_getElectricField(fq,ElectricField,tau,Np);
    //ScaLBL_DeviceBarrier(); MPI_Barrier(comm);

    DoubleArray PhaseField(Nx,Ny,Nz);
    const char axis_name[3] = { 'X', 'Y', 'Z' };

    for (int component=0; component<3; component++){
        // component c occupies ElectricField[c*Np ... (c+1)*Np)
        ScaLBL_Comm->RegularLayout(Map,&ElectricField[component*Np],PhaseField);
        ElectricField_LB_to_Phys(PhaseField);

        char filename[128];
        snprintf(filename,sizeof(filename),"ElectricField_%c_Time_%i.%05i.raw",axis_name[component],timestep,rank);

        FILE *OUTFILE = fopen(filename,"wb");
        if (OUTFILE == NULL){
            printf("LB-Poisson Solver: unable to open %s for writing \n",filename);
            continue;
        }
        fwrite(PhaseField.data(),sizeof(double),N,OUTFILE);
        fclose(OUTFILE);
    }
}
// Convert a regular-layout field component from LB to physical units in place:
// every voxel that Map assigns a non-negative index is divided by h*1e-6.
//
// @param Efield_reg  Nx x Ny x Nz field, modified in place
void ScaLBL_Poisson::ElectricField_LB_to_Phys(DoubleArray &Efield_reg){
    const double length_scale = h*1.0e-6;
    for (int k=0;k<Nz;k++){
        for (int j=0;j<Ny;j++){
            for (int i=0;i<Nx;i++){
                if (Map(i,j,k) < 0) continue; // voxels with a negative Map entry are left unchanged
                Efield_reg(i,j,k) = Efield_reg(i,j,k)/length_scale;
            }
        }
    }
}
//void ScaLBL_Poisson::SolveElectricField(){
// ScaLBL_Comm_Regular->SendHalo(Psi);
// ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid,
// Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
// ScaLBL_Comm_Regular->RecvHalo(Psi);
// ScaLBL_DeviceBarrier();
// if (BoundaryCondition == 1){
// ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin);
// ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout);
// }
// ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
//
//}
//void ScaLBL_Poisson::getElectricPotential(){
//
// DoubleArray PhaseField(Nx,Ny,Nz);
// ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField);
// //ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
// FILE *OUTFILE;
// sprintf(LocalRankFilename,"Electric_Potential.%05i.raw",rank);
// OUTFILE = fopen(LocalRankFilename,"wb");
// fwrite(PhaseField.data(),8,N,OUTFILE);
// fclose(OUTFILE);
//}
//old version where Psi is of size Np
//void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid)
//{
// size_t NLABELS=0;
// signed char VALUE=0;
// double AFFINITY=0.f;
//
// auto LabelList = electric_db->getVector<int>( "SolidLabels" );
// auto AffinityList = electric_db->getVector<double>( "SolidValues" );
//
// NLABELS=LabelList.size();
// if (NLABELS != AffinityList.size()){
// ERROR("Error: LB-Poisson Solver: SolidLabels and SolidValues must be the same length! \n");
// }
//
// double label_count[NLABELS];
// double label_count_global[NLABELS];
// // Assign the labels
//
// for (size_t idx=0; idx<NLABELS; idx++) label_count[idx]=0;
//
// for (int k=0;k<Nz;k++){
// for (int j=0;j<Ny;j++){
// for (int i=0;i<Nx;i++){
// int n = k*Nx*Ny+j*Nx+i;
// VALUE=Mask->id[n];
// AFFINITY=0.f;
// // Assign the affinity from the paired list
// for (unsigned int idx=0; idx < NLABELS; idx++){
// //printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]);
// if (VALUE == LabelList[idx]){
// AFFINITY=AffinityList[idx];
// //NOTE need to convert the user input phys unit to LB unit
// if (BoundaryConditionSolid==2){
// //for BCS=1, i.e. Dirichlet-type, no need for unit conversion
// //TODO maybe there is a factor of gamm missing here ?
// AFFINITY = AFFINITY*(h*h*1.0e-12)/epsilon_LB;
// }
// label_count[idx] += 1.0;
// idx = NLABELS;
// //Mask->id[n] = 0; // set mask to zero since this is an immobile component
// }
// }
// poisson_solid[n] = AFFINITY;
// }
// }
// }
//
// for (size_t idx=0; idx<NLABELS; idx++)
// label_count_global[idx]=sumReduce( Dm->Comm, label_count[idx]);
//
// if (rank==0){
// printf("LB-Poisson Solver: number of Poisson solid labels: %lu \n",NLABELS);
// for (unsigned int idx=0; idx<NLABELS; idx++){
// VALUE=LabelList[idx];
// AFFINITY=AffinityList[idx];
// double volume_fraction = double(label_count_global[idx])/double((Nx-2)*(Ny-2)*(Nz-2)*nprocs);
// switch (BoundaryConditionSolid){
// case 1:
// printf(" label=%d, surface potential=%.3g [V], volume fraction=%.2g\n",VALUE,AFFINITY,volume_fraction);
// break;
// case 2:
// printf(" label=%d, surface charge density=%.3g [C/m^2], volume fraction=%.2g\n",VALUE,AFFINITY,volume_fraction);
// break;
// default:
// printf(" label=%d, surface potential=%.3g [V], volume fraction=%.2g\n",VALUE,AFFINITY,volume_fraction);
// break;
// }
// }
// }
//}
// old version where Psi is of size Np
//void ScaLBL_Poisson::Potential_Init(double *psi_init){
//
// if (BoundaryCondition==1){
// if (electric_db->keyExists( "Vin" )){
// Vin = electric_db->getScalar<double>( "Vin" );
// }
// if (electric_db->keyExists( "Vout" )){
// Vout = electric_db->getScalar<double>( "Vout" );
// }
// }
// //By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis
// double slope = (Vout-Vin)/(Nz-2);
// double psi_linearized;
// for (int k=0;k<Nz;k++){
// if (k==0 || k==1){
// psi_linearized = Vin;
// }
// else if (k==Nz-1 || k==Nz-2){
// psi_linearized = Vout;
// }
// else{
// psi_linearized = slope*(k-1)+Vin;
// }
// for (int j=0;j<Ny;j++){
// for (int i=0;i<Nx;i++){
// int idx = Map(i,j,k);
// if (!(idx < 0)){
// psi_init[idx] = psi_linearized;
// }
// }
// }
// }
//}

102
models/PoissonSolver.h Normal file
View File

@ -0,0 +1,102 @@
/*
 * D3Q7 lattice Boltzmann solver for the Poisson equation (electric potential)
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <exception>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/MPI_Helpers.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
#ifndef ScaLBL_POISSON_INC
#define ScaLBL_POISSON_INC
// Lattice Boltzmann (D3Q7) solver for the electric potential.
// Typical call sequence, following the declaration order below:
// ReadParams -> SetDomain -> ReadInput -> Create -> Initialize -> Run.
class ScaLBL_Poisson{
public:
// RANK / NP: process rank and process count; COMM: MPI communicator used by the solver
ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM);
~ScaLBL_Poisson();
// functions, in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
// build the Dm / Mask domains and cache grid sizes
void SetDomain();
// load voxel labels into Mask and build the signed distance map
void ReadInput();
// create communicators, the compact site layout and the device arrays
void Create();
// set the initial potential and the D3Q7 distributions
void Initialize();
// iterate until timestepMax is reached or the convergence error drops to tolerance
void Run(double *ChargeDensity);
// copy Psi (Nx*Ny*Nz doubles) into ReturnValues
void getElectricPotential(DoubleArray &ReturnValues);
// write Psi of this rank to a .raw file
void getElectricPotential_debug(int timestep);
// retrieve the three field components in regular layout, in physical units
void getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z);
// write the three field components of this rank to .raw files
void getElectricField_debug(int timestep);
void DummyChargeDensity();//for debugging
//bool Restart,pBC;
int timestep,timestepMax; // current timestep counter and its upper limit
int analysis_interval; // convergence is checked every analysis_interval timesteps
int BoundaryCondition; // 0: periodic; 1: fixed inlet/outlet potential
int BoundaryConditionSolid; // 1: Dirichlet-type (surface potential); 2: Neumann-type (surface charge density)
double tau; // LB relaxation time
double tolerance; // stopping criterion for the steady-state potential
double k2_inv;
double epsilon0,epsilon0_LB,epsilonR,epsilon_LB; // permittivity: vacuum (physical, LB units), relative, and total in LB units
double Vin, Vout; // inlet (z) and outlet (Z) boundary potentials
double chargeDen_dummy;//for debugging
bool WriteLog; // when true, Run appends to the convergence log
int Nx,Ny,Nz,N,Np; // local grid size, N = Nx*Ny*Nz, Np = number of sites in the compact layout
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
double h;//image resolution (um per lattice unit)
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
std::shared_ptr<Database> electric_db;
IntArray Map; // (i,j,k) -> compact site index; negative where no site is assigned
DoubleArray Distance; // signed distance map built in ReadInput
DoubleArray Psi_host; // host copy of Psi used for the convergence check
// arrays allocated with ScaLBL_AllocateDeviceMemory in Create()
int *NeighborList; // 18*Np ints
int *dvcMap; // Np ints: compact site index -> regular-grid index
//signed char *dvcID;
double *fq; // 7*Np doubles: D3Q7 distributions
double *Psi; // Nx*Ny*Nz doubles: electric potential
double *ElectricField; // 3*Np doubles, stored component by component
double *ChargeDensityDummy;// for debugging
private:
MPI_Comm comm;
// filenames
// NOTE: these are fixed-size buffers; anything formatted into them must fit,
// including the terminating NUL.
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
char OutputFilename[200];
FILE *TIMELOG; // handle used by getConvergenceLog
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
// fill poisson_solid with the per-voxel solid boundary value
void AssignSolidBoundary(double *poisson_solid);
// write the initial potential for voxels with a positive mask label
void Potential_Init(double *psi_init);
// in-place conversion of a field component from LB to physical units
void ElectricField_LB_to_Phys(DoubleArray &Efield_reg);
void SolveElectricPotentialAAodd();
void SolveElectricPotentialAAeven();
//void SolveElectricField();
void SolvePoissonAAodd(double *ChargeDensity);
void SolvePoissonAAeven(double *ChargeDensity);
// append one "timestep error" record to the convergence log (rank 0 only)
void getConvergenceLog(int timestep,double error);
};
#endif

815
models/StokesModel.cpp Normal file
View File

@ -0,0 +1,815 @@
/*
* Multi-relaxation time LBM Model
*/
#include "models/StokesModel.h"
#include "analysis/distance.h"
#include "common/ReadMicroCT.h"
// Store the rank, process count and communicator; every other listed member
// starts at zero.
ScaLBL_StokesModel::ScaLBL_StokesModel(int RANK, int NP, MPI_Comm COMM):
    rank(RANK), nprocs(NP),
    Restart(0), timestep(0), timestepMax(0), tau(0),
    Fx(0), Fy(0), Fz(0),
    flux(0), din(0), dout(0),
    mu(0), h(0), nu_phys(0), rho_phys(0), rho0(0),
    den_scale(0), time_conv(0), tolerance(0),
    Nx(0), Ny(0), Nz(0), N(0), Np(0),
    nprocx(0), nprocy(0), nprocz(0),
    BoundaryCondition(0),
    Lx(0), Ly(0), Lz(0),
    comm(COMM)
{
}
// Destructor with an intentionally empty body.
ScaLBL_StokesModel::~ScaLBL_StokesModel()
{
}
// Read the model parameters from the input database and take the maximum
// number of timesteps from the caller (the multiphysics controller).
//
// All parameter handling is shared with the single-argument overload, which
// never touches timestepMax; this overload used to be a line-for-line copy of
// it and now delegates instead.
//
// @param filename  path of the input database
// @param num_iter  value assigned to timestepMax
void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){
    ReadParams(filename);
    //------ Load number of iteration from multiphysics controller ------//
    timestepMax = num_iter;
    //-------------------------------------------------------------------//
}
// Read the model parameters from the "Domain" and "Stokes" sections of the
// input database, falling back to built-in defaults for absent keys, then
// derive mu, time_conv and den_scale from the final values.
//
// @param filename  path of the input database
void ScaLBL_StokesModel::ReadParams(string filename){
    //NOTE the max time step is left unspecified

    // read the input database
    db = std::make_shared<Database>( filename );
    domain_db = db->getDatabase( "Domain" );
    stokes_db = db->getDatabase( "Stokes" );

    //---------------------- Default model parameters --------------------------//
    rho_phys = 1000.0; //by default use water density; unit [kg/m^3]
    nu_phys = 1.004e-6;//by default use water kinematic viscosity at 20C; unit [m^2/sec]
    h = 1.0;//image resolution;[um]
    tau = 1.0;
    rho0 = 1.0;//LB density
    tolerance = 1.0e-8;
    Fx = Fy = 0.0;
    Fz = 1.0e-5;
    BoundaryCondition = 0;
    //--------------------------------------------------------------------------//

    // Read domain parameters
    if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu
        h = domain_db->getScalar<double>( "voxel_length" );
    }

    // Single-fluid Navier-Stokes Model parameters
    //if (stokes_db->keyExists( "timestepMax" )){
    //    timestepMax = stokes_db->getScalar<int>( "timestepMax" );
    //}
    if (stokes_db->keyExists( "BC" )){
        BoundaryCondition = stokes_db->getScalar<int>( "BC" );
    }
    if (stokes_db->keyExists( "tolerance" )){
        tolerance = stokes_db->getScalar<double>( "tolerance" );
    }
    if (stokes_db->keyExists( "tau" )){
        tau = stokes_db->getScalar<double>( "tau" );
    }
    if (stokes_db->keyExists( "rho0" )){
        rho0 = stokes_db->getScalar<double>( "rho0" );
    }
    if (stokes_db->keyExists( "nu_phys" )){
        nu_phys = stokes_db->getScalar<double>( "nu_phys" );
    }
    if (stokes_db->keyExists( "rho_phys" )){
        rho_phys = stokes_db->getScalar<double>( "rho_phys" );
    }
    if (stokes_db->keyExists( "F" )){
        // Fetch the vector once and verify it has three components before
        // indexing it (a shorter list was previously read out of bounds).
        const auto F = stokes_db->getVector<double>( "F" );
        ASSERT( F.size() >= 3 );
        Fx = F[0];
        Fy = F[1];
        Fz = F[2];
    }
    if (stokes_db->keyExists( "Restart" )){
        Restart = stokes_db->getScalar<bool>( "Restart" );
    }
    if (stokes_db->keyExists( "din" )){
        din = stokes_db->getScalar<double>( "din" );
    }
    if (stokes_db->keyExists( "dout" )){
        dout = stokes_db->getScalar<double>( "dout" );
    }
    if (stokes_db->keyExists( "flux" )){
        flux = stokes_db->getScalar<double>( "flux" );
    }

    // Derived parameters, computed once from the final inputs. (The earlier
    // provisional values were always overwritten here, and the provisional
    // time_conv omitted the 1e-12 factor.)
    mu=(tau-0.5)/3.0;//LB kinematic viscosity;unit [lu^2/lt]
    time_conv = (h*h*1.0e-12)*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt]
    den_scale = rho_phys/rho0*(h*h*h*1.0e-18);//scale factor for density
}
// Construct the analysis (Dm) and mask (Mask) domains from the "Domain"
// database, cache the local grid dimensions, size the host arrays, and
// initialize communication for the analysis domain.
void ScaLBL_StokesModel::SetDomain(){
    Dm.reset(new Domain(domain_db,comm));   // full domain for analysis
    Mask.reset(new Domain(domain_db,comm)); // mask domain removes immobile phases

    // local grid size, as reported by the analysis domain
    Nx = Dm->Nx;
    Ny = Dm->Ny;
    Nz = Dm->Nz;
    N = Nx*Ny*Nz;

    // domain lengths, as reported by the analysis domain
    Lx = Dm->Lx;
    Ly = Dm->Ly;
    Lz = Dm->Lz;

    Distance.resize(Nx,Ny,Nz);
    Velocity_x.resize(Nx,Ny,Nz);
    Velocity_y.resize(Nx,Ny,Nz);
    Velocity_z.resize(Nx,Ny,Nz);

    // every voxel of the analysis domain starts out with id 1
    for (int n=0; n<N; n++){
        Dm->id[n] = 1;
    }
    //Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
    MPI_Barrier(comm);

    // both domains use the boundary condition read from the input database
    Dm->BoundaryCondition = BoundaryCondition;
    Mask->BoundaryCondition = BoundaryCondition;
    Dm->CommInit();
    MPI_Barrier(comm);

    // cache this process' rank and the process-grid layout
    rank = Dm->rank();
    nprocx = Dm->nprocx();
    nprocy = Dm->nprocy();
    nprocz = Dm->nprocz();
}
/*
 * Load the phase labels into Mask->id and build the signed distance map.
 *
 * The label source is selected from domain_db:
 *   - "Filename": Mask->Decomp(Filename)
 *   - "GridFile": readMicroCT() followed by a halo fill
 *   - otherwise : Mask->ReadIDs()
 * Afterwards Distance is seeded with +/-1 from the labels and passed to CalcDist.
 */
void ScaLBL_StokesModel::ReadInput(){
    // Per-rank file names. snprintf bounds every write to the size of the
    // fixed-length member buffer, so an unexpectedly wide rank cannot overrun it.
    snprintf(LocalRankString,sizeof(LocalRankString),"%05d",Dm->rank());
    snprintf(LocalRankFilename,sizeof(LocalRankFilename),"%s%s","ID.",LocalRankString);
    snprintf(LocalRestartFile,sizeof(LocalRestartFile),"%s%s","Restart.",LocalRankString);

    if (domain_db->keyExists( "Filename" )){
        auto Filename = domain_db->getScalar<std::string>( "Filename" );
        Mask->Decomp(Filename);
    }
    else if (domain_db->keyExists( "GridFile" )){
        // Read the local domain data
        auto input_id = readMicroCT( *domain_db, comm );
        // Fill the halo (assuming GCW of 1)
        array<int,3> size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) };
        ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz };
        // the mask must be exactly one halo layer larger than the input on every side
        ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 );
        fillHalo<signed char> fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 );
        Array<signed char> id_view;
        id_view.viewRaw( size1, Mask->id );
        fill.copy( input_id, id_view );
        fill.fill( id_view );
    }
    else{
        Mask->ReadIDs();
    }

    // Generate the signed distance map.
    // id_solid is 1 where the mask label is positive and 0 elsewhere; the
    // distance is seeded with +1 / -1 accordingly before calling CalcDist.
    Array<char> id_solid(Nx,Ny,Nz);
    for (int k=0;k<Nz;k++){
        for (int j=0;j<Ny;j++){
            for (int i=0;i<Nx;i++){
                int n = k*Nx*Ny+j*Nx+i;
                if (Mask->id[n] > 0)    id_solid(i,j,k) = 1;
                else                    id_solid(i,j,k) = 0;
                // Initialize distance to +/- 1
                Distance(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0;
            }
        }
    }
    //  MeanFilter(Averages->SDs);
    if (rank==0) printf("LB Single-Fluid Solver: initialized solid phase & converting to Signed Distance function \n");
    CalcDist(Distance,id_solid,*Dm);

    if (rank == 0) cout << "    Domain set." << endl;
}
/*
 * Create the variables needed to run the LBM: the ScaLBL communicator,
 * the compact site map / neighbor list, and the device arrays
 * (NeighborList, fq, Pressure, Velocity).
 */
void ScaLBL_StokesModel::Create(){
    int rank=Mask->rank();
    //.........................................................
    // Initialize communication structures in averaging domain
    for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
    Mask->CommInit();
    Np=Mask->PoreCount();
    //...........................................................................
    if (rank==0)    printf ("LB Single-Fluid Solver: Create ScaLBL_Communicator \n");
    // Create a communicator for the device (will use optimized layout)
    ScaLBL_Comm  = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));

    int Npad=(Np/16 + 2)*16;
    if (rank==0)    printf ("LB Single-Fluid Solver: Set up memory efficient layout \n");
    Map.resize(Nx,Ny,Nz);       Map.fill(-2);
    // Host staging buffer for the neighbor list. Owned by a vector so it is
    // released when this function returns (the raw new[] used previously was never freed).
    std::vector<int> neighborList(18*static_cast<size_t>(Npad));
    Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList.data(),Mask->id,Np);
    MPI_Barrier(comm);

    //...........................................................................
    //                MAIN  VARIABLES ALLOCATED HERE
    //...........................................................................
    // LBM variables
    if (rank==0)    printf ("LB Single-Fluid Solver: Allocating distributions \n");
    //......................device distributions.................................
    // Byte counts are computed in size_t: 19*Np*sizeof(double) does not fit in
    // an int once Np exceeds roughly 14 million sites.
    const size_t dist_mem_size = static_cast<size_t>(Np)*sizeof(double);
    const size_t neighborSize = 18*static_cast<size_t>(Np)*sizeof(int);
    //...........................................................................
    ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
    ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
    ScaLBL_AllocateDeviceMemory((void **) &Pressure, dist_mem_size);
    ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*dist_mem_size);
    //...........................................................................
    // Update GPU data structures
    if (rank==0)    printf ("LB Single-Fluid Solver: Setting up device map and neighbor list \n");
    // copy the neighbor list
    // NOTE(review): assumes ScaLBL_CopyToDevice has finished reading the host
    // buffer when it returns (blocking copy) - confirm for the device backend in use.
    ScaLBL_CopyToDevice(NeighborList, neighborList.data(), neighborSize);
    MPI_Barrier(comm);
}
/*
 * Initialize the D3Q19 distributions and report the model settings on rank 0.
 */
void ScaLBL_StokesModel::Initialize(){
    const bool isRoot = (rank==0);

    if (isRoot){
        printf("LB Single-Fluid Solver: Initializing distributions \n");
        printf("****************************************************************\n");
    }

    ScaLBL_D3Q19_Init(fq, Np);

    if (isRoot){
        printf("*****************************************************\n");
        printf("LB Single-Fluid Navier-Stokes Solver: \n");
        printf("      Time conversion factor: %.5g [sec/lt]\n", time_conv);
        printf("      Internal iteration: %i [lt]\n", timestepMax);
        printf("*****************************************************\n");
    }
}
void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){
double rlx_setA=1.0/tau;
double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
timestep = 0;
while (timestep < timestepMax) {
//************************************************************************/
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv,
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv,
0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv,
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv,
0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************/
}
}
/*
 * Fill Vel_x, Vel_y, Vel_z with the velocity components in regular layout,
 * converted to physical units [m/sec] by Velocity_LB_to_Phys.
 */
void ScaLBL_StokesModel::getVelocity(DoubleArray &Vel_x, DoubleArray &Vel_y, DoubleArray &Vel_z){
    // refresh the velocity array from the distributions
    ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
    ScaLBL_DeviceBarrier(); MPI_Barrier(comm);

    // component c is stored at offset c*Np of Velocity
    DoubleArray *component[3] = { &Vel_x, &Vel_y, &Vel_z };
    for (int c=0; c<3; c++){
        ScaLBL_Comm->RegularLayout(Map,&Velocity[c*Np],*component[c]);
        Velocity_LB_to_Phys(*component[c]);
        ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
    }
}
/*
 * Debug output: write each velocity component, converted to physical units
 * [m/sec], as raw doubles to
 *     Velocity_{X,Y,Z}_Time_<timestep>.<rank>.raw
 *
 * timestep : value used in the file name (shadows the member of the same name).
 *
 * LocalRankFilename is used as scratch space and holds the Z file name on return.
 * A component whose file cannot be opened is reported on stderr and skipped.
 */
void ScaLBL_StokesModel::getVelocity_debug(int timestep){
    //get velocity in physical unit [m/sec]
    ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
    ScaLBL_DeviceBarrier(); MPI_Barrier(comm);

    DoubleArray PhaseField(Nx,Ny,Nz);
    const char axis[3] = { 'X', 'Y', 'Z' };
    for (int c=0; c<3; c++){
        // component c is stored at offset c*Np of Velocity
        ScaLBL_Comm->RegularLayout(Map,&Velocity[c*Np],PhaseField);
        Velocity_LB_to_Phys(PhaseField);

        // bounded formatting: the member buffer has a fixed length
        snprintf(LocalRankFilename,sizeof(LocalRankFilename),"Velocity_%c_Time_%i.%05i.raw",axis[c],timestep,rank);
        FILE *velocity_file = fopen(LocalRankFilename,"wb");
        if (velocity_file == NULL){
            // fwrite/fclose on a null stream is undefined; report and move on
            fprintf(stderr,"ScaLBL_StokesModel::getVelocity_debug: unable to open %s for writing \n",LocalRankFilename);
            continue;
        }
        fwrite(PhaseField.data(),sizeof(double),N,velocity_file);
        fclose(velocity_file);
    }
}
/*
 * Scale Vel_reg in place by (h*1.0e-6)/time_conv at every site whose Map
 * entry is non-negative; sites with a negative Map entry are left untouched.
 */
void ScaLBL_StokesModel::Velocity_LB_to_Phys(DoubleArray &Vel_reg){
    const double length_scale = (h*1.0e-6);
    for (int k=0; k<Nz; ++k){
        for (int j=0; j<Ny; ++j){
            for (int i=0; i<Nx; ++i){
                if (Map(i,j,k) < 0) continue;   // no compact-layout site here
                const double v_lb = Vel_reg(i,j,k);
                Vel_reg(i,j,k) = v_lb*length_scale/time_conv;
            }
        }
    }
}
/*
 * Average of ChargeDensity*ElectricField (scaled by
 * time_conv^2/(h^2*1e-12)/den_scale) over the exterior and interior site
 * ranges of all ranks in Mask->Comm.
 *
 * ChargeDensity : Np values, copied to the host with ScaLBL_CopyToHost
 * ElectricField : 3*Np values, component c stored at offset c*Np
 *
 * Returns {Fx_avg, Fy_avg, Fz_avg}. If the global site count is zero the
 * result is {0,0,0} instead of the NaN produced by dividing by zero.
 */
vector<double> ScaLBL_StokesModel::computeElectricForceAvg(double *ChargeDensity, double *ElectricField){
    // Host copies are owned by vectors so they are released on every exit path
    std::vector<double> Ex_host(Np);
    std::vector<double> Ey_host(Np);
    std::vector<double> Ez_host(Np);
    std::vector<double> rhoE_host(Np);

    ScaLBL_CopyToHost(Ex_host.data(),&ElectricField[0*Np],Np*sizeof(double));
    ScaLBL_CopyToHost(Ey_host.data(),&ElectricField[1*Np],Np*sizeof(double));
    ScaLBL_CopyToHost(Ez_host.data(),&ElectricField[2*Np],Np*sizeof(double));
    ScaLBL_CopyToHost(rhoE_host.data(),ChargeDensity,Np*sizeof(double));

    double count_loc=0;
    double count;
    double Fx_avg,Fy_avg,Fz_avg;//average electric field induced force
    double Fx_loc,Fy_loc,Fz_loc;
    Fx_loc = Fy_loc = Fz_loc = 0.0;

    // Accumulate the scaled force over the site range [first,last).
    // The per-site expression is kept term-for-term so the sums are unchanged.
    auto accumulate = [&](int first, int last){
        for (int idx=first; idx<last; idx++){
            Fx_loc += rhoE_host[idx]*Ex_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
            Fy_loc += rhoE_host[idx]*Ey_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
            Fz_loc += rhoE_host[idx]*Ez_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
            count_loc+=1.0;
        }
    };
    accumulate(0,ScaLBL_Comm->LastExterior());
    accumulate(ScaLBL_Comm->FirstInterior(),ScaLBL_Comm->LastInterior());

    MPI_Allreduce(&Fx_loc,&Fx_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
    MPI_Allreduce(&Fy_loc,&Fy_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
    MPI_Allreduce(&Fz_loc,&Fz_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
    MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);

    if (count > 0.0){
        Fx_avg /= count;
        Fy_avg /= count;
        Fz_avg /= count;
    }
    else{
        // no sites anywhere: report a zero force rather than 0/0
        Fx_avg = Fy_avg = Fz_avg = 0.0;
    }

    vector<double>F_avg{Fx_avg,Fy_avg,Fz_avg};
    return F_avg;
}
double ScaLBL_StokesModel::CalVelocityConvergence(double& flow_rate_previous,double *ChargeDensity, double *ElectricField){
//-----------------------------------------------------
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x);
ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y);
ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z);
double count_loc=0;
double count;
double vax,vay,vaz;
double vax_loc,vay_loc,vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
vax_loc += Velocity_x(i,j,k);
vay_loc += Velocity_y(i,j,k);
vaz_loc += Velocity_z(i,j,k);
count_loc+=1.0;
}
}
}
}
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
vaz /= count;
vector<double> Eforce;
Eforce = computeElectricForceAvg(ChargeDensity,ElectricField);
double TFx = Fx+Eforce[0];//TF: total body force
double TFy = Fy+Eforce[1];
double TFz = Fz+Eforce[2];
double force_mag = sqrt(TFx*TFx+TFy*TFy+TFz*TFz);
double dir_x = TFx/force_mag;
double dir_y = TFy/force_mag;
double dir_z = TFz/force_mag;
if (force_mag == 0.0){
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
}
double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z);
double error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate);
flow_rate_previous = flow_rate;
//----------------------------------------------------
//for debugging
if (rank==0){
printf("StokesModel: error: %.5g\n",error);
}
return error;
}
void ScaLBL_StokesModel::Run(){
double rlx_setA=1.0/tau;
double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
Minkowski Morphology(Mask);
if (rank==0){
bool WriteHeader=false;
FILE *log_file = fopen("Permeability.csv","r");
if (log_file != NULL)
fclose(log_file);
else
WriteHeader=true;
if (WriteHeader){
log_file = fopen("Permeability.csv","a+");
fprintf(log_file,"time Fx Fy Fz mu Vs As Js Xs vx vy vz k\n");
fclose(log_file);
}
}
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
starttime = MPI_Wtime();
if (rank==0) printf("****************************************************************\n");
if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax);
if (rank==0) printf("****************************************************************\n");
timestep=0;
double error = 1.0;
double flow_rate_previous = 0.0;
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************/
if (timestep%1000==0){
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x);
ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y);
ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z);
double count_loc=0;
double count;
double vax,vay,vaz;
double vax_loc,vay_loc,vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
vax_loc += Velocity_x(i,j,k);
vay_loc += Velocity_y(i,j,k);
vaz_loc += Velocity_z(i,j,k);
count_loc+=1.0;
}
}
}
}
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
vaz /= count;
double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz);
double dir_x = Fx/force_mag;
double dir_y = Fy/force_mag;
double dir_z = Fz/force_mag;
if (force_mag == 0.0){
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
}
double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z);
error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate);
flow_rate_previous = flow_rate;
//if (rank==0) printf("Computing Minkowski functionals \n");
Morphology.ComputeScalar(Distance,0.f);
//Morphology.PrintAll();
double mu = (tau-0.5)/3.f;
double Vs = Morphology.V();
double As = Morphology.A();
double Hs = Morphology.H();
double Xs = Morphology.X();
Vs=sumReduce( Dm->Comm, Vs);
As=sumReduce( Dm->Comm, As);
Hs=sumReduce( Dm->Comm, Hs);
Xs=sumReduce( Dm->Comm, Xs);
double h = Dm->voxel_length;
double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag;
if (rank==0) {
printf(" %f\n",absperm);
FILE * log_file = fopen("Permeability.csv","a");
fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu,
h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz, absperm);
fclose(log_file);
}
}
}
//************************************************************************/
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank==0) printf("********************************************************\n");
}
void ScaLBL_StokesModel::VelocityField(){
/* Minkowski Morphology(Mask);
int SIZE=Np*sizeof(double);
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE);
memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double));
Morphology.Initialize();
Morphology.UpdateMeshValues();
Morphology.ComputeLocal();
Morphology.Reduce();
double count_loc=0;
double count;
double vax,vay,vaz;
double vax_loc,vay_loc,vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int n=0; n<ScaLBL_Comm->LastExterior(); n++){
vax_loc += VELOCITY[n];
vay_loc += VELOCITY[Np+n];
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
for (int n=ScaLBL_Comm->FirstInterior(); n<ScaLBL_Comm->LastInterior(); n++){
vax_loc += VELOCITY[n];
vay_loc += VELOCITY[Np+n];
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
vaz /= count;
double mu = (tau-0.5)/3.f;
if (rank==0) printf("Fx Fy Fz mu Vs As Js Xs vx vy vz\n");
if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu,
Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz);
*/
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1);
auto VxVar = std::make_shared<IO::Variable>();
auto VyVar = std::make_shared<IO::Variable>();
auto VzVar = std::make_shared<IO::Variable>();
auto SignDistVar = std::make_shared<IO::Variable>();
IO::initialize("","silo","false");
// Create the MeshDataStruct
visData.resize(1);
visData[0].meshName = "domain";
visData[0].mesh = std::make_shared<IO::DomainMesh>( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz );
SignDistVar->name = "SignDist";
SignDistVar->type = IO::VariableType::VolumeVariable;
SignDistVar->dim = 1;
SignDistVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(SignDistVar);
VxVar->name = "Velocity_x";
VxVar->type = IO::VariableType::VolumeVariable;
VxVar->dim = 1;
VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(VxVar);
VyVar->name = "Velocity_y";
VyVar->type = IO::VariableType::VolumeVariable;
VyVar->dim = 1;
VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(VyVar);
VzVar->name = "Velocity_z";
VzVar->type = IO::VariableType::VolumeVariable;
VzVar->dim = 1;
VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2);
visData[0].vars.push_back(VzVar);
Array<double>& SignData = visData[0].vars[0]->data;
Array<double>& VelxData = visData[0].vars[1]->data;
Array<double>& VelyData = visData[0].vars[2]->data;
Array<double>& VelzData = visData[0].vars[3]->data;
ASSERT(visData[0].vars[0]->name=="SignDist");
ASSERT(visData[0].vars[1]->name=="Velocity_x");
ASSERT(visData[0].vars[2]->name=="Velocity_y");
ASSERT(visData[0].vars[3]->name=="Velocity_z");
fillData.copy(Distance,SignData);
fillData.copy(Velocity_x,VelxData);
fillData.copy(Velocity_y,VelyData);
fillData.copy(Velocity_z,VelzData);
IO::writeData( timestep, visData, Dm->Comm );
}

92
models/StokesModel.h Normal file
View File

@ -0,0 +1,92 @@
/*
* Multi-relaxation time LBM Model
*/
#ifndef ScaLBL_StokesModel_INC
#define ScaLBL_StokesModel_INC
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/MPI_Helpers.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
/*
 * Single-fluid (Navier-)Stokes lattice Boltzmann model.
 *
 * Holds the input databases, the analysis / simulation domains, the ScaLBL
 * communicator and the device arrays used by the D3Q19 kernels. Standard
 * library types are spelled with std:: so the header does not depend on a
 * using-directive leaking from another header.
 */
class ScaLBL_StokesModel{
public:
    ScaLBL_StokesModel(int RANK, int NP, MPI_Comm COMM);
    ~ScaLBL_StokesModel();

    // functions, listed in the order in which they should be run
    void ReadParams(std::string filename,int num_iter);
    void ReadParams(std::string filename);
    void ReadParams(std::shared_ptr<Database> db0);
    void SetDomain();       // build Dm / Mask and cache the grid dimensions
    void ReadInput();       // load the labels and compute the signed distance
    void Create();          // build the communicator and allocate device arrays
    void Initialize();      // initialize the distributions
    void Run();             // stand-alone run with convergence check and permeability log
    void Run_Lite(double *ChargeDensity, double *ElectricField);   // timestepMax steps with electric body force
    void VelocityField();   // write signed distance and velocity through the IO layer
    // velocity components in regular layout, physical units [m/sec]
    void getVelocity(DoubleArray &Vel_x, DoubleArray &Vel_y, DoubleArray &Vel_z);
    // raw per-rank dump of the velocity components
    void getVelocity_debug(int timestep);
    // relative change of the projected mean velocity; updates flow_rate_previous
    double CalVelocityConvergence(double& flow_rate_previous,double *ChargeDensity, double *ElectricField);

    bool Restart,pBC;
    int timestep,timestepMax;
    int BoundaryCondition;
    double tau,mu;
    double rho0;
    double Fx,Fy,Fz,flux;
    double din,dout;
    double tolerance;
    double nu_phys;
    double rho_phys;
    double time_conv;   // time conversion factor [sec/lt]
    double h;           // image resolution
    double den_scale;   // scale factor for density

    int Nx,Ny,Nz,N,Np;
    int rank,nprocx,nprocy,nprocz,nprocs;
    double Lx,Ly,Lz;

    std::shared_ptr<Domain> Dm;   // this domain is for analysis
    std::shared_ptr<Domain> Mask; // this domain is for lbm
    std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;

    // input database
    std::shared_ptr<Database> db;
    std::shared_ptr<Database> domain_db;
    std::shared_ptr<Database> stokes_db;

    IntArray Map;           // regular grid -> compact site index (negative: no site)
    DoubleArray Distance;   // signed distance
    int *NeighborList;
    double *fq;
    double *Velocity;
    double *Pressure;

    //Minkowski Morphology;

    // velocity components in regular layout
    DoubleArray Velocity_x;
    DoubleArray Velocity_y;
    DoubleArray Velocity_z;

private:
    MPI_Comm comm;

    // filenames
    char LocalRankString[8];
    char LocalRankFilename[40];
    char LocalRestartFile[40];
    char OutputFilename[200];

    //int rank,nprocs;
    void LoadParams(std::shared_ptr<Database> db0);
    // scale a regular-layout velocity array by (h*1e-6)/time_conv
    void Velocity_LB_to_Phys(DoubleArray &Vel_reg);
    // averaged ChargeDensity*ElectricField force, one entry per direction
    std::vector<double> computeElectricForceAvg(double *ChargeDensity, double *ElectricField);
};
#endif

View File

@ -7,6 +7,7 @@ module load cuda
export HDF5_DIR=/ccs/proj/csc380/mcclurej/install/hdf5/1.8.12/
export SILO_DIR=/ccs/proj/csc380/mcclurej/install/silo/4.10.2/
export NETCDF_DIR=/ccs/proj/geo136/install/netcdf/4.6.1
# configure
rm -rf CMake*
@ -28,6 +29,8 @@ cmake \
-D USE_SILO=1 \
-D SILO_LIB="$SILO_DIR/lib/libsiloh5.a" \
-D SILO_DIRECTORY="$SILO_DIR" \
-D USE_NETCDF=1 \
-D NETCDF_DIRECTORY="$NETCDF_DIR" \
-D USE_DOXYGEN:BOOL=false \
-D USE_TIMER=0 \
~/LBPM-WIA

View File

@ -101,9 +101,9 @@ int main(int argc, char **argv)
{
// Initialize MPI
Utilities::startup( argc, argv );
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
Utilities::setAbortBehavior( true, true, true );
Utilities::setErrorHandlers();
PROFILE_ENABLE(0);
@ -136,20 +136,20 @@ int main(int argc, char **argv)
domain >> Ly;
domain >> Lz;
}
comm.barrier();
MPI_Barrier(comm);
// Computational domain
comm.bcast(&nx,1,0);
comm.bcast(&ny,1,0);
comm.bcast(&nz,1,0);
comm.bcast(&nprocx,1,0);
comm.bcast(&nprocy,1,0);
comm.bcast(&nprocz,1,0);
comm.bcast(&nspheres,1,0);
comm.bcast(&Lx,1,0);
comm.bcast(&Ly,1,0);
comm.bcast(&Lz,1,0);
MPI_Bcast(&nx,1,MPI_INT,0,comm);
MPI_Bcast(&ny,1,MPI_INT,0,comm);
MPI_Bcast(&nz,1,MPI_INT,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
MPI_Bcast(&nspheres,1,MPI_INT,0,comm);
MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
comm.barrier();
MPI_Barrier(comm);
// Check that the number of processors >= the number of ranks
if ( rank==0 ) {
@ -208,7 +208,7 @@ int main(int argc, char **argv)
// WriteLocalSolidID(LocalRankFilename, id, N);
sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString);
ReadBinaryFile(LocalRankFilename, Averages.SDs.get(), N);
comm.barrier();
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//.......................................................................
//copies of data needed to perform checkpointing from cpu
@ -220,7 +220,7 @@ int main(int argc, char **argv)
if (rank==0) printf("Reading restart file! \n");
// Read in the restart file to CPU buffers
ReadCheckpoint(LocalRestartFile, Den, DistEven, DistOdd, N);
comm.barrier();
MPI_Barrier(comm);
//.........................................................................
// Populate the arrays needed to perform averaging
if (rank==0) printf("Populate arrays \n");
@ -291,7 +291,7 @@ int main(int argc, char **argv)
}
Dm.CommInit(); // Initialize communications for domains
sum_global = comm.sumReduce( sum );
MPI_Allreduce(&sum,&sum_global,1,MPI_DOUBLE,MPI_SUM,comm);
porosity = sum_global/Dm.Volume;
if (rank==0) printf("Porosity = %f \n",porosity);
@ -328,14 +328,14 @@ int main(int argc, char **argv)
// BlobContainer Blobs;
DoubleArray RecvBuffer(dimx);
// MPI_Allreduce(&Averages.BlobAverages.get(),&Blobs.get(),1,MPI_DOUBLE,MPI_SUM,Dm.Comm);
comm.barrier();
MPI_Barrier(comm);
if (rank==0) printf("Number of components is %i \n",dimy);
for (int b=0; b<dimy; b++){
MPI_Allreduce(&Averages.BlobAverages(0,b),&RecvBuffer(0),dimx,MPI_DOUBLE,MPI_SUM,comm);
for (int idx=0; idx<dimx-1; idx++) Averages.BlobAverages(idx,b)=RecvBuffer(idx);
comm.barrier();
MPI_Barrier(comm);
if (Averages.BlobAverages(0,b) > 0.0){
double Vn,pn,awn,ans,Jwn,Kwn,lwns,cwns,trawn,trJwn;

View File

@ -48,9 +48,9 @@ int main(int argc, char **argv)
{
// Initialize MPI
Utilities::startup( argc, argv );
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
#ifdef PROFILE
PROFILE_ENABLE(0);
PROFILE_DISABLE_TRACE();
@ -128,8 +128,8 @@ int main(int argc, char **argv)
PROFILE_STOP("main");
PROFILE_SAVE("BlobIdentifyParallel",false);
#endif
comm.barrier();
Utilities::shutdown();
return 0;
comm.barrier();
Utilities::shutdown();
return 0;
}

View File

@ -3,6 +3,9 @@
#ADD_LBPM_EXECUTABLE( lbpm_nondarcy_simulator )
ADD_LBPM_EXECUTABLE( lbpm_color_simulator )
ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator )
ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator )
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator )
ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator )
@ -35,6 +38,7 @@ ADD_LBPM_EXECUTABLE( GenerateSphereTest )
#ADD_LBPM_EXECUTABLE( BlobAnalyzeParallel )
ADD_LBPM_EXECUTABLE( lbpm_minkowski_scalar )
CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/cylindertest ${CMAKE_CURRENT_BINARY_DIR}/cylindertest COPYONLY )
# Add the tests
@ -45,6 +49,10 @@ ADD_LBPM_TEST( TestTorusEvolve )
ADD_LBPM_TEST( TestTopo3D )
ADD_LBPM_TEST( TestFluxBC )
ADD_LBPM_TEST( TestMap )
ADD_LBPM_TEST( TestPoissonSolver )
ADD_LBPM_TEST( TestIonModel )
ADD_LBPM_TEST( TestNernstPlanck )
ADD_LBPM_TEST( TestPNP_Stokes )
#ADD_LBPM_TEST( TestMRT )
#ADD_LBPM_TEST( TestColorGrad )
#ADD_LBPM_TEST( TestColorGradDFH )
@ -69,6 +77,7 @@ ADD_LBPM_TEST_PARALLEL( TestCommD3Q19 8 )
ADD_LBPM_TEST_1_2_4( testCommunication )
ADD_LBPM_TEST( TestWriter )
ADD_LBPM_TEST( TestDatabase )
ADD_LBPM_TEST( TestSetDevice )
ADD_LBPM_PROVISIONAL_TEST( TestMicroCTReader )
IF ( USE_NETCDF )
ADD_LBPM_TEST_PARALLEL( TestNetcdf 8 )

Some files were not shown because too many files have changed in this diff Show More