# To run clang tools:
# cd to root directory
# To update format only:
# find . -name "*.cpp" -or -name "*.cc" -or -name "*.h" -or -name "*.hpp" -or -name "*.I" | xargs -I{} clang-format -i {}
# git status -s . | sed s/^...// | grep -E "(\.cpp|\.h|\.cc|\.hpp|\.I)" | xargs -I{} clang-format -i {}
# To run modernize
# export CLANG_PATH=/packages/llvm/build/llvm-60
# export PATH=${CLANG_PATH}/bin:${CLANG_PATH}/share/clang:$PATH
# find src -name "*.cpp" -or -name "*.cc" | xargs -I{} clang-tidy -checks=modernize* -p=/projects/AtomicModel/build/debug -fix {}
# find src -name "*.cpp" -or -name "*.cc" -or -name "*.h" -or -name "*.hpp" -or -name "*.I" | xargs -I{} clang-format -i {}
# clang-format
# Single C++ style section, starting from the LLVM preset and overriding below.
Language: Cpp
BasedOnStyle: LLVM
AccessModifierOffset: -4
AlignAfterOpenBracket: DontAlign
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
# Per-construct brace placement; these keys are only valid nested under
# BraceWrapping and only take effect because BreakBeforeBraces is Custom.
BraceWrapping:
  AfterClass: true
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterObjCDeclaration: true
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
BreakBeforeBinaryOperators: None
#BreakBeforeBraces: Stroustrup
BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: false
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
# Include grouping rules; inert while SortIncludes is false (see below) but
# kept so that sorting can be re-enabled without redefining the categories.
IncludeCategories:
  - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
    Priority: 2
  - Regex: '^(<|"(gtest|isl|json)/)'
    Priority: 3
  - Regex: '.*'
    Priority: 1
IndentCaseLabels: false
IndentWidth: 4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 2
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
SortUsingDeclarations: true
SpaceAfterCStyleCast: true
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: true
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 4
UseTab: Never
# Project-specific overrides. Each key appears exactly once in this file:
# a repeated mapping key is ambiguous and is rejected by strict YAML parsers.
# Our includes are not order-agnostic
SortIncludes: false
# Some of our comments include insightful insight
# (i.e. they are hand-wrapped on purpose, so do not let the tool re-flow them)
ReflowComments: false

View File

@ -1,150 +1,177 @@
#include "analysis/ElectroChemistry.h"
ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr<Domain> dm)
: Dm(dm) {
ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr <Domain> dm):
Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz;
ChemicalPotential.resize(Nx,Ny,Nz); ChemicalPotential.fill(0);
ElectricalPotential.resize(Nx,Ny,Nz); ElectricalPotential.fill(0);
ElectricalField_x.resize(Nx,Ny,Nz); ElectricalField_x.fill(0);
ElectricalField_y.resize(Nx,Ny,Nz); ElectricalField_y.fill(0);
ElectricalField_z.resize(Nx,Ny,Nz); ElectricalField_z.fill(0);
Pressure.resize(Nx,Ny,Nz); Pressure.fill(0);
Rho.resize(Nx,Ny,Nz); Rho.fill(0);
Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Gradient of the phase indicator field
Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0);
Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0);
SDs.resize(Nx,Ny,Nz); SDs.fill(0);
IonFluxDiffusive_x.resize(Nx,Ny,Nz); IonFluxDiffusive_x.fill(0);
IonFluxDiffusive_y.resize(Nx,Ny,Nz); IonFluxDiffusive_y.fill(0);
IonFluxDiffusive_z.resize(Nx,Ny,Nz); IonFluxDiffusive_z.fill(0);
IonFluxAdvective_x.resize(Nx,Ny,Nz); IonFluxAdvective_x.fill(0);
IonFluxAdvective_y.resize(Nx,Ny,Nz); IonFluxAdvective_y.fill(0);
IonFluxAdvective_z.resize(Nx,Ny,Nz); IonFluxAdvective_z.fill(0);
IonFluxElectrical_x.resize(Nx,Ny,Nz); IonFluxElectrical_x.fill(0);
IonFluxElectrical_y.resize(Nx,Ny,Nz); IonFluxElectrical_y.fill(0);
IonFluxElectrical_z.resize(Nx,Ny,Nz); IonFluxElectrical_z.fill(0);
Nx = dm->Nx;
Ny = dm->Ny;
Nz = dm->Nz;
Volume = (Nx - 2) * (Ny - 2) * (Nz - 2) * Dm->nprocx() * Dm->nprocy() *
Dm->nprocz() * 1.0;
if (Dm->rank()==0){
bool WriteHeader=false;
TIMELOG = fopen("electrokinetic.csv","r");
ChemicalPotential.resize(Nx, Ny, Nz);
ElectricalPotential.resize(Nx, Ny, Nz);
ElectricalField_x.resize(Nx, Ny, Nz);
ElectricalField_y.resize(Nx, Ny, Nz);
ElectricalField_z.resize(Nx, Ny, Nz);
Pressure.resize(Nx, Ny, Nz);
Rho.resize(Nx, Ny, Nz);
Vel_x.resize(Nx, Ny, Nz);
Vel_x.fill(0); // Gradient of the phase indicator field
Vel_y.resize(Nx, Ny, Nz);
Vel_z.resize(Nx, Ny, Nz);
SDs.resize(Nx, Ny, Nz);
IonFluxDiffusive_x.resize(Nx, Ny, Nz);
IonFluxDiffusive_y.resize(Nx, Ny, Nz);
IonFluxDiffusive_z.resize(Nx, Ny, Nz);
IonFluxAdvective_x.resize(Nx, Ny, Nz);
IonFluxAdvective_y.resize(Nx, Ny, Nz);
IonFluxAdvective_z.resize(Nx, Ny, Nz);
IonFluxElectrical_x.resize(Nx, Ny, Nz);
IonFluxElectrical_y.resize(Nx, Ny, Nz);
IonFluxElectrical_z.resize(Nx, Ny, Nz);
TIMELOG = fopen("electrokinetic.csv","a+");
if (WriteHeader)
// If timelog is empty, write a short header to list the averages
fprintf(TIMELOG,"TBD TBD\n");
if (Dm->rank() == 0) {
bool WriteHeader = false;
TIMELOG = fopen("electrokinetic.csv", "r");
WriteHeader = true;
TIMELOG = fopen("electrokinetic.csv", "a+");
if (WriteHeader) {
// If timelog is empty, write a short header to list the averages
fprintf(TIMELOG, "TBD TBD\n");
if (Dm->rank()==0){
ElectroChemistryAnalyzer::~ElectroChemistryAnalyzer() {
if (Dm->rank() == 0) {
void ElectroChemistryAnalyzer::SetParams(){
void ElectroChemistryAnalyzer::SetParams() {}
void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep){
void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion,
ScaLBL_Poisson &Poisson,
ScaLBL_StokesModel &Stokes, int timestep) {
int i,j,k;
double Vin=0.0;
double Vout=0.0;
/* local sub-domain averages */
double *rho_avg_local;
double *rho_mu_avg_local;
double *rho_mu_fluctuation_local;
double *rho_psi_avg_local;
double *rho_psi_fluctuation_local;
/* global averages */
double *rho_avg_global;
double *rho_mu_avg_global;
double *rho_mu_fluctuation_global;
double *rho_psi_avg_global;
double *rho_psi_fluctuation_global;
/* local sub-domain averages */
rho_avg_local = new double [Ion.number_ion_species];
rho_mu_avg_local = new double [Ion.number_ion_species];
rho_mu_fluctuation_local = new double [Ion.number_ion_species];
rho_psi_avg_local = new double [Ion.number_ion_species];
rho_psi_fluctuation_local = new double [Ion.number_ion_species];
/* global averages */
rho_avg_global = new double [Ion.number_ion_species];
rho_mu_avg_global = new double [Ion.number_ion_species];
rho_mu_fluctuation_global = new double [Ion.number_ion_species];
rho_psi_avg_global = new double [Ion.number_ion_species];
rho_psi_fluctuation_global = new double [Ion.number_ion_species];
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
rho_avg_local[ion] = 0.0;
rho_mu_avg_local[ion] = 0.0;
rho_psi_avg_local[ion] = 0.0;
/* Compute averages for each ion */
for (k=1; k<Nz; k++){
for (j=1; j<Ny; j++){
for (i=1; i<Nx; i++){
rho_avg_local[ion] += Rho(i,j,k);
rho_mu_avg_local[ion] += Rho(i,j,k)*Rho(i,j,k);
rho_psi_avg_local[ion] += Rho(i,j,k)*ElectricalPotential(i,j,k);
rho_avg_global[ion]=Dm->Comm.sumReduce( rho_avg_local[ion]) / Volume;
rho_mu_avg_global[ion]=Dm->Comm.sumReduce( rho_mu_avg_local[ion]) / Volume;
rho_psi_avg_global[ion]=Dm->Comm.sumReduce( rho_psi_avg_local[ion]) / Volume;
int i, j, k;
double Vin = 0.0;
double Vout = 0.0;
if (rho_avg_global[ion] > 0.0){
rho_mu_avg_global[ion] /= rho_avg_global[ion];
rho_psi_avg_global[ion] /= rho_avg_global[ion];
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
rho_mu_fluctuation_local[ion] = 0.0;
rho_psi_fluctuation_local[ion] = 0.0;
/* Compute averages for each ion */
for (k=1; k<Nz; k++){
for (j=1; j<Ny; j++){
for (i=1; i<Nx; i++){
rho_mu_fluctuation_local[ion] += (Rho(i,j,k)*Rho(i,j,k) - rho_mu_avg_global[ion]);
rho_psi_fluctuation_local[ion] += (Rho(i,j,k)*ElectricalPotential(i,j,k) - rho_psi_avg_global[ion]);
rho_mu_fluctuation_global[ion]=Dm->Comm.sumReduce( rho_mu_fluctuation_local[ion]);
rho_psi_fluctuation_global[ion]=Dm->Comm.sumReduce( rho_psi_fluctuation_local[ion]);
if (Dm->rank()==0){
fprintf(TIMELOG,"%i ",timestep);
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
fprintf(TIMELOG,"%.8g ",rho_avg_global[ion]);
fprintf(TIMELOG,"%.8g ",rho_mu_avg_global[ion]);
fprintf(TIMELOG,"%.8g ",rho_psi_avg_global[ion]);
fprintf(TIMELOG,"%.8g ",rho_mu_fluctuation_global[ion]);
fprintf(TIMELOG,"%.8g ",rho_psi_fluctuation_global[ion]);
fprintf(TIMELOG,"%.8g %.8g\n",Vin,Vout);
/* else{
/* local sub-domain averages */
double *rho_avg_local;
double *rho_mu_avg_local;
double *rho_mu_fluctuation_local;
double *rho_psi_avg_local;
double *rho_psi_fluctuation_local;
/* global averages */
double *rho_avg_global;
double *rho_mu_avg_global;
double *rho_mu_fluctuation_global;
double *rho_psi_avg_global;
double *rho_psi_fluctuation_global;
/* local sub-domain averages */
rho_avg_local = new double[Ion.number_ion_species];
rho_mu_avg_local = new double[Ion.number_ion_species];
rho_mu_fluctuation_local = new double[Ion.number_ion_species];
rho_psi_avg_local = new double[Ion.number_ion_species];
rho_psi_fluctuation_local = new double[Ion.number_ion_species];
/* global averages */
rho_avg_global = new double[Ion.number_ion_species];
rho_mu_avg_global = new double[Ion.number_ion_species];
rho_mu_fluctuation_global = new double[Ion.number_ion_species];
rho_psi_avg_global = new double[Ion.number_ion_species];
rho_psi_fluctuation_global = new double[Ion.number_ion_species];
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
rho_avg_local[ion] = 0.0;
rho_mu_avg_local[ion] = 0.0;
rho_psi_avg_local[ion] = 0.0;
Ion.getIonConcentration(Rho, ion);
/* Compute averages for each ion */
for (k = 1; k < Nz; k++) {
for (j = 1; j < Ny; j++) {
for (i = 1; i < Nx; i++) {
rho_avg_local[ion] += Rho(i, j, k);
rho_mu_avg_local[ion] += Rho(i, j, k) * Rho(i, j, k);
rho_psi_avg_local[ion] +=
Rho(i, j, k) * ElectricalPotential(i, j, k);
rho_avg_global[ion] = Dm->Comm.sumReduce(rho_avg_local[ion]) / Volume;
rho_mu_avg_global[ion] =
Dm->Comm.sumReduce(rho_mu_avg_local[ion]) / Volume;
rho_psi_avg_global[ion] =
Dm->Comm.sumReduce(rho_psi_avg_local[ion]) / Volume;
if (rho_avg_global[ion] > 0.0) {
rho_mu_avg_global[ion] /= rho_avg_global[ion];
rho_psi_avg_global[ion] /= rho_avg_global[ion];
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
rho_mu_fluctuation_local[ion] = 0.0;
rho_psi_fluctuation_local[ion] = 0.0;
/* Compute averages for each ion */
for (k = 1; k < Nz; k++) {
for (j = 1; j < Ny; j++) {
for (i = 1; i < Nx; i++) {
rho_mu_fluctuation_local[ion] +=
(Rho(i, j, k) * Rho(i, j, k) - rho_mu_avg_global[ion]);
rho_psi_fluctuation_local[ion] +=
(Rho(i, j, k) * ElectricalPotential(i, j, k) -
rho_mu_fluctuation_global[ion] =
rho_psi_fluctuation_global[ion] =
if (Dm->rank() == 0) {
fprintf(TIMELOG, "%i ", timestep);
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
fprintf(TIMELOG, "%.8g ", rho_avg_global[ion]);
fprintf(TIMELOG, "%.8g ", rho_mu_avg_global[ion]);
fprintf(TIMELOG, "%.8g ", rho_psi_avg_global[ion]);
fprintf(TIMELOG, "%.8g ", rho_mu_fluctuation_global[ion]);
fprintf(TIMELOG, "%.8g ", rho_psi_fluctuation_global[ion]);
fprintf(TIMELOG, "%.8g %.8g\n", Vin, Vout);
/* else{
fprintf(TIMELOG,"%i ",timestep);
for (int ion=0; ion<Ion.number_ion_species; ion++){
fprintf(TIMELOG,"%.8g ",rho_avg_local[ion]);
@ -157,20 +184,28 @@ void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poiss
} */
void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, std::shared_ptr<Database> input_db, int timestep){
auto vis_db = input_db->getDatabase( "Visualization" );
char VisName[40];
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1);
void ElectroChemistryAnalyzer::WriteVis(ScaLBL_IonModel &Ion,
ScaLBL_Poisson &Poisson,
ScaLBL_StokesModel &Stokes,
std::shared_ptr<Database> input_db,
int timestep) {
// Create the MeshDataStruct
auto vis_db = input_db->getDatabase("Visualization");
char VisName[40];
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm, Dm->rank_info,
{Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2}, {1, 1, 1},
0, 1);
IO::initialize("", "silo", "false");
// Create the MeshDataStruct
visData[0].meshName = "domain";
visData[0].mesh = std::make_shared<IO::DomainMesh>( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz );
visData[0].mesh =
std::make_shared<IO::DomainMesh>(Dm->rank_info, Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2, Dm->Lx, Dm->Ly, Dm->Lz);
//electric potential
auto ElectricPotentialVar = std::make_shared<IO::Variable>();
//electric field
@ -180,7 +215,7 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P
//ion concentration
std::vector<shared_ptr<IO::Variable>> IonConcentration;
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
//fluid velocity
@ -189,7 +224,7 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P
auto VzVar = std::make_shared<IO::Variable>();
// diffusive ion flux
std::vector<shared_ptr<IO::Variable>> IonFluxDiffusive;
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
//push in x-,y-, and z-component for each ion species
@ -197,7 +232,7 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P
// advective ion flux
std::vector<shared_ptr<IO::Variable>> IonFluxAdvective;
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
//push in x-,y-, and z-component for each ion species
@ -205,7 +240,7 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P
// electro-migrational ion flux
std::vector<shared_ptr<IO::Variable>> IonFluxElectrical;
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
//push in x-,y-, and z-component for each ion species
@ -214,270 +249,348 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P
//-------------------------------------Create Names for Variables------------------------------------------------------
if (vis_db->getWithDefault<bool>( "save_electric_potential", true )){
ElectricPotentialVar->name = "ElectricPotential";
ElectricPotentialVar->type = IO::VariableType::VolumeVariable;
ElectricPotentialVar->dim = 1;
if (vis_db->getWithDefault<bool>("save_electric_potential", true)) {
ElectricPotentialVar->name = "ElectricPotential";
ElectricPotentialVar->type = IO::VariableType::VolumeVariable;
ElectricPotentialVar->dim = 1;
ElectricPotentialVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
if (vis_db->getWithDefault<bool>( "save_concentration", true )){
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
IonConcentration[ion]->name = VisName;
IonConcentration[ion]->type = IO::VariableType::VolumeVariable;
IonConcentration[ion]->dim = 1;
if (vis_db->getWithDefault<bool>("save_concentration", true)) {
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
sprintf(VisName, "IonConcentration_%zu", ion + 1);
IonConcentration[ion]->name = VisName;
IonConcentration[ion]->type = IO::VariableType::VolumeVariable;
IonConcentration[ion]->dim = 1;
IonConcentration[ion]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
if (vis_db->getWithDefault<bool>( "save_velocity", false )){
if (vis_db->getWithDefault<bool>("save_velocity", false)) {
VxVar->name = "Velocity_x";
VxVar->type = IO::VariableType::VolumeVariable;
VxVar->dim = 1;
VxVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
VyVar->name = "Velocity_y";
VyVar->type = IO::VariableType::VolumeVariable;
VyVar->dim = 1;
VyVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
VzVar->name = "Velocity_z";
VzVar->type = IO::VariableType::VolumeVariable;
VzVar->dim = 1;
VzVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
if (vis_db->getWithDefault<bool>( "save_ion_flux_diffusive", false )){
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
if (vis_db->getWithDefault<bool>("save_ion_flux_diffusive", false)) {
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
// x-component of diffusive flux
IonFluxDiffusive[3*ion+0]->name = VisName;
IonFluxDiffusive[3*ion+0]->type = IO::VariableType::VolumeVariable;
IonFluxDiffusive[3*ion+0]->dim = 1;
sprintf(VisName, "Ion%zu_FluxDiffusive_x", ion + 1);
IonFluxDiffusive[3 * ion + 0]->name = VisName;
IonFluxDiffusive[3 * ion + 0]->type =
IonFluxDiffusive[3 * ion + 0]->dim = 1;
IonFluxDiffusive[3 * ion + 0]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxDiffusive[3 * ion + 0]);
// y-component of diffusive flux
IonFluxDiffusive[3*ion+1]->name = VisName;
IonFluxDiffusive[3*ion+1]->type = IO::VariableType::VolumeVariable;
IonFluxDiffusive[3*ion+1]->dim = 1;
sprintf(VisName, "Ion%zu_FluxDiffusive_y", ion + 1);
IonFluxDiffusive[3 * ion + 1]->name = VisName;
IonFluxDiffusive[3 * ion + 1]->type =
IonFluxDiffusive[3 * ion + 1]->dim = 1;
IonFluxDiffusive[3 * ion + 1]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxDiffusive[3 * ion + 1]);
// z-component of diffusive flux
IonFluxDiffusive[3*ion+2]->name = VisName;
IonFluxDiffusive[3*ion+2]->type = IO::VariableType::VolumeVariable;
IonFluxDiffusive[3*ion+2]->dim = 1;
sprintf(VisName, "Ion%zu_FluxDiffusive_z", ion + 1);
IonFluxDiffusive[3 * ion + 2]->name = VisName;
IonFluxDiffusive[3 * ion + 2]->type =
IonFluxDiffusive[3 * ion + 2]->dim = 1;
IonFluxDiffusive[3 * ion + 2]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxDiffusive[3 * ion + 2]);
if (vis_db->getWithDefault<bool>( "save_ion_flux_advective", false )){
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
if (vis_db->getWithDefault<bool>("save_ion_flux_advective", false)) {
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
// x-component of advective flux
IonFluxAdvective[3*ion+0]->name = VisName;
IonFluxAdvective[3*ion+0]->type = IO::VariableType::VolumeVariable;
IonFluxAdvective[3*ion+0]->dim = 1;
sprintf(VisName, "Ion%zu_FluxAdvective_x", ion + 1);
IonFluxAdvective[3 * ion + 0]->name = VisName;
IonFluxAdvective[3 * ion + 0]->type =
IonFluxAdvective[3 * ion + 0]->dim = 1;
IonFluxAdvective[3 * ion + 0]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxAdvective[3 * ion + 0]);
// y-component of advective flux
IonFluxAdvective[3*ion+1]->name = VisName;
IonFluxAdvective[3*ion+1]->type = IO::VariableType::VolumeVariable;
IonFluxAdvective[3*ion+1]->dim = 1;
sprintf(VisName, "Ion%zu_FluxAdvective_y", ion + 1);
IonFluxAdvective[3 * ion + 1]->name = VisName;
IonFluxAdvective[3 * ion + 1]->type =
IonFluxAdvective[3 * ion + 1]->dim = 1;
IonFluxAdvective[3 * ion + 1]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxAdvective[3 * ion + 1]);
// z-component of advective flux
IonFluxAdvective[3*ion+2]->name = VisName;
IonFluxAdvective[3*ion+2]->type = IO::VariableType::VolumeVariable;
IonFluxAdvective[3*ion+2]->dim = 1;
sprintf(VisName, "Ion%zu_FluxAdvective_z", ion + 1);
IonFluxAdvective[3 * ion + 2]->name = VisName;
IonFluxAdvective[3 * ion + 2]->type =
IonFluxAdvective[3 * ion + 2]->dim = 1;
IonFluxAdvective[3 * ion + 2]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxAdvective[3 * ion + 2]);
if (vis_db->getWithDefault<bool>( "save_ion_flux_electrical", false )){
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
if (vis_db->getWithDefault<bool>("save_ion_flux_electrical", false)) {
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
// x-component of electro-migrational flux
IonFluxElectrical[3*ion+0]->name = VisName;
IonFluxElectrical[3*ion+0]->type = IO::VariableType::VolumeVariable;
IonFluxElectrical[3*ion+0]->dim = 1;
sprintf(VisName, "Ion%zu_FluxElectrical_x", ion + 1);
IonFluxElectrical[3 * ion + 0]->name = VisName;
IonFluxElectrical[3 * ion + 0]->type =
IonFluxElectrical[3 * ion + 0]->dim = 1;
IonFluxElectrical[3 * ion + 0]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxElectrical[3 * ion + 0]);
// y-component of electro-migrational flux
IonFluxElectrical[3*ion+1]->name = VisName;
IonFluxElectrical[3*ion+1]->type = IO::VariableType::VolumeVariable;
IonFluxElectrical[3*ion+1]->dim = 1;
sprintf(VisName, "Ion%zu_FluxElectrical_y", ion + 1);
IonFluxElectrical[3 * ion + 1]->name = VisName;
IonFluxElectrical[3 * ion + 1]->type =
IonFluxElectrical[3 * ion + 1]->dim = 1;
IonFluxElectrical[3 * ion + 1]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxElectrical[3 * ion + 1]);
// z-component of electro-migrational flux
IonFluxElectrical[3*ion+2]->name = VisName;
IonFluxElectrical[3*ion+2]->type = IO::VariableType::VolumeVariable;
IonFluxElectrical[3*ion+2]->dim = 1;
sprintf(VisName, "Ion%zu_FluxElectrical_z", ion + 1);
IonFluxElectrical[3 * ion + 2]->name = VisName;
IonFluxElectrical[3 * ion + 2]->type =
IonFluxElectrical[3 * ion + 2]->dim = 1;
IonFluxElectrical[3 * ion + 2]->data.resize(Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2);
visData[0].vars.push_back(IonFluxElectrical[3 * ion + 2]);
if (vis_db->getWithDefault<bool>( "save_electric_field", false )){
if (vis_db->getWithDefault<bool>("save_electric_field", false)) {
ElectricFieldVar_x->name = "ElectricField_x";
ElectricFieldVar_x->type = IO::VariableType::VolumeVariable;
ElectricFieldVar_x->dim = 1;
ElectricFieldVar_x->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
ElectricFieldVar_y->name = "ElectricField_y";
ElectricFieldVar_y->type = IO::VariableType::VolumeVariable;
ElectricFieldVar_y->dim = 1;
ElectricFieldVar_y->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
ElectricFieldVar_z->name = "ElectricField_z";
ElectricFieldVar_z->type = IO::VariableType::VolumeVariable;
ElectricFieldVar_z->dim = 1;
ElectricFieldVar_z->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
//------------------------------------Save All Variables--------------------------------------------------------------
if (vis_db->getWithDefault<bool>( "save_electric_potential", true )){
Array<double>& ElectricPotentialData = visData[0].vars[0]->data;
if (vis_db->getWithDefault<bool>("save_electric_potential", true)) {
ASSERT(visData[0].vars[0]->name == "ElectricPotential");
Array<double> &ElectricPotentialData = visData[0].vars[0]->data;
fillData.copy(ElectricalPotential, ElectricPotentialData);
if (vis_db->getWithDefault<bool>( "save_concentration", true )){
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
//IonConcentration[ion]->name = VisName;
Array<double>& IonConcentrationData = visData[0].vars[1+ion]->data;
if (vis_db->getWithDefault<bool>("save_concentration", true)) {
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
sprintf(VisName, "IonConcentration_%zu", ion + 1);
//IonConcentration[ion]->name = VisName;
ASSERT(visData[0].vars[1 + ion]->name == VisName);
Array<double> &IonConcentrationData =
visData[0].vars[1 + ion]->data;
Ion.getIonConcentration(Rho, ion);
fillData.copy(Rho, IonConcentrationData);
if (vis_db->getWithDefault<bool>( "save_velocity", false )){
Array<double>& VelxData = visData[0].vars[1+Ion.number_ion_species+0]->data;
Array<double>& VelyData = visData[0].vars[1+Ion.number_ion_species+1]->data;
Array<double>& VelzData = visData[0].vars[1+Ion.number_ion_species+2]->data;
if (vis_db->getWithDefault<bool>("save_velocity", false)) {
ASSERT(visData[0].vars[1 + Ion.number_ion_species + 0]->name ==
ASSERT(visData[0].vars[1 + Ion.number_ion_species + 1]->name ==
ASSERT(visData[0].vars[1 + Ion.number_ion_species + 2]->name ==
Stokes.getVelocity(Vel_x, Vel_y, Vel_z);
Array<double> &VelxData =
visData[0].vars[1 + Ion.number_ion_species + 0]->data;
Array<double> &VelyData =
visData[0].vars[1 + Ion.number_ion_species + 1]->data;
Array<double> &VelzData =
visData[0].vars[1 + Ion.number_ion_species + 2]->data;
fillData.copy(Vel_x, VelxData);
fillData.copy(Vel_y, VelyData);
fillData.copy(Vel_z, VelzData);
if (vis_db->getWithDefault<bool>( "save_ion_flux_diffusive", false )){
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
if (vis_db->getWithDefault<bool>("save_ion_flux_diffusive", false)) {
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
// x-component of diffusive flux
//IonFluxDiffusive[3*ion+0]->name = VisName;
sprintf(VisName, "Ion%zu_FluxDiffusive_x", ion + 1);
//IonFluxDiffusive[3*ion+0]->name = VisName;
.vars[4 + Ion.number_ion_species + 3 * ion + 0]
->name == VisName);
// y-component of diffusive flux
//IonFluxDiffusive[3*ion+1]->name = VisName;
sprintf(VisName, "Ion%zu_FluxDiffusive_y", ion + 1);
//IonFluxDiffusive[3*ion+1]->name = VisName;
.vars[4 + Ion.number_ion_species + 3 * ion + 1]
->name == VisName);
// z-component of diffusive flux
//IonFluxDiffusive[3*ion+2]->name = VisName;
sprintf(VisName, "Ion%zu_FluxDiffusive_z", ion + 1);
//IonFluxDiffusive[3*ion+2]->name = VisName;
.vars[4 + Ion.number_ion_species + 3 * ion + 2]
->name == VisName);
Array<double>& IonFluxData_x = visData[0].vars[4+Ion.number_ion_species+3*ion+0]->data;
Array<double>& IonFluxData_y = visData[0].vars[4+Ion.number_ion_species+3*ion+1]->data;
Array<double>& IonFluxData_z = visData[0].vars[4+Ion.number_ion_species+3*ion+2]->data;
Array<double> &IonFluxData_x =
visData[0].vars[4 + Ion.number_ion_species + 3 * ion + 0]->data;
Array<double> &IonFluxData_y =
visData[0].vars[4 + Ion.number_ion_species + 3 * ion + 1]->data;
Array<double> &IonFluxData_z =
visData[0].vars[4 + Ion.number_ion_species + 3 * ion + 2]->data;
Ion.getIonFluxDiffusive(IonFluxDiffusive_x, IonFluxDiffusive_y,
IonFluxDiffusive_z, ion);
fillData.copy(IonFluxDiffusive_x, IonFluxData_x);
fillData.copy(IonFluxDiffusive_y, IonFluxData_y);
fillData.copy(IonFluxDiffusive_z, IonFluxData_z);
if (vis_db->getWithDefault<bool>( "save_ion_flux_advective", false )){
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
if (vis_db->getWithDefault<bool>("save_ion_flux_advective", false)) {
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
// x-component of diffusive flux
//IonFluxDiffusive[3*ion+0]->name = VisName;
sprintf(VisName, "Ion%zu_FluxAdvective_x", ion + 1);
//IonFluxDiffusive[3*ion+0]->name = VisName;
.vars[4 + Ion.number_ion_species * (1 + 3) + 3 * ion + 0]
->name == VisName);
// y-component of diffusive flux
//IonFluxDiffusive[3*ion+1]->name = VisName;
sprintf(VisName, "Ion%zu_FluxAdvective_y", ion + 1);
//IonFluxDiffusive[3*ion+1]->name = VisName;
.vars[4 + Ion.number_ion_species * (1 + 3) + 3 * ion + 1]
->name == VisName);
// z-component of diffusive flux
//IonFluxDiffusive[3*ion+2]->name = VisName;
sprintf(VisName, "Ion%zu_FluxAdvective_z", ion + 1);
//IonFluxDiffusive[3*ion+2]->name = VisName;
.vars[4 + Ion.number_ion_species * (1 + 3) + 3 * ion + 2]
->name == VisName);
Array<double>& IonFluxData_x = visData[0].vars[4+Ion.number_ion_species*(1+3)+3*ion+0]->data;
Array<double>& IonFluxData_y = visData[0].vars[4+Ion.number_ion_species*(1+3)+3*ion+1]->data;
Array<double>& IonFluxData_z = visData[0].vars[4+Ion.number_ion_species*(1+3)+3*ion+2]->data;
Array<double> &IonFluxData_x =
.vars[4 + Ion.number_ion_species * (1 + 3) + 3 * ion + 0]
Array<double> &IonFluxData_y =
.vars[4 + Ion.number_ion_species * (1 + 3) + 3 * ion + 1]
Array<double> &IonFluxData_z =
.vars[4 + Ion.number_ion_species * (1 + 3) + 3 * ion + 2]
Ion.getIonFluxAdvective(IonFluxAdvective_x, IonFluxAdvective_y,
IonFluxAdvective_z, ion);
fillData.copy(IonFluxAdvective_x, IonFluxData_x);
fillData.copy(IonFluxAdvective_y, IonFluxData_y);
fillData.copy(IonFluxAdvective_z, IonFluxData_z);
if (vis_db->getWithDefault<bool>( "save_ion_flux_electrical", false )){
for (size_t ion=0; ion<Ion.number_ion_species; ion++){
if (vis_db->getWithDefault<bool>("save_ion_flux_electrical", false)) {
for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
// x-component of diffusive flux
//IonFluxDiffusive[3*ion+0]->name = VisName;
sprintf(VisName, "Ion%zu_FluxElectrical_x", ion + 1);
//IonFluxDiffusive[3*ion+0]->name = VisName;
.vars[4 + Ion.number_ion_species * (1 + 6) + 3 * ion + 0]
->name == VisName);
// y-component of diffusive flux
//IonFluxDiffusive[3*ion+1]->name = VisName;
sprintf(VisName, "Ion%zu_FluxElectrical_y", ion + 1);
//IonFluxDiffusive[3*ion+1]->name = VisName;
.vars[4 + Ion.number_ion_species * (1 + 6) + 3 * ion + 1]
->name == VisName);
// z-component of diffusive flux
//IonFluxDiffusive[3*ion+2]->name = VisName;
sprintf(VisName, "Ion%zu_FluxElectrical_z", ion + 1);
//IonFluxDiffusive[3*ion+2]->name = VisName;
.vars[4 + Ion.number_ion_species * (1 + 6) + 3 * ion + 2]
->name == VisName);
Array<double>& IonFluxData_x = visData[0].vars[4+Ion.number_ion_species*(1+6)+3*ion+0]->data;
Array<double>& IonFluxData_y = visData[0].vars[4+Ion.number_ion_species*(1+6)+3*ion+1]->data;
Array<double>& IonFluxData_z = visData[0].vars[4+Ion.number_ion_species*(1+6)+3*ion+2]->data;
Array<double> &IonFluxData_x =
.vars[4 + Ion.number_ion_species * (1 + 6) + 3 * ion + 0]
Array<double> &IonFluxData_y =
.vars[4 + Ion.number_ion_species * (1 + 6) + 3 * ion + 1]
Array<double> &IonFluxData_z =
.vars[4 + Ion.number_ion_species * (1 + 6) + 3 * ion + 2]
Ion.getIonFluxElectrical(IonFluxElectrical_x, IonFluxElectrical_y,
IonFluxElectrical_z, ion);
fillData.copy(IonFluxElectrical_x, IonFluxData_x);
fillData.copy(IonFluxElectrical_y, IonFluxData_y);
fillData.copy(IonFluxElectrical_z, IonFluxData_z);
if (vis_db->getWithDefault<bool>( "save_electric_field", false )){
Poisson.getElectricField(ElectricalField_x, ElectricalField_y, ElectricalField_z);
Array<double>& ElectricalFieldxData = visData[0].vars[4+Ion.number_ion_species*(1+9)+0]->data;
Array<double>& ElectricalFieldyData = visData[0].vars[4+Ion.number_ion_species*(1+9)+1]->data;
Array<double>& ElectricalFieldzData = visData[0].vars[4+Ion.number_ion_species*(1+9)+2]->data;
if (vis_db->getWithDefault<bool>("save_electric_field", false)) {
visData[0].vars[4 + Ion.number_ion_species * (1 + 9) + 0]->name ==
visData[0].vars[4 + Ion.number_ion_species * (1 + 9) + 1]->name ==
visData[0].vars[4 + Ion.number_ion_species * (1 + 9) + 2]->name ==
Poisson.getElectricField(ElectricalField_x, ElectricalField_y,
Array<double> &ElectricalFieldxData =
visData[0].vars[4 + Ion.number_ion_species * (1 + 9) + 0]->data;
Array<double> &ElectricalFieldyData =
visData[0].vars[4 + Ion.number_ion_species * (1 + 9) + 1]->data;
Array<double> &ElectricalFieldzData =
visData[0].vars[4 + Ion.number_ion_species * (1 + 9) + 2]->data;
fillData.copy(ElectricalField_x, ElectricalFieldxData);
fillData.copy(ElectricalField_y, ElectricalFieldyData);
fillData.copy(ElectricalField_z, ElectricalFieldzData);
if (vis_db->getWithDefault<bool>( "write_silo", true ))
IO::writeData( timestep, visData, Dm->Comm );
if (vis_db->getWithDefault<bool>("write_silo", true))
IO::writeData(timestep, visData, Dm->Comm);
/* if (vis_db->getWithDefault<bool>( "save_8bit_raw", true )){
/* if (vis_db->getWithDefault<bool>( "save_8bit_raw", true )){
char CurrentIDFilename[40];

View File

@ -20,29 +20,29 @@
#include "models/PoissonSolver.h"
#include "models/StokesModel.h"
class ElectroChemistryAnalyzer{
class ElectroChemistryAnalyzer {
std::shared_ptr <Domain> Dm;
double Volume;
// input variables
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
std::shared_ptr<Domain> Dm;
double Volume;
// input variables
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
int Nx,Ny,Nz;
DoubleArray Rho; // density field
DoubleArray ChemicalPotential; // density field
DoubleArray ElectricalPotential; // density field
DoubleArray ElectricalField_x; // density field
DoubleArray ElectricalField_y; // density field
DoubleArray ElectricalField_z; // density field
DoubleArray Pressure; // pressure field
DoubleArray Vel_x; // velocity field
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray SDs;
int Nx, Ny, Nz;
DoubleArray Rho; // density field
DoubleArray ChemicalPotential; // density field
DoubleArray ElectricalPotential; // density field
DoubleArray ElectricalField_x; // density field
DoubleArray ElectricalField_y; // density field
DoubleArray ElectricalField_z; // density field
DoubleArray Pressure; // pressure field
DoubleArray Vel_x; // velocity field
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray SDs;
DoubleArray IonFluxDiffusive_x; //ion diffusive flux components
DoubleArray IonFluxDiffusive_y;
DoubleArray IonFluxDiffusive_z;
@ -53,15 +53,17 @@ public:
DoubleArray IonFluxElectrical_y;
DoubleArray IonFluxElectrical_z;
ElectroChemistryAnalyzer(std::shared_ptr <Domain> Dm);
void SetParams();
void Basic( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep);
void WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, std::shared_ptr<Database> input_db, int timestep);
ElectroChemistryAnalyzer(std::shared_ptr<Domain> Dm);
void SetParams();
void Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson,
ScaLBL_StokesModel &Stokes, int timestep);
void WriteVis(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson,
ScaLBL_StokesModel &Stokes,
std::shared_ptr<Database> input_db, int timestep);

File diff suppressed because it is too large Load Diff

View File

@ -12,7 +12,6 @@
#include "models/ColorModel.h"
* \class FlowAdaptor
* @brief
@ -20,21 +19,19 @@
class FlowAdaptor{
class FlowAdaptor {
* \brief Create a flow adaptor to operate on the LB model
* @param M ScaLBL_ColorModel
FlowAdaptor(ScaLBL_ColorModel &M);
FlowAdaptor(ScaLBL_ColorModel &M);
* \brief Destructor
* \brief Fast-forward interface motion
* \details Accelerate the movement of interfaces based on the time derivative
@ -43,29 +40,30 @@ public:
* move_interface_factor -- determines how much to "fast forward"
* @param M ScaLBL_ColorModel
double MoveInterface(ScaLBL_ColorModel &M);
double MoveInterface(ScaLBL_ColorModel &M);
* \brief Image re-initialization
* \details Re-initialize LB simulation from image data
* @param M ScaLBL_ColorModel
* @param Filename name of input file to be used to read image
double ImageInit(ScaLBL_ColorModel &M, std::string Filename);
double ImageInit(ScaLBL_ColorModel &M, std::string Filename);
* \details Update volume fraction based on morphological algorithm. Dilation / erosion algorithm will be applied to
* grow / shrink the phase regions
* @param M ScaLBL_ColorModel
* @param delta_volume target change in volume fraction
double ShellAggregation(ScaLBL_ColorModel &M, const double delta_volume);
double ShellAggregation(ScaLBL_ColorModel &M, const double delta_volume);
* \details Update fractional flow condition. Mass will be preferentially added or removed from
* phase regions based on where flow is occurring
* @param M ScaLBL_ColorModel
*/ double UpdateFractionalFlow(ScaLBL_ColorModel &M);
double UpdateFractionalFlow(ScaLBL_ColorModel &M);
* \brief image re-initialization
@ -73,18 +71,19 @@ public:
* @param M ScaLBL_ColorModel
* @param seed_water_in_oil controls amount of mass to randomly seed into fluids
double SeedPhaseField(ScaLBL_ColorModel &M, const double seed_water_in_oil);
double SeedPhaseField(ScaLBL_ColorModel &M, const double seed_water_in_oil);
* \brief Re-initialize LB simulation
* @param M ScaLBL_ColorModel
void Flatten(ScaLBL_ColorModel &M);
DoubleArray phi;
DoubleArray phi_t;
void Flatten(ScaLBL_ColorModel &M);
DoubleArray phi;
DoubleArray phi_t;
int Nx, Ny, Nz;
int timestep;
int timestep_previous;
int Nx, Ny, Nz;
int timestep;
int timestep_previous;

View File

@ -1,55 +1,60 @@
#include "analysis/FreeEnergy.h"
FreeEnergyAnalyzer::FreeEnergyAnalyzer(std::shared_ptr <Domain> dm):
Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz;
ChemicalPotential.resize(Nx,Ny,Nz); ChemicalPotential.fill(0);
Phi.resize(Nx,Ny,Nz); Phi.fill(0);
Pressure.resize(Nx,Ny,Nz); Pressure.fill(0);
Rho.resize(Nx,Ny,Nz); Rho.fill(0);
Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Gradient of the phase indicator field
Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0);
Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0);
SDs.resize(Nx,Ny,Nz); SDs.fill(0);
if (Dm->rank()==0){
bool WriteHeader=false;
TIMELOG = fopen("free.csv","r");
FreeEnergyAnalyzer::FreeEnergyAnalyzer(std::shared_ptr<Domain> dm) : Dm(dm) {
TIMELOG = fopen("free.csv","a+");
if (WriteHeader)
// If timelog is empty, write a short header to list the averages
Nx = dm->Nx;
Ny = dm->Ny;
Nz = dm->Nz;
Volume = (Nx - 2) * (Ny - 2) * (Nz - 2) * Dm->nprocx() * Dm->nprocy() *
Dm->nprocz() * 1.0;
ChemicalPotential.resize(Nx, Ny, Nz);
Phi.resize(Nx, Ny, Nz);
Pressure.resize(Nx, Ny, Nz);
Rho.resize(Nx, Ny, Nz);
Vel_x.resize(Nx, Ny, Nz);
Vel_x.fill(0); // Gradient of the phase indicator field
Vel_y.resize(Nx, Ny, Nz);
Vel_z.resize(Nx, Ny, Nz);
SDs.resize(Nx, Ny, Nz);
if (Dm->rank() == 0) {
bool WriteHeader = false;
TIMELOG = fopen("free.csv", "r");
WriteHeader = true;
TIMELOG = fopen("free.csv", "a+");
if (WriteHeader) {
// If timelog is empty, write a short header to list the averages
fprintf(TIMELOG, "timestep\n");
if (Dm->rank()==0){
FreeEnergyAnalyzer::~FreeEnergyAnalyzer() {
if (Dm->rank() == 0) {
void FreeEnergyAnalyzer::SetParams(){
// Currently a no-op: takes no arguments and has an empty body.
void FreeEnergyAnalyzer::SetParams() {}
void FreeEnergyAnalyzer::Basic(ScaLBL_FreeLeeModel &LeeModel, int timestep){
void FreeEnergyAnalyzer::Basic(ScaLBL_FreeLeeModel &LeeModel, int timestep) {
if (Dm->rank()==0){
fprintf(TIMELOG,"%i ",timestep);
/*for (int ion=0; ion<Ion.number_ion_species; ion++){
if (Dm->rank() == 0) {
fprintf(TIMELOG, "%i ", timestep);
/*for (int ion=0; ion<Ion.number_ion_species; ion++){
fprintf(TIMELOG,"%.8g ",rho_avg_global[ion]);
fprintf(TIMELOG,"%.8g ",rho_mu_avg_global[ion]);
fprintf(TIMELOG,"%.8g ",rho_psi_avg_global[ion]);
@ -57,10 +62,10 @@ void FreeEnergyAnalyzer::Basic(ScaLBL_FreeLeeModel &LeeModel, int timestep){
fprintf(TIMELOG,"%.8g ",rho_psi_fluctuation_global[ion]);
/* else{
fprintf(TIMELOG, "\n");
/* else{
fprintf(TIMELOG,"%i ",timestep);
for (int ion=0; ion<Ion.number_ion_species; ion++){
fprintf(TIMELOG,"%.8g ",rho_avg_local[ion]);
@ -73,106 +78,111 @@ void FreeEnergyAnalyzer::Basic(ScaLBL_FreeLeeModel &LeeModel, int timestep){
} */
void FreeEnergyAnalyzer::WriteVis( ScaLBL_FreeLeeModel &LeeModel, std::shared_ptr<Database> input_db, int timestep){
auto vis_db = input_db->getDatabase( "Visualization" );
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1);
void FreeEnergyAnalyzer::WriteVis(ScaLBL_FreeLeeModel &LeeModel,
std::shared_ptr<Database> input_db,
int timestep) {
// Create the MeshDataStruct
auto vis_db = input_db->getDatabase("Visualization");
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm, Dm->rank_info,
{Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2}, {1, 1, 1},
0, 1);
IO::initialize("", "silo", "false");
// Create the MeshDataStruct
visData[0].meshName = "domain";
visData[0].mesh = std::make_shared<IO::DomainMesh>( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz );
visData[0].mesh =
std::make_shared<IO::DomainMesh>(Dm->rank_info, Dm->Nx - 2, Dm->Ny - 2,
Dm->Nz - 2, Dm->Lx, Dm->Ly, Dm->Lz);
auto VisPhase = std::make_shared<IO::Variable>();
auto VisPressure = std::make_shared<IO::Variable>();
auto VisChemicalPotential = std::make_shared<IO::Variable>();
auto VxVar = std::make_shared<IO::Variable>();
auto VyVar = std::make_shared<IO::Variable>();
auto VzVar = std::make_shared<IO::Variable>();
if (vis_db->getWithDefault<bool>( "save_phase_field", true )){
VisPhase->name = "Phase";
VisPhase->type = IO::VariableType::VolumeVariable;
VisPhase->dim = 1;
if (vis_db->getWithDefault<bool>("save_phase_field", true)) {
VisPhase->name = "Phase";
VisPhase->type = IO::VariableType::VolumeVariable;
VisPhase->dim = 1;
VisPhase->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
if (vis_db->getWithDefault<bool>( "save_potential", true )){
VisPressure->name = "Pressure";
VisPressure->type = IO::VariableType::VolumeVariable;
VisPressure->dim = 1;
if (vis_db->getWithDefault<bool>("save_potential", true)) {
VisPressure->name = "Pressure";
VisPressure->type = IO::VariableType::VolumeVariable;
VisPressure->dim = 1;
VisPressure->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
VisChemicalPotential->name = "ChemicalPotential";
VisChemicalPotential->type = IO::VariableType::VolumeVariable;
VisChemicalPotential->dim = 1;
VisChemicalPotential->name = "ChemicalPotential";
VisChemicalPotential->type = IO::VariableType::VolumeVariable;
VisChemicalPotential->dim = 1;
VisChemicalPotential->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
if (vis_db->getWithDefault<bool>( "save_velocity", false )){
if (vis_db->getWithDefault<bool>("save_velocity", false)) {
VxVar->name = "Velocity_x";
VxVar->type = IO::VariableType::VolumeVariable;
VxVar->dim = 1;
VxVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
VyVar->name = "Velocity_y";
VyVar->type = IO::VariableType::VolumeVariable;
VyVar->dim = 1;
VyVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
VzVar->name = "Velocity_z";
VzVar->type = IO::VariableType::VolumeVariable;
VzVar->dim = 1;
VzVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
if (vis_db->getWithDefault<bool>( "save_phase", true )){
Array<double>& PhaseData = visData[0].vars[0]->data;
if (vis_db->getWithDefault<bool>("save_phase", true)) {
ASSERT(visData[0].vars[0]->name == "Phase");
Array<double> &PhaseData = visData[0].vars[0]->data;
fillData.copy(Phi, PhaseData);
if (vis_db->getWithDefault<bool>( "save_potential", true )){
LeeModel.getPotential(Pressure, ChemicalPotential);
Array<double>& PressureData = visData[0].vars[1]->data;
Array<double>& ChemicalPotentialData = visData[0].vars[2]->data;
if (vis_db->getWithDefault<bool>( "save_velocity", false )){
Array<double>& VelxData = visData[0].vars[3]->data;
Array<double>& VelyData = visData[0].vars[4]->data;
Array<double>& VelzData = visData[0].vars[5]->data;
if (vis_db->getWithDefault<bool>( "write_silo", true ))
IO::writeData( timestep, visData, Dm->Comm );
if (vis_db->getWithDefault<bool>("save_potential", true)) {
ASSERT(visData[0].vars[1]->name == "Pressure");
LeeModel.getPotential(Pressure, ChemicalPotential);
Array<double> &PressureData = visData[0].vars[1]->data;
fillData.copy(Pressure, PressureData);
/* if (vis_db->getWithDefault<bool>( "save_8bit_raw", true )){
ASSERT(visData[0].vars[2]->name == "ChemicalPotential");
Array<double> &ChemicalPotentialData = visData[0].vars[2]->data;
fillData.copy(ChemicalPotential, ChemicalPotentialData);
if (vis_db->getWithDefault<bool>("save_velocity", false)) {
ASSERT(visData[0].vars[3]->name == "Velocity_x");
ASSERT(visData[0].vars[4]->name == "Velocity_y");
ASSERT(visData[0].vars[5]->name == "Velocity_z");
LeeModel.getVelocity(Vel_x, Vel_y, Vel_z);
Array<double> &VelxData = visData[0].vars[3]->data;
Array<double> &VelyData = visData[0].vars[4]->data;
Array<double> &VelzData = visData[0].vars[5]->data;
fillData.copy(Vel_x, VelxData);
fillData.copy(Vel_y, VelyData);
fillData.copy(Vel_z, VelzData);
if (vis_db->getWithDefault<bool>("write_silo", true))
IO::writeData(timestep, visData, Dm->Comm);
/* if (vis_db->getWithDefault<bool>( "save_8bit_raw", true )){
char CurrentIDFilename[40];

View File

@ -27,36 +27,36 @@
class FreeEnergyAnalyzer{
class FreeEnergyAnalyzer {
std::shared_ptr <Domain> Dm;
double Volume;
// input variables
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
std::shared_ptr<Domain> Dm;
double Volume;
// input variables
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
int Nx,Ny,Nz;
DoubleArray Rho;
DoubleArray Phi;
DoubleArray ChemicalPotential;
DoubleArray Pressure;
DoubleArray Vel_x;
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray SDs;
int Nx, Ny, Nz;
DoubleArray Rho;
DoubleArray Phi;
DoubleArray ChemicalPotential;
DoubleArray Pressure;
DoubleArray Vel_x;
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray SDs;
FreeEnergyAnalyzer(std::shared_ptr <Domain> Dm);
void SetParams();
void Basic( ScaLBL_FreeLeeModel &LeeModel, int timestep);
void WriteVis( ScaLBL_FreeLeeModel &LeeModel, std::shared_ptr<Database> input_db, int timestep);
FreeEnergyAnalyzer(std::shared_ptr<Domain> Dm);
void SetParams();
void Basic(ScaLBL_FreeLeeModel &LeeModel, int timestep);
void WriteVis(ScaLBL_FreeLeeModel &LeeModel,
std::shared_ptr<Database> input_db, int timestep);

View File

@ -1,206 +1,234 @@
#include "analysis/GreyPhase.h"
// Constructor
GreyPhaseAnalysis::GreyPhaseAnalysis(std::shared_ptr <Domain> dm):
Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz;
// Global arrays
SDs.resize(Nx,Ny,Nz); SDs.fill(0);
Porosity.resize(Nx,Ny,Nz); Porosity.fill(0);
//PhaseID.resize(Nx,Ny,Nz); PhaseID.fill(0);
Rho_n.resize(Nx,Ny,Nz); Rho_n.fill(0);
Rho_w.resize(Nx,Ny,Nz); Rho_w.fill(0);
Pressure.resize(Nx,Ny,Nz); Pressure.fill(0);
//Phi.resize(Nx,Ny,Nz); Phi.fill(0);
//DelPhi.resize(Nx,Ny,Nz); DelPhi.fill(0);
Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Gradient of the phase indicator field
Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0);
Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0);
MobilityRatio.resize(Nx,Ny,Nz); MobilityRatio.fill(0);
if (Dm->rank()==0){
bool WriteHeader=false;
TIMELOG = fopen("timelog.csv","r");
GreyPhaseAnalysis::GreyPhaseAnalysis(std::shared_ptr<Domain> dm) : Dm(dm) {
Nx = dm->Nx;
Ny = dm->Ny;
Nz = dm->Nz;
Volume = (Nx - 2) * (Ny - 2) * (Nz - 2) * Dm->nprocx() * Dm->nprocy() *
Dm->nprocz() * 1.0;
TIMELOG = fopen("timelog.csv","a+");
if (WriteHeader)
// If timelog is empty, write a short header to list the averages
fprintf(TIMELOG,"sw krw krn vw vn pw pn\n");
// Global arrays
SDs.resize(Nx, Ny, Nz);
Porosity.resize(Nx, Ny, Nz);
//PhaseID.resize(Nx,Ny,Nz); PhaseID.fill(0);
Rho_n.resize(Nx, Ny, Nz);
Rho_w.resize(Nx, Ny, Nz);
Pressure.resize(Nx, Ny, Nz);
//Phi.resize(Nx,Ny,Nz); Phi.fill(0);
//DelPhi.resize(Nx,Ny,Nz); DelPhi.fill(0);
Vel_x.resize(Nx, Ny, Nz);
Vel_x.fill(0); // Gradient of the phase indicator field
Vel_y.resize(Nx, Ny, Nz);
Vel_z.resize(Nx, Ny, Nz);
MobilityRatio.resize(Nx, Ny, Nz);
if (Dm->rank() == 0) {
bool WriteHeader = false;
TIMELOG = fopen("timelog.csv", "r");
WriteHeader = true;
TIMELOG = fopen("timelog.csv", "a+");
if (WriteHeader) {
// If timelog is empty, write a short header to list the averages
fprintf(TIMELOG, "sw krw krn vw vn pw pn\n");
// Destructor: the body is empty, so no explicit cleanup is performed here.
// NOTE(review): the constructor opens TIMELOG with fopen() on rank 0; nothing
// in this destructor closes it -- confirm whether it is closed elsewhere.
GreyPhaseAnalysis::~GreyPhaseAnalysis() {}
// Currently a no-op: the body is empty, so `timestep` is unused and nothing is written.
void GreyPhaseAnalysis::Write(int timestep) {}
void GreyPhaseAnalysis::Write(int timestep)
void GreyPhaseAnalysis::SetParams(double rhoA, double rhoB, double tauA, double tauB, double force_x, double force_y, double force_z, double alpha, double B, double GreyPorosity)
Fx = force_x;
Fy = force_y;
Fz = force_z;
rho_n = rhoA;
rho_w = rhoB;
nu_n = (tauA-0.5)/3.f;
nu_w = (tauB-0.5)/3.f;
gamma_wn = 6.0*alpha;
beta = B;
void GreyPhaseAnalysis::SetParams(double rhoA, double rhoB, double tauA,
double tauB, double force_x, double force_y,
double force_z, double alpha, double B,
double GreyPorosity) {
Fx = force_x;
Fy = force_y;
Fz = force_z;
rho_n = rhoA;
rho_w = rhoB;
nu_n = (tauA - 0.5) / 3.f;
nu_w = (tauB - 0.5) / 3.f;
gamma_wn = 6.0 * alpha;
beta = B;
grey_porosity = GreyPorosity;
void GreyPhaseAnalysis::Basic(){
int i,j,k,n,imin,jmin,kmin,kmax;
void GreyPhaseAnalysis::Basic() {
int i, j, k, n, imin, jmin, kmin, kmax;
// If external boundary conditions are set, do not average over the inlet
kmin=1; kmax=Nz-1;
if (Dm->inlet_layers_z > 0 && Dm->kproc() == 0) kmin += Dm->inlet_layers_z;
if (Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz()-1) kmax -= Dm->outlet_layers_z;
// If external boundary conditions are set, do not average over the inlet
kmin = 1;
kmax = Nz - 1;
imin = jmin = 1;
if (Dm->inlet_layers_z > 0 && Dm->kproc() == 0)
kmin += Dm->inlet_layers_z;
if (Dm->outlet_layers_z > 0 && Dm->kproc() == Dm->nprocz() - 1)
kmax -= Dm->outlet_layers_z;
double count_w = 0.0;
double count_n = 0.0;
for (k=kmin; k<kmax; k++){
for (j=jmin; j<Ny-1; j++){
for (i=imin; i<Nx-1; i++){
n = k*Nx*Ny + j*Nx + i;
// Compute volume averages
if ( Dm->id[n] > 0 ){
// compute density
double nA = Rho_n(n);
double nB = Rho_w(n);
double phi = (nA-nB)/(nA+nB);
double porosity = Porosity(n);
double mobility_ratio = MobilityRatio(n);
Water_local.M += nB*porosity;
Water_local.Px += porosity*(nA+nB)*Vel_x(n)*0.5*(1.0-mobility_ratio);
Water_local.Py += porosity*(nA+nB)*Vel_y(n)*0.5*(1.0-mobility_ratio);
Water_local.Pz += porosity*(nA+nB)*Vel_z(n)*0.5*(1.0-mobility_ratio);
Oil_local.M += nA*porosity;
Oil_local.Px += porosity*(nA+nB)*Vel_x(n)*0.5*(1.0+mobility_ratio);
Oil_local.Py += porosity*(nA+nB)*Vel_y(n)*0.5*(1.0+mobility_ratio);
Oil_local.Pz += porosity*(nA+nB)*Vel_z(n)*0.5*(1.0+mobility_ratio);
double count_w = 0.0;
double count_n = 0.0;
for (k = kmin; k < kmax; k++) {
for (j = jmin; j < Ny - 1; j++) {
for (i = imin; i < Nx - 1; i++) {
n = k * Nx * Ny + j * Nx + i;
// Compute volume averages
if (Dm->id[n] > 0) {
// compute density
double nA = Rho_n(n);
double nB = Rho_w(n);
double phi = (nA - nB) / (nA + nB);
double porosity = Porosity(n);
double mobility_ratio = MobilityRatio(n);
if ( phi > 0.99 ){
Oil_local.p += Pressure(n);
//Oil_local.p += pressure*(rho_n*nA)/(rho_n*nA+rho_w*nB);
count_n += 1.0;
else if ( phi < -0.99 ){
Water_local.p += Pressure(n);
//Water_local.p += pressure*(rho_w*nB)/(rho_n*nA+rho_w*nB);
count_w += 1.0;
Oil.M=Dm->Comm.sumReduce( Oil_local.M);
Oil.Px=Dm->Comm.sumReduce( Oil_local.Px);
Oil.Py=Dm->Comm.sumReduce( Oil_local.Py);
Oil.Pz=Dm->Comm.sumReduce( Oil_local.Pz);
Water.M=Dm->Comm.sumReduce( Water_local.M);
Water.Px=Dm->Comm.sumReduce( Water_local.Px);
Water.Py=Dm->Comm.sumReduce( Water_local.Py);
Water.Pz=Dm->Comm.sumReduce( Water_local.Pz);
Water_local.M += nB * porosity;
Water_local.Px += porosity * (nA + nB) * Vel_x(n) * 0.5 *
(1.0 - mobility_ratio);
Water_local.Py += porosity * (nA + nB) * Vel_y(n) * 0.5 *
(1.0 - mobility_ratio);
Water_local.Pz += porosity * (nA + nB) * Vel_z(n) * 0.5 *
(1.0 - mobility_ratio);
Oil_local.M += nA * porosity;
Oil_local.Px += porosity * (nA + nB) * Vel_x(n) * 0.5 *
(1.0 + mobility_ratio);
Oil_local.Py += porosity * (nA + nB) * Vel_y(n) * 0.5 *
(1.0 + mobility_ratio);
Oil_local.Pz += porosity * (nA + nB) * Vel_z(n) * 0.5 *
(1.0 + mobility_ratio);
//Oil.p /= Oil.M;
//Water.p /= Water.M;
count_w=Dm->Comm.sumReduce( count_w);
count_n=Dm->Comm.sumReduce( count_n);
if (count_w > 0.0)
Water.p=Dm->Comm.sumReduce( Water_local.p) / count_w;
Water.p = 0.0;
if (count_n > 0.0)
Oil.p=Dm->Comm.sumReduce( Oil_local.p) / count_n;
Oil.p = 0.0;
if (phi > 0.99) {
Oil_local.p += Pressure(n);
//Oil_local.p += pressure*(rho_n*nA)/(rho_n*nA+rho_w*nB);
count_n += 1.0;
} else if (phi < -0.99) {
Water_local.p += Pressure(n);
//Water_local.p += pressure*(rho_w*nB)/(rho_n*nA+rho_w*nB);
count_w += 1.0;
Oil.M = Dm->Comm.sumReduce(Oil_local.M);
Oil.Px = Dm->Comm.sumReduce(Oil_local.Px);
Oil.Py = Dm->Comm.sumReduce(Oil_local.Py);
Oil.Pz = Dm->Comm.sumReduce(Oil_local.Pz);
// check for NaN
bool err=false;
if (Water.M != Water.M) err=true;
if (Water.p != Water.p) err=true;
if (Water.Px != Water.Px) err=true;
if (Water.Py != Water.Py) err=true;
if (Water.Pz != Water.Pz) err=true;
Water.M = Dm->Comm.sumReduce(Water_local.M);
Water.Px = Dm->Comm.sumReduce(Water_local.Px);
Water.Py = Dm->Comm.sumReduce(Water_local.Py);
Water.Pz = Dm->Comm.sumReduce(Water_local.Pz);
if (Oil.M != Oil.M) err=true;
if (Oil.p != Oil.p) err=true;
if (Oil.Px != Oil.Px) err=true;
if (Oil.Py != Oil.Py) err=true;
if (Oil.Pz != Oil.Pz) err=true;
if (Dm->rank() == 0){
double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz);
double dir_x = 0.0;
double dir_y = 0.0;
double dir_z = 0.0;
if (force_mag > 0.0){
dir_x = Fx/force_mag;
dir_y = Fy/force_mag;
dir_z = Fz/force_mag;
else {
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
if (Dm->BoundaryCondition == 1 || Dm->BoundaryCondition == 2 || Dm->BoundaryCondition == 3 || Dm->BoundaryCondition == 4 ){
// compute the pressure drop
double pressure_drop = (Pressure(Nx*Ny + Nx + 1) - 1.0) / 3.0;
double length = ((Nz-2)*Dm->nprocz());
force_mag -= pressure_drop/length;
if (force_mag == 0.0){
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
saturation=Water.M/(Water.M + Oil.M); // assume constant density
water_flow_rate=grey_porosity*saturation*(Water.Px*dir_x + Water.Py*dir_y + Water.Pz*dir_z)/Water.M;
oil_flow_rate =grey_porosity*(1.0-saturation)*(Oil.Px*dir_x + Oil.Py*dir_y + Oil.Pz*dir_z)/Oil.M;
//Oil.p /= Oil.M;
//Water.p /= Water.M;
count_w = Dm->Comm.sumReduce(count_w);
count_n = Dm->Comm.sumReduce(count_n);
if (count_w > 0.0)
Water.p = Dm->Comm.sumReduce(Water_local.p) / count_w;
Water.p = 0.0;
if (count_n > 0.0)
Oil.p = Dm->Comm.sumReduce(Oil_local.p) / count_n;
Oil.p = 0.0;
double h = Dm->voxel_length;
// check for NaN
bool err = false;
if (Water.M != Water.M)
err = true;
if (Water.p != Water.p)
err = true;
if (Water.Px != Water.Px)
err = true;
if (Water.Py != Water.Py)
err = true;
if (Water.Pz != Water.Pz)
err = true;
if (Oil.M != Oil.M)
err = true;
if (Oil.p != Oil.p)
err = true;
if (Oil.Px != Oil.Px)
err = true;
if (Oil.Py != Oil.Py)
err = true;
if (Oil.Pz != Oil.Pz)
err = true;
if (Dm->rank() == 0) {
double force_mag = sqrt(Fx * Fx + Fy * Fy + Fz * Fz);
double dir_x = 0.0;
double dir_y = 0.0;
double dir_z = 0.0;
if (force_mag > 0.0) {
dir_x = Fx / force_mag;
dir_y = Fy / force_mag;
dir_z = Fz / force_mag;
} else {
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
if (Dm->BoundaryCondition == 1 || Dm->BoundaryCondition == 2 ||
Dm->BoundaryCondition == 3 || Dm->BoundaryCondition == 4) {
// compute the pressure drop
double pressure_drop = (Pressure(Nx * Ny + Nx + 1) - 1.0) / 3.0;
double length = ((Nz - 2) * Dm->nprocz());
force_mag -= pressure_drop / length;
if (force_mag == 0.0) {
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
saturation = Water.M / (Water.M + Oil.M); // assume constant density
water_flow_rate =
grey_porosity * saturation *
(Water.Px * dir_x + Water.Py * dir_y + Water.Pz * dir_z) / Water.M;
oil_flow_rate = grey_porosity * (1.0 - saturation) *
(Oil.Px * dir_x + Oil.Py * dir_y + Oil.Pz * dir_z) /
double h = Dm->voxel_length;
//TODO check if need greyporosity or domain porosity ? - compare to analytical solution
double krn = h*h*nu_n*oil_flow_rate / force_mag ;
double krw = h*h*nu_w*water_flow_rate / force_mag;
//printf(" water saturation = %f, fractional flow =%f \n",saturation,fractional_flow);
fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g %.5g\n",saturation,krw,krn,h*water_flow_rate,h*oil_flow_rate, Water.p, Oil.p);
double krn = h * h * nu_n * oil_flow_rate / force_mag;
double krw = h * h * nu_w * water_flow_rate / force_mag;
//printf(" water saturation = %f, fractional flow =%f \n",saturation,fractional_flow);
fprintf(TIMELOG, "%.5g %.5g %.5g %.5g %.5g %.5g %.5g\n", saturation,
krw, krn, h * water_flow_rate, h * oil_flow_rate, Water.p,
if (err==true){
// exception if simulation produces NaN
printf("GreyPhaseAnalysis.cpp: NaN encountered, may need to check simulation parameters \n");
if (err == true) {
// exception if simulation produces NaN
printf("GreyPhaseAnalysis.cpp: NaN encountered, may need to check "
"simulation parameters \n");
ASSERT(err == false);
inline void InterfaceTransportMeasures( double beta, double rA, double rB, double nA, double nB,

View File

@ -15,7 +15,6 @@
#include "IO/Reader.h"
#include "IO/Writer.h"
* \class GreyPhase
@ -23,18 +22,15 @@
* The GreyPhase class tracks pressure, mass and momentum within a grey phase
class GreyPhase{
double p;
double M,Px,Py,Pz;
void reset(){
class GreyPhase {
double p;
double M, Px, Py, Pz;
// Zero every tracked quantity: p, M, and the three components Px, Py, Pz.
void reset() {
    Pz = 0.0;
    Py = 0.0;
    Px = 0.0;
    M = 0.0;
    p = 0.0;
}
* \class GreyPhaseAnalysis
@ -42,47 +38,48 @@ class GreyPhase{
* The GreyPhaseAnalysis class is constructed to analyze the LBPM greyscale model
class GreyPhaseAnalysis{
class GreyPhaseAnalysis {
std::shared_ptr <Domain> Dm;
double Volume;
// input variables
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
std::shared_ptr<Domain> Dm;
double Volume;
// input variables
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
double grey_porosity;
// outputs
double saturation,water_flow_rate, oil_flow_rate;
// outputs
double saturation, water_flow_rate, oil_flow_rate;
//simulation outputs (averaged values)
GreyPhase Water, Oil;
GreyPhase Water_local, Oil_local;
int Nx,Ny,Nz;
//IntArray PhaseID; // Phase ID array
DoubleArray SDs; // contains porosity map
DoubleArray Porosity; // contains porosity map
DoubleArray Rho_n; // density field
DoubleArray Rho_w; // density field
//DoubleArray Phi; // phase indicator field
//DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field
DoubleArray Pressure; // pressure field
DoubleArray Vel_x; // velocity field
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray MobilityRatio;
//simulation outputs (averaged values)
GreyPhase Water, Oil;
GreyPhase Water_local, Oil_local;
int Nx, Ny, Nz;
//IntArray PhaseID; // Phase ID array
DoubleArray SDs; // contains porosity map
DoubleArray Porosity; // contains porosity map
DoubleArray Rho_n; // density field
DoubleArray Rho_w; // density field
//DoubleArray Phi; // phase indicator field
//DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field
DoubleArray Pressure; // pressure field
DoubleArray Vel_x; // velocity field
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray MobilityRatio;
GreyPhaseAnalysis(std::shared_ptr <Domain> Dm);
void SetParams(double rhoA, double rhoB, double tauA, double tauB, double force_x, double force_y, double force_z, double alpha, double beta, double GreyPorosity);
void Basic();
void Write(int time);
GreyPhaseAnalysis(std::shared_ptr<Domain> Dm);
void SetParams(double rhoA, double rhoB, double tauA, double tauB,
double force_x, double force_y, double force_z, double alpha,
double beta, double GreyPorosity);
void Basic();
void Write(int time);

View File

@ -29,282 +29,278 @@
#include <memory>
#define PI 3.14159265359
// Constructor
Minkowski::Minkowski(std::shared_ptr <Domain> dm):
kstart(0), kfinish(0), isovalue(0), Volume(0),
LOGFILE(NULL), Dm(dm), Vi(0), Vi_global(0)
Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz;
id.resize(Nx,Ny,Nz); id.fill(0);
label.resize(Nx,Ny,Nz); label.fill(0);
distance.resize(Nx,Ny,Nz); distance.fill(0);
Minkowski::Minkowski(std::shared_ptr<Domain> dm)
: kstart(0), kfinish(0), isovalue(0), Volume(0), LOGFILE(NULL), Dm(dm),
Vi(0), Vi_global(0) {
Nx = dm->Nx;
Ny = dm->Ny;
Nz = dm->Nz;
Volume = double((Nx - 2) * (Ny - 2) * (Nz - 2)) *
double(Dm->nprocx() * Dm->nprocy() * Dm->nprocz());
if (Dm->rank()==0){
LOGFILE = fopen("minkowski.csv","a+");
if (fseek(LOGFILE,0,SEEK_SET) == fseek(LOGFILE,0,SEEK_CUR))
// If LOGFILE is empty, write a short header to list the averages
fprintf(LOGFILE,"Vn An Jn Xn\n"); //miknowski measures,
id.resize(Nx, Ny, Nz);
label.resize(Nx, Ny, Nz);
distance.resize(Nx, Ny, Nz);
if (Dm->rank() == 0) {
LOGFILE = fopen("minkowski.csv", "a+");
if (fseek(LOGFILE, 0, SEEK_SET) == fseek(LOGFILE, 0, SEEK_CUR)) {
// If LOGFILE is empty, write a short header to list the averages
fprintf(LOGFILE, "Vn An Jn Xn\n"); //miknowski measures,
// Destructor
if ( LOGFILE!=NULL ) { fclose(LOGFILE); }
Minkowski::~Minkowski() {
if (LOGFILE != NULL) {
void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
void Minkowski::ComputeScalar(const DoubleArray &Field, const double isovalue) {
Xi = Ji = Ai = 0.0;
DCEL object;
int e1,e2,e3;
double s,s1,s2,s3;
double a1,a2,a3;
//double Vx,Vy,Vz,Wx,Wy,Wz,nx,ny,nz,norm;
//int Nx = Field.size(0);
//int Ny = Field.size(1);
//int Nz = Field.size(2);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
for (int idx=0; idx<object.TriangleCount; idx++){
e1 = object.Face(idx);
e2 =;
e3 =;
auto P1 = object.vertex.coords(object.halfedge.v1(e1));
auto P2 = object.vertex.coords(object.halfedge.v1(e2));
auto P3 = object.vertex.coords(object.halfedge.v1(e3));
// Surface area
s1 = Distance( P1, P2 );
s2 = Distance( P2, P3 );
s3 = Distance( P1, P3 );
s = 0.5*(s1+s2+s3);
Ai += sqrt(s*(s-s1)*(s-s2)*(s-s3));
// Mean curvature based on half edge angle
a1 = object.EdgeAngle(e1);
a2 = object.EdgeAngle(e2);
a3 = object.EdgeAngle(e3);
Ji += (a1*s1+a2*s2+a3*s3);
//if (0.08333333333333*(a1*s1+a2*s2+a3*s3) < 0.f){
//double intcurv=0.08333333333333*(a1*s1+a2*s2+a3*s3);
//double surfarea=sqrt(s*(s-s1)*(s-s2)*(s-s3));
//printf(" (%i,%i,%i) PQ(%i,%i)={%f,%f,%f} {%f,%f,%f} a=%f l=%f \n",i,j,k,e1,object.halfedge.twin(e1),P1.x,P1.y,P1.z,P2.x,P2.y,P2.z,a1,s1);
// printf(" (%i,%i,%i) QR(%i,%i)={%f,%f,%f} {%f,%f,%f} a=%f l=%f \n",i,j,k,e2,object.halfedge.twin(e2),P2.x,P2.y,P2.z,P3.x,P3.y,P3.z,a2,s2);
// printf(" (%i,%i,%i) RP(%i,%i)={%f,%f,%f} {%f,%f,%f} a=%f l=%f \n",i,j,k,e3,object.halfedge.twin(e3),P3.x,P3.y,P3.z,P1.x,P1.y,P1.z,a3,s3);
// Euler characteristic (half edge rule: one face - 0.5*(three edges))
Xi -= 0.5;
// Euler characteristic -- each vertex shared by four cubes
//Xi += 0.25*double(object.VertexCount);
// check if vertices are at corners
for (int idx=0; idx<object.VertexCount; idx++){
/*auto P1 = object.vertex.coords(idx);
Xi = Ji = Ai = 0.0;
DCEL object;
int e1, e2, e3;
double s, s1, s2, s3;
double a1, a2, a3;
//double Vx,Vy,Vz,Wx,Wy,Wz,nx,ny,nz,norm;
//int Nx = Field.size(0);
//int Ny = Field.size(1);
//int Nz = Field.size(2);
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
object.LocalIsosurface(Field, isovalue, i, j, k);
for (int idx = 0; idx < object.TriangleCount; idx++) {
e1 = object.Face(idx);
e2 =;
e3 =;
auto P1 = object.vertex.coords(object.halfedge.v1(e1));
auto P2 = object.vertex.coords(object.halfedge.v1(e2));
auto P3 = object.vertex.coords(object.halfedge.v1(e3));
// Surface area
s1 = Distance(P1, P2);
s2 = Distance(P2, P3);
s3 = Distance(P1, P3);
s = 0.5 * (s1 + s2 + s3);
Ai += sqrt(s * (s - s1) * (s - s2) * (s - s3));
// Mean curvature based on half edge angle
a1 = object.EdgeAngle(e1);
a2 = object.EdgeAngle(e2);
a3 = object.EdgeAngle(e3);
Ji += (a1 * s1 + a2 * s2 + a3 * s3);
//if (0.08333333333333*(a1*s1+a2*s2+a3*s3) < 0.f){
//double intcurv=0.08333333333333*(a1*s1+a2*s2+a3*s3);
//double surfarea=sqrt(s*(s-s1)*(s-s2)*(s-s3));
//printf(" (%i,%i,%i) PQ(%i,%i)={%f,%f,%f} {%f,%f,%f} a=%f l=%f \n",i,j,k,e1,object.halfedge.twin(e1),P1.x,P1.y,P1.z,P2.x,P2.y,P2.z,a1,s1);
// printf(" (%i,%i,%i) QR(%i,%i)={%f,%f,%f} {%f,%f,%f} a=%f l=%f \n",i,j,k,e2,object.halfedge.twin(e2),P2.x,P2.y,P2.z,P3.x,P3.y,P3.z,a2,s2);
// printf(" (%i,%i,%i) RP(%i,%i)={%f,%f,%f} {%f,%f,%f} a=%f l=%f \n",i,j,k,e3,object.halfedge.twin(e3),P3.x,P3.y,P3.z,P1.x,P1.y,P1.z,a3,s3);
// Euler characteristic (half edge rule: one face - 0.5*(three edges))
Xi -= 0.5;
// Euler characteristic -- each vertex shared by four cubes
//Xi += 0.25*double(object.VertexCount);
// check if vertices are at corners
for (int idx = 0; idx < object.VertexCount; idx++) {
/*auto P1 = object.vertex.coords(idx);
if ( remainder(P1.x,1.0)==0.0 && remainder(P1.y,1.0)==0.0 && remainder(P1.z,1.0)==0.0 ){
Xi += 0.125;
Xi += 0.25;
/*double nside_extern = double(npts);
Xi += 0.25;
/*double nside_extern = double(npts);
double nside_intern = double(npts)-3.0;
if (npts > 0) EulerChar = (0.25*nvert - nside_intern - 0.5*nside_extern + nface); */
// Voxel counting for volume fraction
Vi = 0.f;
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Field(i,j,k) < isovalue){
Vi += 1.0;
// convert X for 2D manifold to 3D object
Xi *= 0.5;
// Phase averages
Vi_global = Dm->Comm.sumReduce( Vi );
Xi_global = Dm->Comm.sumReduce( Xi );
Ai_global = Dm->Comm.sumReduce( Ai );
Ji_global = Dm->Comm.sumReduce( Ji );
// Voxel counting for volume fraction
Vi = 0.f;
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
if (Field(i, j, k) < isovalue) {
Vi += 1.0;
// convert X for 2D manifold to 3D object
Xi *= 0.5;
// Phase averages
Vi_global = Dm->Comm.sumReduce(Vi);
Xi_global = Dm->Comm.sumReduce(Xi);
Ai_global = Dm->Comm.sumReduce(Ai);
Ji_global = Dm->Comm.sumReduce(Ji);
void Minkowski::MeasureObject(){
void Minkowski::MeasureObject() {
* compute the distance to an object
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
* 0 - labels the object
* 1 - labels everything else
//DoubleArray smooth_distance(Nx,Ny,Nz);
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
distance(i,j,k) =2.0*double(id(i,j,k))-1.0;
//Eikonal(distance, id, *Dm, 20, {true, true, true});
//DoubleArray smooth_distance(Nx,Ny,Nz);
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
distance(i, j, k) = 2.0 * double(id(i, j, k)) - 1.0;
CalcDist(distance, id, *Dm);
//Eikonal(distance, id, *Dm, 20, {true, true, true});
ComputeScalar(distance, 0.0);
void Minkowski::MeasureObject(double factor, const DoubleArray &Phi){
void Minkowski::MeasureObject(double factor, const DoubleArray &Phi) {
* compute the distance to an object
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
* 0 - labels the object
* 1 - labels everything else
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
distance(i,j,k) =2.0*double(id(i,j,k))-1.0;
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
double value = Phi(i,j,k);
double dist_value = distance(i,j,k);
if (dist_value < 2.5 && dist_value > -2.5) {
double new_distance = factor*log((1.0+value)/(1.0-value));
if (dist_value*new_distance < 0.0 )
new_distance = (-1.0)*new_distance;
distance(i,j,k) = new_distance;
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
distance(i, j, k) = 2.0 * double(id(i, j, k)) - 1.0;
CalcDist(distance, id, *Dm);
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
double value = Phi(i, j, k);
double dist_value = distance(i, j, k);
if (dist_value < 2.5 && dist_value > -2.5) {
double new_distance =
factor * log((1.0 + value) / (1.0 - value));
if (dist_value * new_distance < 0.0)
new_distance = (-1.0) * new_distance;
distance(i, j, k) = new_distance;
ComputeScalar(distance, 0.0);
int Minkowski::MeasureConnectedPathway(){
int Minkowski::MeasureConnectedPathway() {
* compute the connected pathway for object with LABEL in id field
* compute the labels for connected components
* compute the distance to the connected pathway
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
char LABEL = 0;
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
if (id(i,j,k) == LABEL){
distance(i,j,k) = 1.0;
distance(i,j,k) = -1.0;
// Extract only the connected part of NWP
double vF=0.0;
n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm);
// int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm )
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
if ( label(i,j,k) == 0){
id(i,j,k) = 0;
id(i,j,k) = 1;
return n_connected_components;
char LABEL = 0;
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
if (id(i, j, k) == LABEL) {
distance(i, j, k) = 1.0;
} else
distance(i, j, k) = -1.0;
// Extract only the connected part of NWP
double vF = 0.0;
n_connected_components =
ComputeGlobalBlobIDs(Nx - 2, Ny - 2, Nz - 2, Dm->rank_info, distance,
distance, vF, vF, label, Dm->Comm);
// int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm )
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
if (label(i, j, k) == 0) {
id(i, j, k) = 0;
} else {
id(i, j, k) = 1;
return n_connected_components;
int Minkowski::MeasureConnectedPathway(double factor, const DoubleArray &Phi){
int Minkowski::MeasureConnectedPathway(double factor, const DoubleArray &Phi) {
* compute the connected pathway for object with LABEL in id field
* compute the labels for connected components
* compute the distance to the connected pathway
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
char LABEL = 0;
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
if (id(i,j,k) == LABEL){
distance(i,j,k) = 1.0;
distance(i,j,k) = -1.0;
// Extract only the connected part of NWP
double vF=0.0;
n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm);
// int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm )
for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){
if ( label(i,j,k) == 0){
id(i,j,k) = 0;
id(i,j,k) = 1;
return n_connected_components;
char LABEL = 0;
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
if (id(i, j, k) == LABEL) {
distance(i, j, k) = 1.0;
} else
distance(i, j, k) = -1.0;
// Extract only the connected part of NWP
double vF = 0.0;
n_connected_components =
ComputeGlobalBlobIDs(Nx - 2, Ny - 2, Nz - 2, Dm->rank_info, distance,
distance, vF, vF, label, Dm->Comm);
// int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm )
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
if (label(i, j, k) == 0) {
id(i, j, k) = 0;
} else {
id(i, j, k) = 1;
MeasureObject(factor, Phi);
return n_connected_components;
void Minkowski::PrintAll()
if (Dm->rank()==0){
fprintf(LOGFILE,"%.5g %.5g %.5g %.5g\n",Vi_global, Ai_global, Ji_global, Xi_global); // minkowski measures
void Minkowski::PrintAll() {
if (Dm->rank() == 0) {
fprintf(LOGFILE, "%.5g %.5g %.5g %.5g\n", Vi_global, Ai_global,
Ji_global, Xi_global); // minkowski measures

View File

@ -42,60 +42,51 @@
class Minkowski {
int kstart, kfinish;
class Minkowski{
int kstart,kfinish;
double isovalue;
double Volume;
double isovalue;
double Volume;
// CSV / text file where time history of averages is saved
// CSV / text file where time history of averages is saved
std::shared_ptr <Domain> Dm;
Array <char> id;
Array <int> label;
Array <double> distance;
// Averaging variables
// local averages (to each MPI process)
double Ai,Ji,Xi,Vi;
// Global averages (all processes)
double Ai_global,Ji_global,Xi_global,Vi_global;
int n_connected_components;
int Nx,Ny,Nz;
std::shared_ptr<Domain> Dm;
Array<char> id;
Array<int> label;
Array<double> distance;
// Averaging variables
// local averages (to each MPI process)
double Ai, Ji, Xi, Vi;
// Global averages (all processes)
double Ai_global, Ji_global, Xi_global, Vi_global;
int n_connected_components;
int Nx, Ny, Nz;
double V(){
return Vi;
double A(){
return Ai;
double H(){
return Ji;
double X(){
return Xi;
// Accessors for the local (this MPI process) measures; the reduced totals
// are stored separately in the *_global members.
double V() { return Vi; } // count of voxels whose field value is below the isovalue
double A() { return Ai; } // summed area of the isosurface triangles
double H() { return Ji; } // summed (edge angle * edge length) curvature measure
double X() { return Xi; } // Euler characteristic accumulator
* \brief Null constructor
Minkowski(){};//NULL CONSTRUCTOR
Minkowski(){}; //NULL CONSTRUCTOR
* \brief Constructor based on an existing Domain
* @param Dm - Domain structure
Minkowski(std::shared_ptr <Domain> Dm);
Minkowski(std::shared_ptr<Domain> Dm);
* \brief Compute scalar minkowski functionals
* step 1. compute the distance to an object
@ -104,11 +95,11 @@ public:
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
* 0 - labels the object
* 1 - labels everything else
void MeasureObject();
void MeasureObject(double factor, const DoubleArray &Phi);
void MeasureObject();
void MeasureObject(double factor, const DoubleArray &Phi);
* \details Compute scalar minkowski functionals for connected part of a structure
* step 1. compute connected components and extract largest region by volume
@ -118,25 +109,23 @@ public:
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
* 0 - labels the object
* 1 - labels everything else
int MeasureConnectedPathway();
int MeasureConnectedPathway();
int MeasureConnectedPathway(double factor, const DoubleArray &Phi);
int MeasureConnectedPathway(double factor, const DoubleArray &Phi);
* \brief Compute scalar minkowski functionals
* \details Construct an isosurface and return the geometric invariants based on the triangulated list
* @param isovalue - threshold value to use to determine iso-surface
* @param Field - DoubleArray containing the field to threshold
void ComputeScalar(const DoubleArray& Field, const double isovalue);
void ComputeScalar(const DoubleArray &Field, const double isovalue);
* \brief print the scalar invariants
void PrintAll();
void PrintAll();

View File

@ -21,25 +21,49 @@
struct LBPM_Point {
LBPM_Point() : x(0.0), y(0.0), z(0.0) {}
LBPM_Point(double xv,double yv,double zv) : x(xv), y(yv), z(zv) {}
double x,y,z;
LBPM_Point(double xv, double yv, double zv) : x(xv), y(yv), z(zv) {}
double x, y, z;
typedef LBPM_Point Point;
inline Point operator+(const Point &A,const Point &B) {return Point(A.x+B.x,A.y+B.y,A.z+B.z);}
inline Point operator-(const Point &A,const Point &B) {return Point(A.x-B.x,A.y-B.y,A.z-B.z);}
inline Point operator*(const Point &A,double v) {return Point(A.x*v,A.y*v,A.z*v);}
inline Point operator*(double v,const Point &A) {return Point(A.x*v,A.y*v,A.z*v);}
inline Point operator/(const Point &A,double v) {return Point(A.x/v,A.y/v,A.z/v);}
inline Point operator-(const Point &A) {return Point(-A.x,-A.y,-A.z);}
inline Point operator+(const Point &A, const Point &B) {
return Point(A.x + B.x, A.y + B.y, A.z + B.z);
inline Point operator-(const Point &A, const Point &B) {
return Point(A.x - B.x, A.y - B.y, A.z - B.z);
inline Point operator*(const Point &A, double v) {
return Point(A.x * v, A.y * v, A.z * v);
inline Point operator*(double v, const Point &A) {
return Point(A.x * v, A.y * v, A.z * v);
inline Point operator/(const Point &A, double v) {
return Point(A.x / v, A.y / v, A.z / v);
inline Point operator-(const Point &A) { return Point(-A.x, -A.y, -A.z); }
inline bool operator==(const Point &A,const Point &B) {return (A.x==B.x && A.y==B.y && A.z==B.z);}
inline bool operator!=(const Point &A,const Point &B) {return (A.x!=B.x || A.y!=B.y || A.z!=B.z);}
inline bool operator==(const Point &A, const Point &B) {
return (A.x == B.x && A.y == B.y && A.z == B.z);
inline bool operator!=(const Point &A, const Point &B) {
return (A.x != B.x || A.y != B.y || A.z != B.z);
inline double Norm(const Point &A) {return sqrt(A.x*A.x+A.y*A.y+A.z*A.z);}
inline Point Cross(const Point &A,const Point &B) {return Point(A.y*B.z-A.z*B.y,B.x*A.z-A.x*B.z,A.x*B.y-A.y*B.x);}
inline double Dot(const Point &A,const Point &B) {return (A.x*B.x+A.y*B.y+A.z*B.z);}
inline double Distance(const Point &A,const Point &B) {return sqrt((A.x-B.x)*(A.x-B.x)+(A.y-B.y)*(A.y-B.y)+(A.z-B.z)*(A.z-B.z));}
inline double Norm(const Point &A) {
return sqrt(A.x * A.x + A.y * A.y + A.z * A.z);
inline Point Cross(const Point &A, const Point &B) {
return Point(A.y * B.z - A.z * B.y, B.x * A.z - A.x * B.z,
A.x * B.y - A.y * B.x);
inline double Dot(const Point &A, const Point &B) {
return (A.x * B.x + A.y * B.y + A.z * B.z);
inline double Distance(const Point &A, const Point &B) {
return sqrt((A.x - B.x) * (A.x - B.x) + (A.y - B.y) * (A.y - B.y) +
(A.z - B.z) * (A.z - B.z));
class PointList{
@ -104,25 +128,38 @@ PointList::~PointList()
delete data;
template <class T>
class DTList {
template <class T> class DTList {
DTList() : Data(0), length(0), refCount(new size_t(1)), outOfRange() {}
DTList(const DTList<T> &A) : Data(A.Data), length(A.length), refCount(A.refCount), outOfRange() {++(*refCount);}
DTList(size_t len) : Data(len<=0 ? 0 : new T[len]), length(len<=0 ? 0 : len), refCount(new size_t(1)), outOfRange() {}
virtual ~DTList() {
if (*refCount==0) {delete [] Data; delete refCount;}
Data = 0; refCount = 0; length=0;
DTList(const DTList<T> &A)
: Data(A.Data), length(A.length), refCount(A.refCount), outOfRange() {
DTList(size_t len)
: Data(len <= 0 ? 0 : new T[len]), length(len <= 0 ? 0 : len),
refCount(new size_t(1)), outOfRange() {}
virtual ~DTList() {
if (*refCount == 0) {
delete[] Data;
delete refCount;
Data = 0;
refCount = 0;
length = 0;
DTList<T> &operator=(const DTList<T> &A) {
if (A.refCount!=refCount) { // Otherwise doing A=A.
if (A.refCount != refCount) { // Otherwise doing A=A.
if (*refCount==0) {delete [] Data; delete refCount;}
if (*refCount == 0) {
delete[] Data;
delete refCount;
refCount = A.refCount;
length = A.length;
@ -130,62 +167,69 @@ public:
return *this;
size_t MemoryUsed(void) const {return length*sizeof(T);}
const T *Pointer(void) const {return Data;}
size_t IsEmpty(void) const {return (Data==0);}
size_t Length(void) const {return length;}
const T operator()(size_t i) const {return Data[i];}
// Read-only queries on the list storage.
size_t MemoryUsed(void) const { return length * sizeof(T); } // bytes taken by the element buffer
const T *Pointer(void) const { return Data; } // raw read-only pointer to the elements
size_t IsEmpty(void) const { return (Data == 0); } // nonzero when no buffer is held
size_t Length(void) const { return length; } // number of elements
const T operator()(size_t i) const { return Data[i]; } // element i by value; index is not range-checked
T *Data;
size_t length;
size_t *refCount;
// Should be static.
T outOfRange;
template <class T>
class DTMutableList : public DTList<T> {
template <class T> class DTMutableList : public DTList<T> {
DTMutableList() : DTList<T>() {}
DTMutableList(size_t len) : DTList<T>(len) {}
DTMutableList(const DTMutableList<T> &A) : DTList<T>(A) {}
DTMutableList<T> &operator=(const DTMutableList<T> &A) {DTList<T>::operator=(A); return *this;}
T *Pointer(void) {return DTList<T>::Data;}
const T *Pointer(void) const {return DTList<T>::Data;}
T &operator()(size_t i) {return DTList<T>::Data[i];}
T operator()(size_t i) const {return DTList<T>::Data[i];}
DTMutableList<T> &operator=(T v) {for (size_t i=0;i<DTList<T>::length;i++) DTList<T>::Data[i] = v; return *this;}
DTMutableList<T> &operator=(const DTMutableList<T> &A) {
return *this;
T *Pointer(void) { return DTList<T>::Data; }
const T *Pointer(void) const { return DTList<T>::Data; }
T &operator()(size_t i) { return DTList<T>::Data[i]; }
T operator()(size_t i) const { return DTList<T>::Data[i]; }
DTMutableList<T> &operator=(T v) {
for (size_t i = 0; i < DTList<T>::length; i++)
DTList<T>::Data[i] = v;
return *this;
template <class T> DTMutableList<T> TruncateSize(const DTList<T> &A,size_t length)
if (length>A.Length()) length = A.Length();
template <class T>
DTMutableList<T> TruncateSize(const DTList<T> &A, size_t length) {
if (length > A.Length())
length = A.Length();
DTMutableList<T> toReturn(length);
const T *fromP = A.Pointer();
T *toP = toReturn.Pointer();
for (size_t i=0;i<length;i++) toP[i] = fromP[i];
for (size_t i = 0; i < length; i++)
toP[i] = fromP[i];
return toReturn;
template <class T> DTMutableList<T> IncreaseSize(const DTList<T> &A,size_t addLength)
DTMutableList<T> toReturn(A.Length()+(addLength>=0 ? addLength : 0));
template <class T>
DTMutableList<T> IncreaseSize(const DTList<T> &A, size_t addLength) {
DTMutableList<T> toReturn(A.Length() + (addLength >= 0 ? addLength : 0));
size_t len = A.Length();
const T *fromP = A.Pointer();
T *toP = toReturn.Pointer();
for (size_t i=0;i<len;i++) toP[i] = fromP[i];
for (size_t i = 0; i < len; i++)
toP[i] = fromP[i];
return toReturn;

File diff suppressed because it is too large Load Diff

View File

@ -17,49 +17,48 @@
#include "IO/Reader.h"
#include "IO/Writer.h"
class phase{
class phase {
int Nc;
double p;
double M,Px,Py,Pz,K,visc;
double V,A,H,X;
void reset(){
int Nc;
double p;
double M, Px, Py, Pz, K, visc;
double V, A, H, X;
void reset() {
p = M = Px = Py = Pz = K = 0.0;
visc = 0.0;
V = A = H = X = 0.0;
Nc = 1;
class interface{
class interface {
int Nc;
double M,Px,Py,Pz,K;
double Mw,Mn,Pnx,Pny,Pnz,Pwx,Pwy,Pwz,Kw,Kn;
double V,A,H,X;
void reset(){
Nc = 0;
int Nc;
double M, Px, Py, Pz, K;
double Mw, Mn, Pnx, Pny, Pnz, Pwx, Pwy, Pwz, Kw, Kn;
double V, A, H, X;
void reset() {
Nc = 0;
M = Px = Py = Pz = K = 0.0;
V = A = H = X = 0.0;
Mw = Mn = Pnx = Pny = Pnz = Pwx = Pwy = Pwz = Kw = Kn = 0.0;
class SubPhase{
class SubPhase {
std::shared_ptr <Domain> Dm;
double Volume;
// input variables
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
std::shared_ptr<Domain> Dm;
double Volume;
// input variables
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
* indices
* w - water phase
* n - not water phase
@ -68,53 +67,55 @@ public:
* i - interface region
* b - bulk (total)
// local entities
phase wc,wd,wb,nc,nd,nb,solid;
interface iwn,iwnc;
interface ifs;
// global entities
phase gwc,gwd,gwb,gnc,gnd,gnb,gsolid;
interface giwn,giwnc;
interface gifs;
/* fluid-solid wetting interaction */
double total_wetting_interaction, count_wetting_interaction;
double total_wetting_interaction_global, count_wetting_interaction_global;
int Nx,Ny,Nz;
IntArray PhaseID; // Phase ID array (solid=0, non-wetting=1, wetting=2)
BlobIDArray Label_WP; // Wetting phase label
BlobIDArray Label_NWP; // Non-wetting phase label index (0:nblobs-1)
std::vector<BlobIDType> Label_NWP_map; // Non-wetting phase label for each index
DoubleArray Rho_n; // density field
DoubleArray Rho_w; // density field
DoubleArray Phi; // phase indicator field
DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field
DoubleArray Pressure; // pressure field
DoubleArray Vel_x; // velocity field
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray Dissipation;
DoubleArray SDs;
// local entities
phase wc, wd, wb, nc, nd, nb, solid;
interface iwn, iwnc;
interface ifs;
std::shared_ptr<Minkowski> morph_w;
std::shared_ptr<Minkowski> morph_n;
std::shared_ptr<Minkowski> morph_i;
// global entities
phase gwc, gwd, gwb, gnc, gnd, gnb, gsolid;
interface giwn, giwnc;
interface gifs;
/* fluid-solid wetting interaction */
double total_wetting_interaction, count_wetting_interaction;
double total_wetting_interaction_global, count_wetting_interaction_global;
SubPhase(std::shared_ptr <Domain> Dm);
void SetParams(double rhoA, double rhoB, double tauA, double tauB, double force_x, double force_y, double force_z, double alpha, double beta);
void Basic();
void Full();
void Write(int time);
void AggregateLabels( const std::string& filename );
int Nx, Ny, Nz;
IntArray PhaseID; // Phase ID array (solid=0, non-wetting=1, wetting=2)
BlobIDArray Label_WP; // Wetting phase label
BlobIDArray Label_NWP; // Non-wetting phase label index (0:nblobs-1)
Label_NWP_map; // Non-wetting phase label for each index
DoubleArray Rho_n; // density field
DoubleArray Rho_w; // density field
DoubleArray Phi; // phase indicator field
DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field
DoubleArray Pressure; // pressure field
DoubleArray Vel_x; // velocity field
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray Dissipation;
DoubleArray SDs;
std::shared_ptr<Minkowski> morph_w;
std::shared_ptr<Minkowski> morph_n;
std::shared_ptr<Minkowski> morph_i;
SubPhase(std::shared_ptr<Domain> Dm);
void SetParams(double rhoA, double rhoB, double tauA, double tauB,
double force_x, double force_y, double force_z, double alpha,
double beta);
void Basic();
void Full();
void Write(int time);
void AggregateLabels(const std::string &filename);

File diff suppressed because it is too large Load Diff

View File

@ -33,171 +33,170 @@
#include "IO/Reader.h"
#include "IO/Writer.h"
class TwoPhase {
class TwoPhase{
int n_nw_pts, n_ns_pts, n_ws_pts, n_nws_pts, n_local_sol_pts,
int n_nw_tris, n_ns_tris, n_ws_tris, n_nws_seg, n_local_sol_tris;
int nc;
int kstart, kfinish;
int n_nw_pts,n_ns_pts,n_ws_pts,n_nws_pts,n_local_sol_pts,n_local_nws_pts;
int n_nw_tris,n_ns_tris,n_ws_tris,n_nws_seg,n_local_sol_tris;
int nc;
int kstart,kfinish;
double fluid_isovalue, solid_isovalue;
double Volume;
// initialize lists for vertices for surfaces, common line
DTMutableList<Point> nw_pts;
DTMutableList<Point> ns_pts;
DTMutableList<Point> ws_pts;
DTMutableList<Point> nws_pts;
DTMutableList<Point> local_sol_pts;
DTMutableList<Point> local_nws_pts;
DTMutableList<Point> tmp;
double fluid_isovalue, solid_isovalue;
double Volume;
// initialize lists for vertices for surfaces, common line
DTMutableList<Point> nw_pts;
DTMutableList<Point> ns_pts;
DTMutableList<Point> ws_pts;
DTMutableList<Point> nws_pts;
DTMutableList<Point> local_sol_pts;
DTMutableList<Point> local_nws_pts;
DTMutableList<Point> tmp;
// initialize triangle lists for surfaces
IntArray nw_tris;
IntArray ns_tris;
IntArray ws_tris;
IntArray nws_seg;
IntArray local_sol_tris;
// initialize triangle lists for surfaces
IntArray nw_tris;
IntArray ns_tris;
IntArray ws_tris;
IntArray nws_seg;
IntArray local_sol_tris;
// Temporary storage arrays
DoubleArray CubeValues;
DoubleArray Values;
DoubleArray DistanceValues;
DoubleArray KGwns_values;
DoubleArray KNwns_values;
DoubleArray InterfaceSpeed;
DoubleArray NormalVector;
// Temporary storage arrays
DoubleArray CubeValues;
DoubleArray Values;
DoubleArray DistanceValues;
DoubleArray KGwns_values;
DoubleArray KNwns_values;
DoubleArray InterfaceSpeed;
DoubleArray NormalVector;
DoubleArray RecvBuffer;
DoubleArray RecvBuffer;
char *TempID;
char *TempID;
// CSV / text file where time history of averages is saved
// CSV / text file where time history of averages is saved
std::shared_ptr <Domain> Dm;
int NumberComponents_WP,NumberComponents_NWP;
// Averaging variables
// local averages (to each MPI process)
double trimdist; // pixel distance to trim surface for specified averages
double porosity,poreVol;
double awn,ans,aws,lwns;
double wp_volume,nwp_volume;
double As, dummy;
double vol_w, vol_n; // volumes that exclude the interfacial region
double sat_w, sat_w_previous;
double pan,paw; // local phase averaged pressure
// Global averages (all processes)
double pan_global,paw_global; // global phase averaged pressure
double vol_w_global, vol_n_global; // volumes that exclude the interfacial region
double awn_global,ans_global,aws_global;
double lwns_global;
double efawns,efawns_global; // averaged contact angle
double euler,Kn,Jn,An;
double euler_global,Kn_global,Jn_global,An_global;
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn;
double Fx, Fy, Fz;
std::shared_ptr<Domain> Dm;
int NumberComponents_WP, NumberComponents_NWP;
// Averaging variables
// local averages (to each MPI process)
double trimdist; // pixel distance to trim surface for specified averages
double porosity, poreVol;
double awn, ans, aws, lwns;
double wp_volume, nwp_volume;
double As, dummy;
double vol_w, vol_n; // volumes that exclude the interfacial region
double sat_w, sat_w_previous;
double pan, paw; // local phase averaged pressure
// Global averages (all processes)
double pan_global, paw_global; // global phase averaged pressure
double vol_w_global,
vol_n_global; // volumes that exclude the interfacial region
double awn_global, ans_global, aws_global;
double lwns_global;
double efawns, efawns_global; // averaged contact angle
double euler, Kn, Jn, An;
double euler_global, Kn_global, Jn_global, An_global;
double Jwn,Jwn_global; // average mean curvature - wn interface
double Kwn,Kwn_global; // average Gaussian curvature - wn interface
double KNwns,KNwns_global; // wns common curve normal curvature
double KGwns,KGwns_global; // wns common curve geodesic curvature
double trawn,trawn_global; // trimmed interfacial area
double trJwn,trJwn_global; // NOTE(review): comment was duplicated from trawn; presumably trimmed mean curvature - confirm
double trRwn,trRwn_global; // NOTE(review): comment was duplicated from trawn; quantity not evident from here - confirm
double nwp_volume_global; // volume for the non-wetting phase
double wp_volume_global; // volume for the wetting phase
double As_global;
double wwndnw, wwndnw_global;
double wwnsdnwn, wwnsdnwn_global;
double Jwnwwndnw, Jwnwwndnw_global;
double dEs,dAwn,dAns; // Global surface energy (calculated by rank=0)
DoubleArray van;
DoubleArray vaw;
DoubleArray vawn;
DoubleArray vawns;
DoubleArray Gwn;
DoubleArray Gns;
DoubleArray Gws;
DoubleArray van_global;
DoubleArray vaw_global;
DoubleArray vawn_global;
DoubleArray vawns_global;
DoubleArray Gwn_global;
DoubleArray Gns_global;
DoubleArray Gws_global;
int Nx,Ny,Nz;
IntArray PhaseID; // Phase ID array (solid=0, non-wetting=1, wetting=2)
BlobIDArray Label_WP; // Wetting phase label
BlobIDArray Label_NWP; // Non-wetting phase label index (0:nblobs-1)
std::vector<BlobIDType> Label_NWP_map; // Non-wetting phase label for each index
DoubleArray SDn;
DoubleArray SDs;
DoubleArray Phase;
DoubleArray Press;
DoubleArray dPdt;
DoubleArray MeanCurvature;
DoubleArray GaussCurvature;
DoubleArray SDs_x; // Gradient of the signed distance
DoubleArray SDs_y;
DoubleArray SDs_z;
DoubleArray SDn_x; // Gradient of the signed distance
DoubleArray SDn_y;
DoubleArray SDn_z;
DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field
DoubleArray Phase_tplus;
DoubleArray Phase_tminus;
DoubleArray Vel_x; // Velocity
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray PhaseDistance;
double rho_n, rho_w;
double nu_n, nu_w;
double gamma_wn;
double Fx, Fy, Fz;
std::shared_ptr<Minkowski> wet_morph;
std::shared_ptr<Minkowski> nonwet_morph;
// Container for averages;
DoubleArray ComponentAverages_WP;
DoubleArray ComponentAverages_NWP;
TwoPhase(std::shared_ptr <Domain> Dm);
void Initialize();
// void SetupCubes(Domain &Dm);
void UpdateMeshValues();
void UpdateSolid();
void ComputeDelPhi();
void ColorToSignedDistance(double Beta, DoubleArray &ColorData, DoubleArray &DistData);
void ComputeLocal();
void AssignComponentLabels();
void ComponentAverages();
void Reduce();
void NonDimensionalize(double D, double viscosity, double IFT);
void PrintAll(int timestep);
int GetCubeLabel(int i, int j, int k, IntArray &BlobLabel);
void SortBlobs();
void PrintComponents(int timestep);
void SetParams(double rhoA, double rhoB, double tauA, double tauB, double force_x, double force_y, double force_z, double alpha);
double Volume_w(){
return wp_volume_global;
double Volume_n(){
return nwp_volume_global;
double Jwn, Jwn_global; // average mean curvature - wn interface
double Kwn, Kwn_global; // average Gaussian curvature - wn interface
double KNwns, KNwns_global; // wns common curve normal curvature
double KGwns, KGwns_global; // wns common curve geodesic curvature
double trawn, trawn_global; // trimmed interfacial area
double trJwn, trJwn_global; // NOTE(review): comment was duplicated from trawn; presumably trimmed mean curvature - confirm
double trRwn, trRwn_global; // NOTE(review): comment was duplicated from trawn; quantity not evident from here - confirm
double nwp_volume_global; // volume for the non-wetting phase
double wp_volume_global; // volume for the wetting phase
double As_global;
double wwndnw, wwndnw_global;
double wwnsdnwn, wwnsdnwn_global;
double Jwnwwndnw, Jwnwwndnw_global;
double dEs, dAwn, dAns; // Global surface energy (calculated by rank=0)
DoubleArray van;
DoubleArray vaw;
DoubleArray vawn;
DoubleArray vawns;
DoubleArray Gwn;
DoubleArray Gns;
DoubleArray Gws;
DoubleArray van_global;
DoubleArray vaw_global;
DoubleArray vawn_global;
DoubleArray vawns_global;
DoubleArray Gwn_global;
DoubleArray Gns_global;
DoubleArray Gws_global;
int Nx, Ny, Nz;
IntArray PhaseID; // Phase ID array (solid=0, non-wetting=1, wetting=2)
BlobIDArray Label_WP; // Wetting phase label
BlobIDArray Label_NWP; // Non-wetting phase label index (0:nblobs-1)
Label_NWP_map; // Non-wetting phase label for each index
DoubleArray SDn;
DoubleArray SDs;
DoubleArray Phase;
DoubleArray Press;
DoubleArray dPdt;
DoubleArray MeanCurvature;
DoubleArray GaussCurvature;
DoubleArray SDs_x; // Gradient of the signed distance
DoubleArray SDs_y;
DoubleArray SDs_z;
DoubleArray SDn_x; // Gradient of the signed distance
DoubleArray SDn_y;
DoubleArray SDn_z;
DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field
DoubleArray Phase_tplus;
DoubleArray Phase_tminus;
DoubleArray Vel_x; // Velocity
DoubleArray Vel_y;
DoubleArray Vel_z;
DoubleArray PhaseDistance;
std::shared_ptr<Minkowski> wet_morph;
std::shared_ptr<Minkowski> nonwet_morph;
// Container for averages;
DoubleArray ComponentAverages_WP;
DoubleArray ComponentAverages_NWP;
TwoPhase(std::shared_ptr<Domain> Dm);
void Initialize();
// void SetupCubes(Domain &Dm);
void UpdateMeshValues();
void UpdateSolid();
void ComputeDelPhi();
void ColorToSignedDistance(double Beta, DoubleArray &ColorData,
DoubleArray &DistData);
void ComputeLocal();
void AssignComponentLabels();
void ComponentAverages();
void Reduce();
void NonDimensionalize(double D, double viscosity, double IFT);
void PrintAll(int timestep);
int GetCubeLabel(int i, int j, int k, IntArray &BlobLabel);
void SortBlobs();
void PrintComponents(int timestep);
void SetParams(double rhoA, double rhoB, double tauA, double tauB,
double force_x, double force_y, double force_z,
double alpha);
double Volume_w() { return wp_volume_global; } // volume for the wetting phase (the *_global member)
double Volume_n() { return nwp_volume_global; } // volume for the non-wetting phase (the *_global member)

File diff suppressed because it is too large Load Diff

View File

@ -24,12 +24,10 @@
#include <map>
#include <vector>
// Define types to use for blob ids
typedef int32_t BlobIDType;
typedef Array<BlobIDType> BlobIDArray;
* @brief Compute the blob
* @details Compute the blob (F>vf|S>vs) starting from (i,j,k) - oil blob
@ -42,8 +40,9 @@ typedef Array<BlobIDType> BlobIDArray;
* @param[in] periodic Optional value
* @return Returns the number of blobs
int ComputeLocalBlobIDs( const DoubleArray& Phase, const DoubleArray& SignDist,
double vF, double vS, BlobIDArray& LocalBlobID, bool periodic=true );
int ComputeLocalBlobIDs(const DoubleArray &Phase, const DoubleArray &SignDist,
double vF, double vS, BlobIDArray &LocalBlobID,
bool periodic = true);
* @brief Compute blob of an arbitrary phase
@ -54,8 +53,8 @@ int ComputeLocalBlobIDs( const DoubleArray& Phase, const DoubleArray& SignDist,
* @param[out] ComponentLabel
* @param[in] periodic
int ComputeLocalPhaseComponent( const IntArray &PhaseID, int &VALUE, IntArray &ComponentLabel, bool periodic );
int ComputeLocalPhaseComponent(const IntArray &PhaseID, int &VALUE,
IntArray &ComponentLabel, bool periodic);
* @brief Compute the blob
@ -73,10 +72,11 @@ int ComputeLocalPhaseComponent( const IntArray &PhaseID, int &VALUE, IntArray &C
* @param[in] comm MPI communicator
* @return Returns the number of blobs
int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS,
BlobIDArray& GlobalBlobID, const Utilities::MPI& comm );
int ComputeGlobalBlobIDs(int nx, int ny, int nz,
const RankInfoStruct &rank_info,
const DoubleArray &Phase, const DoubleArray &SignDist,
double vF, double vS, BlobIDArray &GlobalBlobID,
const Utilities::MPI &comm);
* @brief Compute component of the specified phase
@ -92,9 +92,11 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf
* @param[in] comm The communicator to use
* @return Return the number of components in the specified phase
int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info,
const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm );
int ComputeGlobalPhaseComponent(int nx, int ny, int nz,
const RankInfoStruct &rank_info,
const IntArray &PhaseID, int &VALUE,
BlobIDArray &GlobalBlobID,
const Utilities::MPI &comm);
* @brief Reorder the blobs
@ -103,31 +105,35 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r
* @param[in,out] ID The ids of the blobs
* @param[in] comm MPI communicator
void ReorderBlobIDs( BlobIDArray& ID, const Utilities::MPI& comm );
void ReorderBlobIDs(BlobIDArray &ID, const Utilities::MPI &comm);
typedef std::pair<BlobIDType,std::vector<BlobIDType> > BlobIDSplitStruct;
typedef std::pair<std::vector<BlobIDType>,BlobIDType> BlobIDMergeStruct;
typedef std::pair<std::vector<BlobIDType>,std::vector<BlobIDType> > BlobIDMergeSplitStruct;
typedef std::pair<BlobIDType,BlobIDType> OverlapID;
typedef std::pair<BlobIDType, std::vector<BlobIDType>> BlobIDSplitStruct;
typedef std::pair<std::vector<BlobIDType>, BlobIDType> BlobIDMergeStruct;
typedef std::pair<std::vector<BlobIDType>, std::vector<BlobIDType>>
typedef std::pair<BlobIDType, BlobIDType> OverlapID;
struct ID_map_struct {
std::vector<BlobIDType> created; // list of new blobs that were created
std::vector<BlobIDType> destroyed; // list of blobs that disappeared
std::vector<std::pair<BlobIDType,BlobIDType> > src_dst; // one-one mapping of blobs (first,second timestep id)
std::vector<BlobIDSplitStruct> split; // list of blobs that split
std::vector<BlobIDMergeStruct> merge; // list of blobs that merged
std::vector<BlobIDMergeSplitStruct> merge_split; // list of blobs that both merged and split
std::map<OverlapID,int64_t> overlap; // for ids that are not a 1-1 mapping, this is a list of the overlaps <src,dst>
std::vector<BlobIDType> created; // list of new blobs that were created
std::vector<BlobIDType> destroyed; // list of blobs that disappeared
std::vector<std::pair<BlobIDType, BlobIDType>>
src_dst; // one-one mapping of blobs (first,second timestep id)
std::vector<BlobIDSplitStruct> split; // list of blobs that split
std::vector<BlobIDMergeStruct> merge; // list of blobs that merged
merge_split; // list of blobs that both merged and split
std::map<OverlapID, int64_t>
overlap; // for ids that are not a 1-1 mapping, this is a list of the overlaps <src,dst>
//! Empty constructor
ID_map_struct() {}
//! Create initial map from N blobs (ids ordered 0:N-1)
ID_map_struct( int N ) {
ID_map_struct(int N) {
for (int i=0; i<N; i++) { created[i]=i; }
for (int i = 0; i < N; i++) {
created[i] = i;
* @brief Get the mapping of blob ids between iterations
* @details This function computes the map of blob ids between iterations
@ -140,8 +146,8 @@ struct ID_map_struct {
* @param[in] ID2 The blob ids at the second timestep
* @param[in] comm The communicator to use
ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm );
ID_map_struct computeIDMap(int nx, int ny, int nz, const BlobIDArray &ID1,
const BlobIDArray &ID2, const Utilities::MPI &comm);
* @brief Compute the new global ids based on the map
@ -151,8 +157,8 @@ ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, cons
* @param[in] id_max The globally largest id used previously
* @param[out] new_ids The newly renumbered blob ids (0:ids.max())
void getNewIDs( ID_map_struct& map, BlobIDType& id_max, std::vector<BlobIDType>& new_ids );
void getNewIDs(ID_map_struct &map, BlobIDType &id_max,
std::vector<BlobIDType> &new_ids);
* @brief Update the blob ids based on mapping
@ -161,8 +167,7 @@ void getNewIDs( ID_map_struct& map, BlobIDType& id_max, std::vector<BlobIDType>&
* @param[in] new_ids The newly renumbered blob ids (0:ids.max())
* @param[in,out] IDs The blob ids to renumber
void renumberIDs( const std::vector<BlobIDType>& new_ids, BlobIDArray& IDs );
void renumberIDs(const std::vector<BlobIDType> &new_ids, BlobIDArray &IDs);
* @brief Write the ID map
@ -173,8 +178,7 @@ void renumberIDs( const std::vector<BlobIDType>& new_ids, BlobIDArray& IDs );
* @param[in] timestep The current timestep (timestep 0 creates the file)
* @param[in] filename The filename to write/append
void writeIDMap( const ID_map_struct& map, long long int timestep, const std::string& filename );
void writeIDMap(const ID_map_struct &map, long long int timestep,
const std::string &filename);

View File

@ -1,337 +1,377 @@
#include "analysis/dcel.h"
TriangleCount = 0;
VertexCount = 0;
// Return the FaceData entry stored at position `index`.
// LocalIsosurface writes a half-edge index into FaceData for each triangle,
// so callers use the result as an edge handle (presumably the triangle's
// first half-edge -- confirm against the full source).
// No bounds checking is performed on `index`.
int DCEL::Face(int index) { return FaceData[index]; }
void DCEL::Write() {
int e1, e2, e3;
TRIANGLES = fopen("triangles.stl", "w");
fprintf(TRIANGLES, "solid \n");
for (int idx = 0; idx < TriangleCount; idx++) {
e1 = Face(idx);
e2 =;
e3 =;
auto P1 = vertex.coords(halfedge.v1(e1));
auto P2 = vertex.coords(halfedge.v1(e2));
auto P3 = vertex.coords(halfedge.v1(e3));
fprintf(TRIANGLES, "vertex %f %f %f\n", P1.x, P1.y, P1.z);
fprintf(TRIANGLES, "vertex %f %f %f\n", P2.x, P2.y, P2.z);
fprintf(TRIANGLES, "vertex %f %f %f\n", P3.x, P3.y, P3.z);
int DCEL::Face(int index){
return FaceData[index];
void DCEL::LocalIsosurface(const DoubleArray &A, double value, const int i,
const int j, const int k) {
Point P, Q;
Point PlaceHolder;
Point C0, C1, C2, C3, C4, C5, C6, C7;
Point VertexList[12];
Point NewVertexList[12];
int LocalRemap[12];
Point cellvertices[20];
std::array<std::array<int, 3>, 20> Triangles;
// Values from array 'A' at the cube corners
double CubeValues[8];
// Points corresponding to cube corners
C0.x = 0.0;
C0.y = 0.0;
C0.z = 0.0;
C1.x = 1.0;
C1.y = 0.0;
C1.z = 0.0;
C2.x = 1.0;
C2.y = 1.0;
C2.z = 0.0;
C3.x = 0.0;
C3.y = 1.0;
C3.z = 0.0;
C4.x = 0.0;
C4.y = 0.0;
C4.z = 1.0;
C5.x = 1.0;
C5.y = 0.0;
C5.z = 1.0;
C6.x = 1.0;
C6.y = 1.0;
C6.z = 1.0;
C7.x = 0.0;
C7.y = 1.0;
C7.z = 1.0;
CubeValues[0] = A(i, j, k) - value;
CubeValues[1] = A(i + 1, j, k) - value;
CubeValues[2] = A(i + 1, j + 1, k) - value;
CubeValues[3] = A(i, j + 1, k) - value;
CubeValues[4] = A(i, j, k + 1) - value;
CubeValues[5] = A(i + 1, j, k + 1) - value;
CubeValues[6] = A(i + 1, j + 1, k + 1) - value;
CubeValues[7] = A(i, j + 1, k + 1) - value;
//printf("Set cube values: %i, %i, %i \n",i,j,k);
//Determine the index into the edge table which
//tells us which vertices are inside of the surface
int CubeIndex = 0;
if (CubeValues[0] < 0.0f)
CubeIndex |= 1;
if (CubeValues[1] < 0.0f)
CubeIndex |= 2;
if (CubeValues[2] < 0.0f)
CubeIndex |= 4;
if (CubeValues[3] < 0.0f)
CubeIndex |= 8;
if (CubeValues[4] < 0.0f)
CubeIndex |= 16;
if (CubeValues[5] < 0.0f)
CubeIndex |= 32;
if (CubeValues[6] < 0.0f)
CubeIndex |= 64;
if (CubeValues[7] < 0.0f)
CubeIndex |= 128;
//Find the vertices where the surface intersects the cube
if (edgeTable[CubeIndex] & 1) {
P = VertexInterp(C0, C1, CubeValues[0], CubeValues[1]);
VertexList[0] = P;
Q = C0;
if (edgeTable[CubeIndex] & 2) {
P = VertexInterp(C1, C2, CubeValues[1], CubeValues[2]);
VertexList[1] = P;
Q = C1;
if (edgeTable[CubeIndex] & 4) {
P = VertexInterp(C2, C3, CubeValues[2], CubeValues[3]);
VertexList[2] = P;
Q = C2;
if (edgeTable[CubeIndex] & 8) {
P = VertexInterp(C3, C0, CubeValues[3], CubeValues[0]);
VertexList[3] = P;
Q = C3;
if (edgeTable[CubeIndex] & 16) {
P = VertexInterp(C4, C5, CubeValues[4], CubeValues[5]);
VertexList[4] = P;
Q = C4;
if (edgeTable[CubeIndex] & 32) {
P = VertexInterp(C5, C6, CubeValues[5], CubeValues[6]);
VertexList[5] = P;
Q = C5;
if (edgeTable[CubeIndex] & 64) {
P = VertexInterp(C6, C7, CubeValues[6], CubeValues[7]);
VertexList[6] = P;
Q = C6;
if (edgeTable[CubeIndex] & 128) {
P = VertexInterp(C7, C4, CubeValues[7], CubeValues[4]);
VertexList[7] = P;
Q = C7;
if (edgeTable[CubeIndex] & 256) {
NewVertexList[VertexCount] = VertexList[triTable[CubeIndex][idx]];
LocalRemap[triTable[CubeIndex][idx]] = VertexCount;
for (int idx = 0; idx < VertexCount; idx++) {
P = NewVertexList[idx];
cellvertices[idx] = P;
TriangleCount = 0;
for (int idx = 0; triTable[CubeIndex][idx] != -1; idx += 3) {
Triangles[TriangleCount][0] = LocalRemap[triTable[CubeIndex][idx + 0]];
Triangles[TriangleCount][1] = LocalRemap[triTable[CubeIndex][idx + 1]];
Triangles[TriangleCount][2] = LocalRemap[triTable[CubeIndex][idx + 2]];
int nTris = TriangleCount;
if (nTris > 0) {
halfedge.resize(nTris * 3);
int idx_edge = 0;
for (int idx = 0; idx < TriangleCount; idx++) {
int V1 = Triangles[idx][0];
int V2 = Triangles[idx][1];
int V3 = Triangles[idx][2];
FaceData[idx] = idx_edge;
// first edge: V1->V2, idx_edge) = V1; // first vertex, idx_edge) = V2; // second vertex, idx_edge) = idx; // triangle, idx_edge) = -1; // twin, idx_edge) = idx_edge + 2; // previous edge, idx_edge) = idx_edge + 1; // next edge
// second edge: V2->V3, idx_edge) = V2; // first vertex, idx_edge) = V3; // second vertex, idx_edge) = idx; // triangle, idx_edge) = -1; // twin, idx_edge) = idx_edge - 1; // previous edge, idx_edge) = idx_edge + 1; // next edge
// third edge: V3->V1, idx_edge) = V3; // first vertex, idx_edge) = V1; // second vertex, idx_edge) = idx; // triangle, idx_edge) = -1; // twin, idx_edge) = idx_edge - 1; // previous edge, idx_edge) = idx_edge - 2; // next edge
int EdgeCount = idx_edge;
for (int idx = 0; idx < EdgeCount; idx++) {
int V1 =, idx);
int V2 =, idx);
// Find all the twins within the cube
for (int jdx = 0; jdx < EdgeCount; jdx++) {
if (, jdx) == V1 &&, jdx) == V2) {
// this is the pair, idx) = jdx;, jdx) = idx;
if (, jdx) == V2 &&, jdx) == V1 && !(idx == jdx)) {
"WARNING: half edges with identical orientation! \n");
// Use "ghost" twins if edge is on a cube face
P = cellvertices[V1];
Q = cellvertices[V2];
if (P.x == 0.0 && Q.x == 0.0), idx) = -1; // ghost twin for x=0 face
if (P.x == 1.0 && Q.x == 1.0), idx) = -4; // ghost twin for x=1 face
if (P.y == 0.0 && Q.y == 0.0), idx) = -2; // ghost twin for y=0 face
if (P.y == 1.0 && Q.y == 1.0), idx) = -5; // ghost twin for y=1 face
if (P.z == 0.0 && Q.z == 0.0), idx) = -3; // ghost twin for z=0 face
if (P.z == 1.0 && Q.z == 1.0), idx) = -6; // ghost twin for z=1 face
// Map vertices to global coordinates
for (int idx = 0; idx < VertexCount; idx++) {
P = cellvertices[idx];
P.x += i;
P.y += j;
P.z += k;
vertex.assign(idx, P);
Point DCEL::TriNormal(int edge) {
Point P, Q, R;
Point U, V, W;
double nx, ny, nz, len;
// at cube faces define outward normal to cube
if (edge == -1) {
W.x = -1.0;
W.y = 0.0;
W.z = 0.0; // x cube face
} else if (edge == -2) {
W.x = 0.0;
W.y = -1.0;
W.z = 0.0; // y cube face
} else if (edge == -3) {
W.x = 0.0;
W.y = 0.0;
W.z = -1.0; // z cube face
} else if (edge == -4) {
W.x = 1.0;
W.y = 0.0;
W.z = 0.0; // x cube face
} else if (edge == -5) {
W.x = 0.0;
W.y = 1.0;
W.z = 0.0; // y cube face
} else if (edge == -6) {
W.x = 0.0;
W.y = 0.0;
W.z = 1.0; // z cube face
} else {
// vertices for triangle
int e2 =;
int e3 =;
P = vertex.coords(halfedge.v1(edge));
Q = vertex.coords(halfedge.v1(e2));
R = vertex.coords(halfedge.v1(e3));
// edge vectors
U = Q - P;
V = R - Q;
// normal vector
nx = U.y * V.z - U.z * V.y;
ny = U.z * V.x - U.x * V.z;
nz = U.x * V.y - U.y * V.x;
len = sqrt(nx * nx + ny * ny + nz * nz);
W.x = nx / len;
W.y = ny / len;
W.z = nz / len;
return W;
Triangles[TriangleCount][0] = LocalRemap[triTable[CubeIndex][idx+0]];
Triangles[TriangleCount][1] = LocalRemap[triTable[CubeIndex][idx+1]];
Triangles[TriangleCount][2] = LocalRemap[triTable[CubeIndex][idx+2]];
Point P,Q,R;
Point U,V,W;
double nx,ny,nz,len;
// at cube faces define outward normal to cube
if (edge == -1){
W.x = -1.0; W.y = 0.0; W.z = 0.0; // x cube face
else if (edge == -2){
W.x = 0.0; W.y = -1.0; W.z = 0.0; // y cube face
else if (edge == -3){
W.x = 0.0; W.y = 0.0; W.z = -1.0; // z cube face
else if (edge == -4){
W.x = 1.0; W.y = 0.0; W.z = 0.0; // x cube face
else if (edge == -5){
W.x = 0.0; W.y = 1.0; W.z = 0.0; // y cube face
else if (edge == -6){
W.x = 0.0; W.y = 0.0; W.z = 1.0; // z cube face
// vertices for triangle
int e2 =;
int e3 =;
// edge vectors
U = Q-P;
V = R-Q;
// normal vector
nx = U.y*V.z - U.z*V.y;
ny = U.z*V.x - U.x*V.z;
nz = U.x*V.y - U.y*V.x;
len = sqrt(nx*nx+ny*ny+nz*nz);
W.x = nx/len; W.y = ny/len; W.z = nz/len;
return W;
double DCEL::EdgeAngle(int edge)
double angle;
double dotprod;
Point P,Q,R; // triangle vertices
Point U,V,W; // normal vectors
int e2 =;
int e3 =;
U = TriNormal(edge);
V = TriNormal(halfedge.twin(edge));
if (halfedge.twin(edge) < 0 ){
// compute edge normal in plane of cube face
W = P - Q; // edge tangent vector
double length = sqrt(W.x*W.x+W.y*W.y+W.z*W.z);
W.x /= length;
W.y /= length;
W.z /= length;
// edge normal within the plane of the cube face
double nx = W.y*V.z - W.z*V.y;
double ny = W.z*V.x - W.x*V.z;
double nz = W.x*V.y - W.y*V.x;
length = sqrt(nx*nx+ny*ny+nz*nz);
// new value for V is this normal vector
V.x = nx/length; V.y = ny/length; V.z = nz/length;
dotprod = U.x*V.x + U.y*V.y + U.z*V.z;
if (dotprod < 0.f){
//printf("negative dot product on face\n");
V.x = -V.x; V.y = -V.y; V.z = -V.z;
if (dotprod > 1.f) dotprod=1.f;
if (dotprod < -1.f) dotprod=-1.f;
angle = acos(dotprod);
/* project onto plane of cube face also works
if (dotprod > 1.f)
dotprod = 1.f;
if (dotprod < -1.f)
dotprod = -1.f;
angle = acos(dotprod);
/* project onto plane of cube face also works
W = U - dotprod*V;
length = sqrt(W.x*W.x+W.y*W.y+W.z*W.z); // for normalization
dotprod = (U.x*W.x + U.y*W.y + U.z*W.z)/length;
if (dotprod < -1.f) dotprod=-1.f;
angle = acos(dotprod);
dotprod=U.x*V.x + U.y*V.y + U.z*V.z;
if (dotprod > 1.f) dotprod=1.f;
if (dotprod < -1.f) dotprod=-1.f;
angle = 0.5*acos(dotprod);
// determine if angle is concave or convex based on edge normal
W.x = (P.y-Q.y)*U.z - (P.z-Q.z)*U.y;
W.y = (P.z-Q.z)*U.x - (P.x-Q.x)*U.z;
W.z = (P.x-Q.x)*U.y - (P.y-Q.y)*U.x;
//length = sqrt(nx*nx+ny*ny+nz*nz);
Point w=0.5*(P+Q)-R;
if (W.x*w.x + W.y*w.y + W.z*w.z < 0.f){
//printf("flip edge normal \n");
W.x = -W.x;
W.y = -W.y;
W.z = -W.z;
if (W.x*V.x + W.y*V.y + W.z*V.z > 0.f){
// concave
angle = -angle;
if (angle != angle) angle = 0.0;
//printf("angle=%f,dot=%f (Edge=%i, twin=%i): P={%f, %f, %f}, Q={%f, %f, %f} U={%f, %f, %f}, V={%f, %f, %f}\n",angle,dotprod,edge,halfedge.twin(edge),P.x,P.y,P.z,Q.x,Q.y,Q.z,U.x,U.y,U.z,V.x,V.y,V.z);
void iso_surface(const Array<double>&Field, const double isovalue)
DCEL object;
int e1,e2,e3;
TRIANGLES = fopen("isosurface.stl","w");
fprintf(TRIANGLES,"solid isosurface\n");
int Nx = Field.size(0);
int Ny = Field.size(1);
int Nz = Field.size(2);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
for (int idx=0; idx<object.TriangleCount; idx++){
e1 = object.Face(idx);
e2 =;
e3 =;
auto P1 = object.vertex.coords(object.halfedge.v1(e1));
auto P2 = object.vertex.coords(object.halfedge.v1(e2));
auto P3 = object.vertex.coords(object.halfedge.v1(e3));
auto Normal = object.TriNormal(e1);
// P1.x += 1.0*i; P1.y += 1.0*j; P1.z +=1.0*k;
//P2.x += 1.0*i; P2.y += 1.0*j; P2.z +=1.0*k;
//P3.x += 1.0*i; P3.y += 1.0*j; P3.z +=1.0*k;
fprintf(TRIANGLES,"facet normal %f %f %f\n",Normal.x,Normal.y,Normal.z);
fprintf(TRIANGLES," outer loop\n");
fprintf(TRIANGLES," vertex %f %f %f\n",P1.x,P1.y,P1.z);
fprintf(TRIANGLES," vertex %f %f %f\n",P2.x,P2.y,P2.z);
fprintf(TRIANGLES," vertex %f %f %f\n",P3.x,P3.y,P3.z);
fprintf(TRIANGLES," endloop\n");
} else {
dotprod = U.x * V.x + U.y * V.y + U.z * V.z;
if (dotprod > 1.f)
dotprod = 1.f;
if (dotprod < -1.f)
dotprod = -1.f;
angle = 0.5 * acos(dotprod);
fprintf(TRIANGLES,"endsolid isosurface\n");
W.x = (P.y - Q.y) * U.z - (P.z - Q.z) * U.y;
W.y = (P.z - Q.z) * U.x - (P.x - Q.x) * U.z;
W.z = (P.x - Q.x) * U.y - (P.y - Q.y) * U.x;
//length = sqrt(nx*nx+ny*ny+nz*nz);
Point w = 0.5 * (P + Q) - R;
if (W.x * w.x + W.y * w.y + W.z * w.z < 0.f) {
//printf("flip edge normal \n");
W.x = -W.x;
W.y = -W.y;
W.z = -W.z;
if (W.x * V.x + W.y * V.y + W.z * V.z > 0.f) {
// concave
angle = -angle;
if (angle != angle)
angle = 0.0;
//printf("angle=%f,dot=%f (Edge=%i, twin=%i): P={%f, %f, %f}, Q={%f, %f, %f} U={%f, %f, %f}, V={%f, %f, %f}\n",angle,dotprod,edge,halfedge.twin(edge),P.x,P.y,P.z,Q.x,Q.y,Q.z,U.x,U.y,U.z,V.x,V.y,V.z);
return angle;
void iso_surface(const Array<double> &Field, const double isovalue) {
DCEL object;
int e1, e2, e3;
TRIANGLES = fopen("isosurface.stl", "w");
fprintf(TRIANGLES, "solid isosurface\n");
int Nx = Field.size(0);
int Ny = Field.size(1);
int Nz = Field.size(2);
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
object.LocalIsosurface(Field, isovalue, i, j, k);
for (int idx = 0; idx < object.TriangleCount; idx++) {
e1 = object.Face(idx);
e2 =;
e3 =;
auto P1 = object.vertex.coords(object.halfedge.v1(e1));
auto P2 = object.vertex.coords(object.halfedge.v1(e2));
auto P3 = object.vertex.coords(object.halfedge.v1(e3));
auto Normal = object.TriNormal(e1);
// P1.x += 1.0*i; P1.y += 1.0*j; P1.z +=1.0*k;
//P2.x += 1.0*i; P2.y += 1.0*j; P2.z +=1.0*k;
//P3.x += 1.0*i; P3.y += 1.0*j; P3.z +=1.0*k;
fprintf(TRIANGLES, "facet normal %f %f %f\n", Normal.x,
Normal.y, Normal.z);
fprintf(TRIANGLES, " outer loop\n");
fprintf(TRIANGLES, " vertex %f %f %f\n", P1.x, P1.y,
fprintf(TRIANGLES, " vertex %f %f %f\n", P2.x, P2.y,
fprintf(TRIANGLES, " vertex %f %f %f\n", P3.x, P3.y,
fprintf(TRIANGLES, " endloop\n");
fprintf(TRIANGLES, "endfacet\n");
fprintf(TRIANGLES, "endsolid isosurface\n");

// Vertex structure
class Vertex{
class Vertex {
Vertex() { d_data.resize(12); }
~Vertex() = default;
Vertex( const Vertex& ) = delete;
Vertex operator=( const Vertex& ) = delete;
Vertex() { d_data.resize(12); }
~Vertex() = default;
Vertex(const Vertex &) = delete;
Vertex operator=(const Vertex &) = delete;
// Add/assign a point
inline void add( const Point& P ) { d_data.push_back( P ); }
inline void assign( int idx, const Point& P ) { d_data[idx] = P; }
inline void add(const Point &P) { d_data.push_back(P); }
inline void assign(int idx, const Point &P) { d_data[idx] = P; }
// Get a point
inline Point& coords( int idx ) { return d_data[idx]; }
inline const Point& coords( int idx ) const { return d_data[idx]; }
inline Point &coords(int idx) { return d_data[idx]; }
inline const Point &coords(int idx) const { return d_data[idx]; }
int IncidentEdge();
int IncidentEdge();
// Return the number of points
inline int size() const { return d_data.size(); }
inline int size() const { return d_data.size(); }
std::vector<Point> d_data;
std::vector<Point> d_data;
* \class Halfedge
* @brief store half edge for DCEL data structure
class Halfedge{
class Halfedge {
Halfedge() = default;
~Halfedge() = default;
Halfedge( const Halfedge& ) = delete;
Halfedge operator=( const Halfedge& ) = delete;
Halfedge() = default;
~Halfedge() = default;
Halfedge(const Halfedge &) = delete;
Halfedge operator=(const Halfedge &) = delete;
inline int v1(int edge) const { return d_data[edge][0]; }
inline int v2(int edge) const { return d_data[edge][1]; }
inline int face(int edge) const { return d_data[edge][2]; }
inline int twin(int edge) const { return d_data[edge][3]; }
inline int prev(int edge) const { return d_data[edge][4]; }
inline int next(int edge) const { return d_data[edge][5]; }
inline int v1(int edge) const { return d_data[edge][0]; }
inline int v2(int edge) const { return d_data[edge][1]; }
inline int face(int edge) const { return d_data[edge][2]; }
inline int twin(int edge) const { return d_data[edge][3]; }
inline int prev(int edge) const { return d_data[edge][4]; }
inline int next(int edge) const { return d_data[edge][5]; }
inline int size() const { return d_data.size(); }
inline void resize( int N ) { d_data.resize( N ); }
inline int size() const { return d_data.size(); }
inline void resize(int N) { d_data.resize(N); }
inline int& data( int i, int j ) { return d_data[j][i]; }
inline const int& data( int i, int j ) const { return d_data[j][i]; }
inline int &data(int i, int j) { return d_data[j][i]; }
inline const int &data(int i, int j) const { return d_data[j][i]; }
std::vector<std::array<int,6>> d_data;
std::vector<std::array<int, 6>> d_data;
* \class DCEL
* @details doubly connected edge list data structure
class DCEL{
class DCEL {
int face();
Vertex vertex;
Halfedge halfedge;
void LocalIsosurface(const DoubleArray& A, double value, int i, int j, int k);
void Write();
int Face(int index);
double origin(int edge);
double EdgeAngle(int edge);
Point TriNormal(int edge);
int TriangleCount;
int VertexCount;
int face();
Vertex vertex;
Halfedge halfedge;
void LocalIsosurface(const DoubleArray &A, double value, int i, int j,
int k);
void Write();
int Face(int index);
double origin(int edge);
double EdgeAngle(int edge);
Point TriNormal(int edge);
int TriangleCount;
int VertexCount;
std::vector<int> FaceData;
std::vector<int> FaceData;
void iso_surface(const Array<double>&Field, const double isovalue);
void iso_surface(const Array<double> &Field, const double isovalue);

View File

@ -16,39 +16,37 @@
#include "analysis/distance.h"
* A fast distance calculation *
template<class TYPE>
void CalcDist( Array<TYPE> &Distance, const Array<char> &ID, const Domain &Dm,
const std::array<bool,3>& periodic, const std::array<double,3>& dx )
ASSERT( Distance.size() == ID.size() );
std::array<int,3> n = { Dm.Nx-2, Dm.Ny-2, Dm.Nz-2 };
fillHalo<int> fillData( Dm.Comm, Dm.rank_info, n, {1,1,1}, 50, 1, {true,false,false}, periodic );
template <class TYPE>
void CalcDist(Array<TYPE> &Distance, const Array<char> &ID, const Domain &Dm,
const std::array<bool, 3> &periodic,
const std::array<double, 3> &dx) {
ASSERT(Distance.size() == ID.size());
std::array<int, 3> n = {Dm.Nx - 2, Dm.Ny - 2, Dm.Nz - 2};
fillHalo<int> fillData(Dm.Comm, Dm.rank_info, n, {1, 1, 1}, 50, 1,
{true, false, false}, periodic);
Array<int> id(ID.size());
Array<Vec> vecDist(Distance.size());
for (size_t i=0; i<ID.length(); i++)
id(i) = ID(i) == 0 ? -1:1;
fillData.fill( id );
CalcVecDist( vecDist, id, Dm, periodic, dx );
for (size_t i=0; i<Distance.length(); i++)
Distance(i) = id(i)*vecDist(i).norm();
for (size_t i = 0; i < ID.length(); i++)
id(i) = ID(i) == 0 ? -1 : 1;
CalcVecDist(vecDist, id, Dm, periodic, dx);
for (size_t i = 0; i < Distance.length(); i++)
Distance(i) = id(i) * vecDist(i).norm();
* Vector-based distance calculation *
* Initialize cells adjacent to boundaries *
static void calcVecInitialize( Array<Vec> &d, const Array<int> &ID, double dx, double dy, double dz )
d.fill( Vec( 1e50, 1e50, 1e50 ) );
const double dx0 = 0.5*dx;
const double dy0 = 0.5*dy;
const double dz0 = 0.5*dz;
static void calcVecInitialize(Array<Vec> &d, const Array<int> &ID, double dx,
double dy, double dz) {
d.fill(Vec(1e50, 1e50, 1e50));
const double dx0 = 0.5 * dx;
const double dy0 = 0.5 * dy;
const double dz0 = 0.5 * dz;
//const double dxy0 = 0.25*sqrt( dx*dx + dy*dy );
//const double dxz0 = 0.25*sqrt( dx*dx + dz*dz );
//const double dyz0 = 0.25*sqrt( dy*dy + dz*dz );
@ -56,19 +54,25 @@ static void calcVecInitialize( Array<Vec> &d, const Array<int> &ID, double dx, d
int Nx = d.size(0);
int Ny = d.size(1);
int Nz = d.size(2);
for (int k=1; k<Nz-1; k++) {
for (int j=1; j<Ny-1; j++) {
for (int i=1; i<Nx-1; i++) {
int id = ID(i,j,k);
bool x[2] = { id != ID(i-1,j,k), id != ID(i+1,j,k) };
bool y[2] = { id != ID(i,j-1,k), id != ID(i,j+1,k) };
bool z[2] = { id != ID(i,j,k-1), id != ID(i,j,k+1) };
if ( x[0] ) d(i,j,k) = Vec( dx0, 0, 0 );
if ( x[1] ) d(i,j,k) = Vec( -dx0, 0, 0 );
if ( y[0] ) d(i,j,k) = Vec( 0, dy0, 0 );
if ( y[1] ) d(i,j,k) = Vec( 0, -dy0, 0 );
if ( z[0] ) d(i,j,k) = Vec( 0, 0, dz0 );
if ( z[1] ) d(i,j,k) = Vec( 0, 0, -dz0 );
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
int id = ID(i, j, k);
bool x[2] = {id != ID(i - 1, j, k), id != ID(i + 1, j, k)};
bool y[2] = {id != ID(i, j - 1, k), id != ID(i, j + 1, k)};
bool z[2] = {id != ID(i, j, k - 1), id != ID(i, j, k + 1)};
if (x[0])
d(i, j, k) = Vec(dx0, 0, 0);
if (x[1])
d(i, j, k) = Vec(-dx0, 0, 0);
if (y[0])
d(i, j, k) = Vec(0, dy0, 0);
if (y[1])
d(i, j, k) = Vec(0, -dy0, 0);
if (z[0])
d(i, j, k) = Vec(0, 0, dz0);
if (z[1])
d(i, j, k) = Vec(0, 0, -dz0);
/*if ( x[0] && y[0] ) d(i,j,k) = Vec( dxy0, dxy0, 0 );
if ( x[0] && y[1] ) d(i,j,k) = Vec( dxy0, -dxy0, 0 );
if ( x[1] && y[0] ) d(i,j,k) = Vec( -dxy0, dxy0, 0 );
@ -84,56 +88,54 @@ static void calcVecInitialize( Array<Vec> &d, const Array<int> &ID, double dx, d
* Vector-based distance calculation *
* Update interior cells *
static double calcVecUpdateInterior( Array<Vec> &d, double dx, double dy, double dz )
static double calcVecUpdateInterior(Array<Vec> &d, double dx, double dy,
double dz) {
double err = 0;
int Nx = d.size(0);
int Ny = d.size(1);
int Nz = d.size(2);
// Propagate (+,+,+)
for (int k=1; k<Nz; k++) {
for (int j=1; j<Ny; j++) {
for (int i=1; i<Nx; i++) {
auto vx = d(i-1,j,k);
auto vy = d(i,j-1,k);
auto vz = d(i,j,k-1);
for (int k = 1; k < Nz; k++) {
for (int j = 1; j < Ny; j++) {
for (int i = 1; i < Nx; i++) {
auto vx = d(i - 1, j, k);
auto vy = d(i, j - 1, k);
auto vz = d(i, j, k - 1);
vx.x += dx;
vy.y += dy;
vz.z += dz;
auto v = std::min( std::min(vx,vy), vz );
auto v = std::min(std::min(vx, vy), vz);
double d1 = v.norm2();
double d2 = d(i,j,k).norm2();
if ( d1 < d2 ) {
d(i,j,k) = v;
err = std::max( err, sqrt(d2)-sqrt(d1) );
double d2 = d(i, j, k).norm2();
if (d1 < d2) {
d(i, j, k) = v;
err = std::max(err, sqrt(d2) - sqrt(d1));
// Propagate (-,-,-)
for (int k=Nz-2; k>=0; k--) {
for (int j=Ny-2; j>=0; j--) {
for (int i=Nx-2; i>=0; i--) {
auto vx = d(i+1,j,k);
auto vy = d(i,j+1,k);
auto vz = d(i,j,k+1);
for (int k = Nz - 2; k >= 0; k--) {
for (int j = Ny - 2; j >= 0; j--) {
for (int i = Nx - 2; i >= 0; i--) {
auto vx = d(i + 1, j, k);
auto vy = d(i, j + 1, k);
auto vz = d(i, j, k + 1);
vx.x -= dx;
vy.y -= dy;
vz.z -= dz;
auto v = std::min( std::min(vx,vy), vz );
auto v = std::min(std::min(vx, vy), vz);
double d1 = v.norm2();
double d2 = d(i,j,k).norm2();
if ( d1 < d2 ) {
d(i,j,k) = v;
err = std::max( err, sqrt(d2)-sqrt(d1) );
double d2 = d(i, j, k).norm2();
if (d1 < d2) {
d(i, j, k) = v;
err = std::max(err, sqrt(d2) - sqrt(d1));
@ -141,66 +143,68 @@ static double calcVecUpdateInterior( Array<Vec> &d, double dx, double dy, double
return err;
* Vector-based distance calculation *
void CalcVecDist( Array<Vec> &d, const Array<int> &ID0, const Domain &Dm,
const std::array<bool,3>& periodic, const std::array<double,3>& dx )
std::array<int,3> N = { Dm.Nx, Dm.Ny, Dm.Nz };
std::array<int,3> n = { Dm.Nx-2, Dm.Ny-2, Dm.Nz-2 };
void CalcVecDist(Array<Vec> &d, const Array<int> &ID0, const Domain &Dm,
const std::array<bool, 3> &periodic,
const std::array<double, 3> &dx) {
std::array<int, 3> N = {Dm.Nx, Dm.Ny, Dm.Nz};
std::array<int, 3> n = {Dm.Nx - 2, Dm.Ny - 2, Dm.Nz - 2};
// Create ID with ghosts
Array<int> ID(N[0],N[1],N[2]);
fillHalo<int> fillDataID( Dm.Comm, Dm.rank_info, n, {1,1,1}, 50, 1, {true,true,true}, periodic );
fillDataID.copy( ID0, ID );
Array<int> ID(N[0], N[1], N[2]);
fillHalo<int> fillDataID(Dm.Comm, Dm.rank_info, n, {1, 1, 1}, 50, 1,
{true, true, true}, periodic);
fillDataID.copy(ID0, ID);
// Fill ghosts with nearest neighbor
for (int k=1; k<N[2]-1; k++) {
for (int j=1; j<N[1]-1; j++) {
ID(0,j,k) = ID(1,j,k);
ID(N[0]-1,j,k) = ID(N[0]-2,j,k);
for (int k = 1; k < N[2] - 1; k++) {
for (int j = 1; j < N[1] - 1; j++) {
ID(0, j, k) = ID(1, j, k);
ID(N[0] - 1, j, k) = ID(N[0] - 2, j, k);
for (int k=1; k<N[2]-1; k++) {
for (int i=0; i<N[0]; i++) {
ID(i,0,k) = ID(i,1,k);
ID(i,N[1]-1,k) = ID(i,N[1]-2,k);
for (int k = 1; k < N[2] - 1; k++) {
for (int i = 0; i < N[0]; i++) {
ID(i, 0, k) = ID(i, 1, k);
ID(i, N[1] - 1, k) = ID(i, N[1] - 2, k);
for (int i=0; i<N[0]; i++) {
for (int j=0; j<N[1]; j++) {
ID(i,j,0) = ID(i,j,1);
ID(i,j,N[2]-1) = ID(i,j,N[2]-2);
for (int i = 0; i < N[0]; i++) {
for (int j = 0; j < N[1]; j++) {
ID(i, j, 0) = ID(i, j, 1);
ID(i, j, N[2] - 1) = ID(i, j, N[2] - 2);
// Communicate ghosts
fillDataID.fill( ID );
// Create communicator for distance
fillHalo<Vec> fillData( Dm.Comm, Dm.rank_info, n, {1,1,1}, 50, 1, {true,false,false}, periodic );
fillHalo<Vec> fillData(Dm.Comm, Dm.rank_info, n, {1, 1, 1}, 50, 1,
{true, false, false}, periodic);
// Calculate the local distances
calcVecInitialize( d, ID, dx[0], dx[1], dx[2] );
calcVecInitialize(d, ID, dx[0], dx[1], dx[2]);
double err = 1e100;
double tol = 0.5 * std::min( std::min(dx[0],dx[1]), dx[2] );
for (int it=0; it<=50 && err>tol; it++) {
err = calcVecUpdateInterior( d, dx[0], dx[1], dx[2] );
double tol = 0.5 * std::min(std::min(dx[0], dx[1]), dx[2]);
for (int it = 0; it <= 50 && err > tol; it++) {
err = calcVecUpdateInterior(d, dx[0], dx[1], dx[2]);
// Calculate the global distances
int N_it = Dm.nprocx() + Dm.nprocy() + Dm.nprocz() + 100;
for ( int it=0; it<N_it; it++ ) {
for (int it = 0; it < N_it; it++) {
// Update ghosts
fillData.fill( d );
// Update distance
double err = calcVecUpdateInterior( d, dx[0], dx[1], dx[2] );
double err = calcVecUpdateInterior(d, dx[0], dx[1], dx[2]);
// Check if we are finished
err = Dm.Comm.maxReduce( err );
if ( err < tol )
err = Dm.Comm.maxReduce(err);
if (err < tol)
// Explicit instantiations
template void CalcDist<float>( Array<float>&, const Array<char>&, const Domain&, const std::array<bool,3>&, const std::array<double,3>& );
template void CalcDist<double>( Array<double>&, const Array<char>&, const Domain&, const std::array<bool,3>&, const std::array<double,3>& );
template void CalcDist<float>(Array<float> &, const Array<char> &,
const Domain &, const std::array<bool, 3> &,
const std::array<double, 3> &);
template void CalcDist<double>(Array<double> &, const Array<char> &,
const Domain &, const std::array<bool, 3> &,
const std::array<double, 3> &);

View File

@ -20,18 +20,19 @@
#include "common/Domain.h"
#include "common/Array.hpp"
struct Vec {
double x;
double y;
double z;
inline Vec(): x(0), y(0), z(0) {}
inline Vec( double x_, double y_, double z_ ): x(x_), y(y_), z(z_) {}
inline double norm() const { return sqrt(x*x+y*y+z*z); }
inline double norm2() const { return x*x+y*y+z*z; }
inline Vec() : x(0), y(0), z(0) {}
inline Vec(double x_, double y_, double z_) : x(x_), y(y_), z(z_) {}
inline double norm() const { return sqrt(x * x + y * y + z * z); }
inline double norm2() const { return x * x + y * y + z * z; }
inline bool operator<(const Vec& l, const Vec& r){ return l.x*l.x+l.y*l.y+l.z*l.z < r.x*r.x+r.y*r.y+r.z*r.z; }
inline bool operator<(const Vec &l, const Vec &r) {
return l.x * l.x + l.y * l.y + l.z * l.z <
r.x * r.x + r.y * r.y + r.z * r.z;
* @brief Calculate the distance using a simple method
@ -42,9 +43,10 @@ inline bool operator<(const Vec& l, const Vec& r){ return l.x*l.x+l.y*l.y+l.z*l.
* @param[in] periodic Directions that are periodic
* @param[in] dx Cell size
template<class TYPE>
void CalcDist( Array<TYPE> &Distance, const Array<char> &ID, const Domain &Dm,
const std::array<bool,3>& periodic = {true,true,true}, const std::array<double,3>& dx = {1,1,1} );
template <class TYPE>
void CalcDist(Array<TYPE> &Distance, const Array<char> &ID, const Domain &Dm,
const std::array<bool, 3> &periodic = {true, true, true},
const std::array<double, 3> &dx = {1, 1, 1});
* @brief Calculate the vector-based distance using a simple method
@ -55,7 +57,8 @@ void CalcDist( Array<TYPE> &Distance, const Array<char> &ID, const Domain &Dm,
* @param[in] periodic Directions that are periodic
* @param[in] dx Cell size
void CalcVecDist( Array<Vec> &Distance, const Array<int> &ID, const Domain &Dm,
const std::array<bool,3>& periodic = {true,true,true}, const std::array<double,3>& dx = {1,1,1} );
void CalcVecDist(Array<Vec> &Distance, const Array<int> &ID, const Domain &Dm,
const std::array<bool, 3> &periodic = {true, true, true},
const std::array<double, 3> &dx = {1, 1, 1});

View File

@ -18,175 +18,181 @@
#include "math.h"
#include "ProfilerApp.h"
// Perform a 3D mean filter on Input: every interior cell of Output receives
// the average of the 3x3x3 block of Input centred on it (27 samples).
// Cells on the outermost layer (index 0 or N-1 in any direction) are not
// written.
void Mean3D(const Array<double> &Input, Array<double> &Output) {
    const int Nx = int(Input.size(0));
    const int Ny = int(Input.size(1));
    const int Nz = int(Input.size(2));

    for (int k = 1; k < Nz - 1; k++) {
        for (int j = 1; j < Ny - 1; j++) {
            for (int i = 1; i < Nx - 1; i++) {
                // centre
                double MeanValue = Input(i, j, k);
                // 6 face neighbours
                MeanValue += Input(i + 1, j, k) + Input(i, j + 1, k) +
                             Input(i, j, k + 1) + Input(i - 1, j, k) +
                             Input(i, j - 1, k) + Input(i, j, k - 1);
                // 12 edge neighbours (xy, xz, yz planes)
                MeanValue += Input(i + 1, j + 1, k) + Input(i - 1, j + 1, k) +
                             Input(i + 1, j - 1, k) + Input(i - 1, j - 1, k);
                MeanValue += Input(i + 1, j, k + 1) + Input(i - 1, j, k + 1) +
                             Input(i + 1, j, k - 1) + Input(i - 1, j, k - 1);
                MeanValue += Input(i, j + 1, k + 1) + Input(i, j - 1, k + 1) +
                             Input(i, j + 1, k - 1) + Input(i, j - 1, k - 1);
                // 8 corner neighbours (k+1 layer, then k-1 layer)
                MeanValue +=
                    Input(i + 1, j + 1, k + 1) + Input(i - 1, j + 1, k + 1) +
                    Input(i + 1, j - 1, k + 1) + Input(i - 1, j - 1, k + 1);
                MeanValue +=
                    Input(i + 1, j + 1, k - 1) + Input(i - 1, j + 1, k - 1) +
                    Input(i + 1, j - 1, k - 1) + Input(i - 1, j - 1, k - 1);
                Output(i, j, k) = MeanValue / 27.0;
            }
        }
    }
}
// Perform a 3D median filter on Input using a fixed 3x3x3 window (apply the
// filter repeatedly if a wider effective window is needed).
// Only interior cells (1 .. N-2 in each direction) of Output are written.
void Med3D(const Array<float> &Input, Array<float> &Output) {
    // Scratch buffer for the 27 window samples. Kept on the stack: the
    // previous heap allocation (new float[27]) was never released.
    float List[27];

    const int Nx = int(Input.size(0));
    const int Ny = int(Input.size(1));
    const int Nz = int(Input.size(2));

    for (int k = 1; k < Nz - 1; k++) {
        for (int j = 1; j < Ny - 1; j++) {
            for (int i = 1; i < Nx - 1; i++) {
                // Window bounds; the upper bounds are exclusive
                const int imin = i - 1, imax = i + 2;
                const int jmin = j - 1, jmax = j + 2;
                const int kmin = k - 1, kmax = k + 2;

                // Populate the list with values in the window
                int Number = 0;
                for (int kk = kmin; kk < kmax; kk++) {
                    for (int jj = jmin; jj < jmax; jj++) {
                        for (int ii = imin; ii < imax; ii++) {
                            List[Number++] = Input(ii, jj, kk);
                        }
                    }
                }

                // Partial selection sort: place the 14 smallest samples in
                // order, which is enough to fix the median at List[13]
                for (int ii = 0; ii < 14; ii++) {
                    for (int jj = ii + 1; jj < 27; jj++) {
                        if (List[jj] < List[ii]) {
                            const float tmp = List[ii];
                            List[ii] = List[jj];
                            List[jj] = tmp;
                        }
                    }
                }

                // Return the median
                Output(i, j, k) = List[13];
            }
        }
    }
}
// Implementation of 3D non-local means filter
//   d determines the width of the search volume
//   h is a free parameter for non-local means (i.e. 1/sigma^2)
//   Distance is the signed distance function
//   If |Distance(i,j,k)| >= THRESHOLD_DIST (= d) then NLM is not computed and
//   the local mean is written to Output instead.
// Mean is overwritten with the local means of Input (interior cells only).
// Returns the number of sites where NLM was applied.
// NOTE(review): the search window is [max(0,c-d), min(N-1,c+d)) in each
// direction, i.e. the upper bound is excluded; kept as in the original.
// NOTE(review): the second pass reads Mean on the outermost layer, which the
// first pass does not write; presumably the caller initialises Mean - confirm.
int NLM3D(const Array<float> &Input, Array<float> &Mean,
          const Array<float> &Distance, Array<float> &Output, const int d,
          const float h) {
    const float THRESHOLD_DIST = float(d);
    int returnCount = 0;

    const int Nx = int(Input.size(0));
    const int Ny = int(Input.size(1));
    const int Nz = int(Input.size(2));

    // Compute the local means
    for (int k = 1; k < Nz - 1; k++) {
        for (int j = 1; j < Ny - 1; j++) {
            for (int i = 1; i < Nx - 1; i++) {
                const int imin = std::max(0, i - d);
                const int jmin = std::max(0, j - d);
                const int kmin = std::max(0, k - d);
                const int imax = std::min(Nx - 1, i + d);
                const int jmax = std::min(Ny - 1, j + d);
                const int kmax = std::min(Nz - 1, k + d);

                // Accumulate the values in the window and count them
                float sum = 0;
                float weight = 0;
                for (int kk = kmin; kk < kmax; kk++) {
                    for (int jj = jmin; jj < jmax; jj++) {
                        for (int ii = imin; ii < imax; ii++) {
                            sum += Input(ii, jj, kk);
                            weight++;
                        }
                    }
                }
                // The window is empty only when d <= 0; fall back to the
                // sample itself rather than dividing by zero
                Mean(i, j, k) = weight > 0 ? sum / weight : Input(i, j, k);
            }
        }
    }

    // Compute the non-local means
    for (int k = 1; k < Nz - 1; k++) {
        for (int j = 1; j < Ny - 1; j++) {
            for (int i = 1; i < Nx - 1; i++) {
                if (fabs(Distance(i, j, k)) < THRESHOLD_DIST) {
                    // compute the expensive non-local means
                    float sum = 0;
                    float weight = 0;

                    const int imin = std::max(0, i - d);
                    const int jmin = std::max(0, j - d);
                    const int kmin = std::max(0, k - d);
                    const int imax = std::min(Nx - 1, i + d);
                    const int jmax = std::min(Ny - 1, j + d);
                    const int kmax = std::min(Nz - 1, k + d);

                    for (int kk = kmin; kk < kmax; kk++) {
                        for (int jj = jmin; jj < jmax; jj++) {
                            for (int ii = imin; ii < imax; ii++) {
                                const float tmp =
                                    Mean(i, j, k) - Mean(ii, jj, kk);
                                // similarity weight, evaluated once per sample
                                const auto w = exp(-tmp * tmp * h);
                                sum += w * Input(ii, jj, kk);
                                weight += w;
                            }
                        }
                    }

                    returnCount++;
                    Output(i, j, k) = sum / weight;
                } else {
                    // Just return the mean
                    Output(i, j, k) = Mean(i, j, k);
                }
            }
        }
    }
    // Return the number of sites where NLM was applied
    return returnCount;
}

View File

@ -17,7 +17,6 @@
#ifndef Filters_H_INC
#define Filters_H_INC
#include "common/Array.h"
@ -26,7 +25,7 @@
* @param[in] Input Input image
* @param[out] Output Output image
// Declaration: 3D mean filter, Input -> Output.
void Mean3D(const Array<double> &Input, Array<double> &Output);
* @brief Filter image
@ -34,7 +33,7 @@ void Mean3D( const Array<double> &Input, Array<double> &Output );
* @param[in] Input Input image
* @param[out] Output Output image
// Declaration: 3D median filter, Input -> Output.
void Med3D(const Array<float> &Input, Array<float> &Output);
* @brief Filter image
@ -46,8 +45,8 @@ void Med3D( const Array<float> &Input, Array<float> &Output );
* @param[in] d
* @param[in] h
// Declaration: 3D non-local means filter; returns an int count.
int NLM3D(const Array<float> &Input, Array<float> &Mean,
          const Array<float> &Distance, Array<float> &Output, const int d,
          const float h);

View File

View File

@ -22,13 +22,10 @@
#include "common/Array.h"
#include <vector>
namespace imfilter {

//! enum to store the BC type
enum class BC { fixed = 0, symmetric = 1, replicate = 2, circular = 3 };

/*!
 * @brief  N-D filtering of multidimensional images
 * @param[in] A         The input array
 * @param[in] H         The filter kernel; must have the same number of
 *                      dimensions as A and an odd size (2*N+1) in each one
 * @param[in] boundary  The boundary condition to apply in each dimension
 * @param[in] X         The value to use for boundary conditions (only used if boundary==fixed)
 */
template <class TYPE>
Array<TYPE> imfilter(const Array<TYPE> &A, const Array<TYPE> &H,
                     const std::vector<imfilter::BC> &boundary,
                     const TYPE X = 0);

/*!
 * @brief  N-D filtering of multidimensional images
 * @param[in] A         The input array
 * @param[in] Nh        Half-width of the window in each dimension (the
 *                      window passed to H has size 2*Nh+1)
 * @param[in] H         Function evaluated on each window to produce the
 *                      filtered value
 * @param[in] boundary  The boundary condition to apply in each dimension
 * @param[in] X         The value to use for boundary conditions (only used if boundary==fixed)
 */
template <class TYPE>
Array<TYPE> imfilter(const Array<TYPE> &A, const std::vector<int> &Nh,
                     std::function<TYPE(const Array<TYPE> &)> H,
                     const std::vector<imfilter::BC> &boundary,
                     const TYPE X = 0);

/*!
 * @brief  N-D filtering of multidimensional images
 * @details  Separable variant: one 1-D kernel is applied per dimension.
 * @param[in] A         The input array
 * @param[in] H         One 1-D kernel of odd length (2*N+1) per dimension
 * @param[in] boundary  The boundary condition to apply in each dimension
 * @param[in] X         The value to use for boundary conditions (only used if boundary==fixed)
 */
template <class TYPE>
Array<TYPE>
imfilter_separable(const Array<TYPE> &A, const std::vector<Array<TYPE>> &H,
                   const std::vector<imfilter::BC> &boundary, const TYPE X = 0);

/*!
 * @brief  N-D filtering of multidimensional images
 * @details  Separable variant: one 1-D filter function is applied per dimension.
 * @param[in] A         The input array
 * @param[in] Nh        Half-width of the window in each dimension
 * @param[in] H         One function per dimension, evaluated on a window of
 *                      2*Nh+1 values
 * @param[in] boundary  The boundary condition to apply in each dimension
 * @param[in] X         The value to use for boundary conditions (only used if boundary==fixed)
 */
template <class TYPE>
Array<TYPE>
imfilter_separable(const Array<TYPE> &A, const std::vector<int> &Nh,
                   std::vector<std::function<TYPE(const Array<TYPE> &)>> H,
                   const std::vector<imfilter::BC> &boundary, const TYPE X = 0);

/*!
 * @brief  N-D filtering of multidimensional images
 * @details  Separable variant: one 1-D filter function is applied per dimension.
 * @param[in] A         The input array
 * @param[in] Nh        Half-width of the window in each dimension
 * @param[in] H         One function per dimension, called with the window
 *                      length (2*Nh+1) and a pointer to the window values
 * @param[in] boundary  The boundary condition to apply in each dimension
 * @param[in] X         The value to use for boundary conditions (only used if boundary==fixed)
 */
template <class TYPE>
Array<TYPE>
imfilter_separable(const Array<TYPE> &A, const std::vector<int> &Nh,
                   std::vector<std::function<TYPE(int, const TYPE *)>> H,
                   const std::vector<imfilter::BC> &boundary, const TYPE X = 0);

/*!
 * @brief  Create a filter to use with imfilter
 * @param[in] N     Half-width of the filter in each dimension (the filter
 *                  has size 2*N+1)
 * @param[in] type  The filter to create ("average" or "gaussian")
 * \param[in] args  An optional argument that some of the filters use
 *                  (gaussian: standard deviation per dimension; a default
 *                  value of 0.5 is used if not provided)
 */
template <class TYPE>
Array<TYPE> create_filter(const std::vector<int> &N, const std::string &type,
                          const void *args = NULL);

} // namespace imfilter
#include "analysis/imfilter.hpp"

@ -35,185 +35,176 @@
#include <math.h>
#include <string.h>
// Function to convert an index that may lie outside [0,N) into an index
// inside the array according to the boundary condition.
// In-range indices are returned unchanged. For BC::fixed an out-of-range
// index yields -1, which callers treat as "use the fixed value X".
// NOTE(review): for BC::symmetric the mapping (2*N - index) % N sends index N
// to 0 (and -1 to 1); that does not look like a mirror about the upper edge.
// Behaviour is left unchanged here - confirm the intended convention.
// NOTE(review): for BC::circular an index below -N still yields a negative
// result because of the sign rule of % in C++.
static inline int imfilter_index(int index, const int N,
                                 const imfilter::BC bc) {
    const bool inside = index >= 0 && index < N;
    if (inside)
        return index;
    switch (bc) {
    case imfilter::BC::symmetric:
        return (2 * N - index) % N;
    case imfilter::BC::replicate:
        return index < 0 ? 0 : N - 1;
    case imfilter::BC::circular:
        return (index + N) % N;
    case imfilter::BC::fixed:
        return -1;
    }
    // Unknown boundary type: leave the index untouched
    return index;
}
// Function to copy a 1D array and pad with the appropriate BC
//   N   number of samples in the line
//   Ns  stride between consecutive samples of the line in A
//   Nh  number of padding samples added on each side
//   A   first sample of the line
//   BC  boundary condition used to fill the padding
//   X   value used for the padding when BC is fixed
//   B   destination; must hold N + 2*Nh values
template <class TYPE>
static inline void copy_array(const int N, const int Ns, const int Nh,
                              const TYPE *A, const imfilter::BC BC,
                              const TYPE X, TYPE *B) {
    // Fill the center (strided gather, so a plain memcpy cannot be used)
    for (int i = 0; i < N; i++)
        B[i + Nh] = A[i * Ns];
    // Fill the boundaries, working outwards from the centre on both sides
    for (int i = 0; i < Nh; i++) {
        const int j1 = imfilter_index(-(i + 1), N, BC);
        const int j2 = imfilter_index(N + i, N, BC);
        // -1 marks a fixed boundary: use X instead of a sample from the line
        B[Nh - i - 1] = j1 == -1 ? X : B[Nh + j1];
        B[N + Nh + i] = j2 == -1 ? X : B[Nh + j2];
    }
}
* Perform a 1D filter in a single direction *
template<class TYPE>
static void filter_direction( int Ns, int N, int Ne, int Nh, const TYPE *H,
imfilter::BC boundary, TYPE X, TYPE *A )
if ( Nh < 0 )
template <class TYPE>
static void filter_direction(int Ns, int N, int Ne, int Nh, const TYPE *H,
imfilter::BC boundary, TYPE X, TYPE *A) {
if (Nh < 0)
IMFILTER_ERROR("Invalid filter size");
if ( Nh == 0 ) {
for (int i=0; i<Ns*N*Ne; i++)
if (Nh == 0) {
for (int i = 0; i < Ns * N * Ne; i++)
A[i] *= H[0];
TYPE *tmp = new TYPE[N+2*Nh];
for (int j=0; j<Ne; j++) {
for (int i=0; i<Ns; i++) {
copy_array( N, Ns, Nh, &A[i+j*Ns*N], boundary, X, tmp );
for (int k=0; k<N; k++) {
TYPE *tmp = new TYPE[N + 2 * Nh];
for (int j = 0; j < Ne; j++) {
for (int i = 0; i < Ns; i++) {
copy_array(N, Ns, Nh, &A[i + j * Ns * N], boundary, X, tmp);
for (int k = 0; k < N; k++) {
TYPE tmp2 = 0;
for (int m=0; m<=2*Nh; m++)
tmp2 += H[m] * tmp[k+m];
A[i+k*Ns+j*Ns*N] = tmp2;
for (int m = 0; m <= 2 * Nh; m++)
tmp2 += H[m] * tmp[k + m];
A[i + k * Ns + j * Ns * N] = tmp2;
delete[] tmp;
template<class TYPE>
static void filter_direction( int Ns, int N, int Ne, int Nh,
std::function<TYPE(const Array<TYPE>&)> H, imfilter::BC boundary, TYPE X, TYPE *A )
if ( Nh < 0 )
template <class TYPE>
static void filter_direction(int Ns, int N, int Ne, int Nh,
std::function<TYPE(const Array<TYPE> &)> H,
imfilter::BC boundary, TYPE X, TYPE *A) {
if (Nh < 0)
IMFILTER_ERROR("Invalid filter size");
TYPE *tmp = new TYPE[N+2*Nh];
Array<TYPE> tmp2(2*Nh+1);
for (int j=0; j<Ne; j++) {
for (int i=0; i<Ns; i++) {
copy_array( N, Ns, Nh, &A[i+j*Ns*N], boundary, X, tmp );
for (int k=0; k<N; k++) {
for (int m=0; m<=2*Nh; m++)
tmp2(m) = tmp[k+m];
A[i+k*Ns+j*Ns*N] = H(tmp2);
TYPE *tmp = new TYPE[N + 2 * Nh];
Array<TYPE> tmp2(2 * Nh + 1);
for (int j = 0; j < Ne; j++) {
for (int i = 0; i < Ns; i++) {
copy_array(N, Ns, Nh, &A[i + j * Ns * N], boundary, X, tmp);
for (int k = 0; k < N; k++) {
for (int m = 0; m <= 2 * Nh; m++)
tmp2(m) = tmp[k + m];
A[i + k * Ns + j * Ns * N] = H(tmp2);
delete[] tmp;
template<class TYPE>
static void filter_direction( int Ns, int N, int Ne, int Nh,
std::function<TYPE(int, const TYPE*)> H, imfilter::BC boundary, TYPE X, TYPE *A )
if ( Nh < 0 )
template <class TYPE>
static void filter_direction(int Ns, int N, int Ne, int Nh,
std::function<TYPE(int, const TYPE *)> H,
imfilter::BC boundary, TYPE X, TYPE *A) {
if (Nh < 0)
IMFILTER_ERROR("Invalid filter size");
TYPE *tmp = new TYPE[N+2*Nh];
int Nh2 = 2*Nh+1;
for (int j=0; j<Ne; j++) {
for (int i=0; i<Ns; i++) {
copy_array( N, Ns, Nh, &A[i+j*Ns*N], boundary, X, tmp );
for (int k=0; k<N; k++)
A[i+k*Ns+j*Ns*N] = H(Nh2,&tmp[k]);
TYPE *tmp = new TYPE[N + 2 * Nh];
int Nh2 = 2 * Nh + 1;
for (int j = 0; j < Ne; j++) {
for (int i = 0; i < Ns; i++) {
copy_array(N, Ns, Nh, &A[i + j * Ns * N], boundary, X, tmp);
for (int k = 0; k < N; k++)
A[i + k * Ns + j * Ns * N] = H(Nh2, &tmp[k]);
delete[] tmp;
* Create a filter *
// Build a filter kernel of size 2*N0[d]+1 in each dimension.
//   "average"  : every coefficient is 1/(number of coefficients)
//   "gaussian" : product of 1-D gaussians, normalised to unit sum; args, when
//                not NULL, is read as an array of TYPE holding one standard
//                deviation per dimension (0.5 is used otherwise). Limited to
//                three dimensions.
// Any other type is reported through IMFILTER_ERROR.
template <class TYPE>
Array<TYPE> imfilter::create_filter(const std::vector<int> &N0,
                                    const std::string &type, const void *args) {
    std::vector<size_t> N2(N0.size());
    for (size_t i = 0; i < N2.size(); i++)
        N2[i] = 2 * N0[i] + 1;
    Array<TYPE> h(N2);
    if (type == "average") {
        // average
        h.fill(1.0 / static_cast<TYPE>(h.length()));
    } else if (type == "gaussian") {
        // gaussian
        if (N0.size() > 3)
            IMFILTER_ERROR("Not implimented for dimensions > 3");
        // Standard deviation per dimension (named sigma so that it does not
        // hide namespace std inside this scope)
        TYPE sigma[3] = {0.5, 0.5, 0.5};
        if (args != NULL) {
            const TYPE *args2 = reinterpret_cast<const TYPE *>(args);
            for (size_t d = 0; d < N0.size(); d++)
                sigma[d] = args2[d];
        }
        // Pad the half-widths to three entries so that unused dimensions
        // collapse to a single index
        auto N = N0;
        N.resize(3, 0);
        for (int k = -N[2]; k <= N[2]; k++) {
            for (int j = -N[1]; j <= N[1]; j++) {
                for (int i = -N[0]; i <= N[0]; i++) {
                    h(i + N[0], j + N[1], k + N[2]) =
                        exp(-i * i / (2 * sigma[0] * sigma[0])) *
                        exp(-j * j / (2 * sigma[1] * sigma[1])) *
                        exp(-k * k / (2 * sigma[2] * sigma[2]));
                }
            }
        }
        // Normalise so that the coefficients sum to one
        h.scale(1.0 / h.sum());
    } else {
        IMFILTER_ERROR("Unknown filter");
    }
    return h;
}
// Perform 2-D filtering
template<class TYPE>
void imfilter_2D( int Nx, int Ny, const TYPE *A, int Nhx, int Nhy, const TYPE *H,
imfilter::BC BCx, imfilter::BC BCy, const TYPE X, TYPE *B )
PROFILE_START( "imfilter_2D" );
memset( B, 0, Nx * Ny * sizeof( TYPE ) );
for ( int j1 = 0; j1 < Ny; j1++ ) {
for ( int i1 = 0; i1 < Nx; i1++ ) {
template <class TYPE>
void imfilter_2D(int Nx, int Ny, const TYPE *A, int Nhx, int Nhy, const TYPE *H,
imfilter::BC BCx, imfilter::BC BCy, const TYPE X, TYPE *B) {
memset(B, 0, Nx * Ny * sizeof(TYPE));
for (int j1 = 0; j1 < Ny; j1++) {
for (int i1 = 0; i1 < Nx; i1++) {
TYPE tmp = 0;
if ( i1 >= Nhx && i1 < Nx - Nhx && j1 >= Nhy && j1 < Ny - Nhy ) {
if (i1 >= Nhx && i1 < Nx - Nhx && j1 >= Nhy && j1 < Ny - Nhy) {
int ijkh = 0;
for ( int j2 = j1 - Nhy; j2 <= j1 + Nhy; j2++ ) {
for ( int i2 = i1 - Nhx; i2 <= i1 + Nhx; i2++, ijkh++ )
for (int j2 = j1 - Nhy; j2 <= j1 + Nhy; j2++) {
for (int i2 = i1 - Nhx; i2 <= i1 + Nhx; i2++, ijkh++)
tmp += H[ijkh] * A[i2 + j2 * Nx];
} else {
int ijkh = 0;
for ( int jh = -Nhy; jh <= Nhy; jh++ ) {
int j2 = imfilter_index( j1+jh, Ny, BCy );
for ( int ih = -Nhx; ih <= Nhx; ih++ ) {
int i2 = imfilter_index( i1+ih, Nx, BCx );
for (int jh = -Nhy; jh <= Nhy; jh++) {
int j2 = imfilter_index(j1 + jh, Ny, BCy);
for (int ih = -Nhx; ih <= Nhx; ih++) {
int i2 = imfilter_index(i1 + ih, Nx, BCx);
bool fixed = i2 == -1 || j2 == -1;
TYPE A2 = fixed ? X : A[i2 + j2 * Nx];
TYPE A2 = fixed ? X : A[i2 + j2 * Nx];
tmp += H[ijkh] * A2;
@ -222,32 +213,31 @@ void imfilter_2D( int Nx, int Ny, const TYPE *A, int Nhx, int Nhy, const TYPE *H
B[i1 + j1 * Nx] = tmp;
PROFILE_STOP( "imfilter_2D" );
// Perform 3-D filtering
template<class TYPE>
void imfilter_3D( int Nx, int Ny, int Nz, const TYPE *A, int Nhx, int Nhy, int Nhz,
const TYPE *H, imfilter::BC BCx, imfilter::BC BCy, imfilter::BC BCz,
const TYPE X, TYPE *B )
PROFILE_START( "imfilter_3D" );
memset( B, 0, Nx * Ny * Nz * sizeof( TYPE ) );
for ( int k1 = 0; k1 < Nz; k1++ ) {
for ( int j1 = 0; j1 < Ny; j1++ ) {
for ( int i1 = 0; i1 < Nx; i1++ ) {
template <class TYPE>
void imfilter_3D(int Nx, int Ny, int Nz, const TYPE *A, int Nhx, int Nhy,
int Nhz, const TYPE *H, imfilter::BC BCx, imfilter::BC BCy,
imfilter::BC BCz, const TYPE X, TYPE *B) {
memset(B, 0, Nx * Ny * Nz * sizeof(TYPE));
for (int k1 = 0; k1 < Nz; k1++) {
for (int j1 = 0; j1 < Ny; j1++) {
for (int i1 = 0; i1 < Nx; i1++) {
TYPE tmp = 0;
int ijkh = 0;
for ( int kh = -Nhz; kh <= Nhz; kh++ ) {
int k2 = imfilter_index( k1+kh, Nz, BCz );
for ( int jh = -Nhy; jh <= Nhy; jh++ ) {
int j2 = imfilter_index( j1+jh, Ny, BCy );
for ( int ih = -Nhx; ih <= Nhx; ih++ ) {
int i2 = imfilter_index( i1+ih, Nx, BCx );
int ijkh = 0;
for (int kh = -Nhz; kh <= Nhz; kh++) {
int k2 = imfilter_index(k1 + kh, Nz, BCz);
for (int jh = -Nhy; jh <= Nhy; jh++) {
int j2 = imfilter_index(j1 + jh, Ny, BCy);
for (int ih = -Nhx; ih <= Nhx; ih++) {
int i2 = imfilter_index(i1 + ih, Nx, BCx);
bool fixed = i2 == -1 || j2 == -1 || k2 == -1;
TYPE A2 = fixed ? X : A[i2 + j2 * Nx + k2 * Nx * Ny];
fixed ? X : A[i2 + j2 * Nx + k2 * Nx * Ny];
tmp += H[ijkh] * A2;
@ -257,154 +247,155 @@ void imfilter_3D( int Nx, int Ny, int Nz, const TYPE *A, int Nhx, int Nhy, int N
PROFILE_STOP( "imfilter_3D" );
* Perform N-D filtering *
// N-D filtering with an explicit kernel H.
// H must have the same number of dimensions as A and an odd size (2*N+1) in
// every dimension; BC gives one boundary condition per dimension and X is the
// value used for fixed boundaries. Supports 1, 2 and 3 dimensions; anything
// else is reported through IMFILTER_ERROR.
template <class TYPE>
Array<TYPE> imfilter::imfilter(const Array<TYPE> &A, const Array<TYPE> &H,
                               const std::vector<imfilter::BC> &BC,
                               const TYPE X) {
    IMFILTER_ASSERT(A.ndim() == H.ndim());
    IMFILTER_ASSERT(A.ndim() == BC.size());
    // Half-width of the kernel in each dimension
    std::vector<size_t> Nh = H.size();
    for (int d = 0; d < A.ndim(); d++) {
        Nh[d] = (H.size(d) - 1) / 2;
        IMFILTER_INSIST(2 * Nh[d] + 1 == H.size(d),
                        "Filter must be of size 2*N+1");
    }
    // B starts as a copy of A; the 1-D path filters it in place, the 2-D and
    // 3-D paths read from A and overwrite B
    auto B = A;
    if (A.ndim() == 1) {
        filter_direction(1, A.size(0), 1, Nh[0], H.data(), BC[0], X, B.data());
    } else if (A.ndim() == 2) {
        imfilter_2D(A.size(0), A.size(1), A.data(), Nh[0], Nh[1], H.data(),
                    BC[0], BC[1], X, B.data());
    } else if (A.ndim() == 3) {
        imfilter_3D(A.size(0), A.size(1), A.size(2), A.data(), Nh[0], Nh[1],
                    Nh[2], H.data(), BC[0], BC[1], BC[2], X, B.data());
    } else {
        IMFILTER_ERROR("Arbitrary dimension not yet supported");
    }
    return B;
}
// N-D filtering with a user-supplied function.
// For every sample of A the window of half-width Nh0 around it is gathered
// (applying the boundary conditions BC0, with X for fixed boundaries) and
// passed to H; the value H returns becomes the filtered sample.
// Limited to three dimensions.
template <class TYPE>
Array<TYPE>
imfilter::imfilter(const Array<TYPE> &A, const std::vector<int> &Nh0,
                   std::function<TYPE(const Array<TYPE> &)> H,
                   const std::vector<imfilter::BC> &BC0, const TYPE X) {
    PROFILE_START("imfilter (lambda)");
    IMFILTER_ASSERT(A.ndim() == Nh0.size());
    IMFILTER_ASSERT(A.ndim() == BC0.size());
    // Window size in each dimension
    std::vector<size_t> Nh2(A.size());
    for (int d = 0; d < A.ndim(); d++)
        Nh2[d] = 2 * Nh0[d] + 1;
    auto B = A;
    Array<TYPE> data(Nh2);
    IMFILTER_INSIST(A.ndim() <= 3,
                    "Not programmed for more than 3 dimensions yet");
    // Pad sizes, half-widths and BCs to three entries so that the same
    // triple loop serves 1-D, 2-D and 3-D input
    auto N = A.size();
    auto Nh = Nh0;
    auto BC = BC0;
    N.resize(3, 1);
    Nh.resize(3, 0);
    BC.resize(3, imfilter::BC::fixed);
    for (int k1 = 0; k1 < N[2]; k1++) {
        for (int j1 = 0; j1 < N[1]; j1++) {
            for (int i1 = 0; i1 < N[0]; i1++) {
                // Gather the window around (i1,j1,k1)
                for (int kh = -Nh[2]; kh <= Nh[2]; kh++) {
                    int k2 = imfilter_index(k1 + kh, N[2], BC[2]);
                    for (int jh = -Nh[1]; jh <= Nh[1]; jh++) {
                        int j2 = imfilter_index(j1 + jh, N[1], BC[1]);
                        for (int ih = -Nh[0]; ih <= Nh[0]; ih++) {
                            int i2 = imfilter_index(i1 + ih, N[0], BC[0]);
                            bool fixed = i2 == -1 || j2 == -1 || k2 == -1;
                            data(ih + Nh[0], jh + Nh[1], kh + Nh[2]) =
                                fixed ? X : A(i2, j2, k2);
                        }
                    }
                }
                B(i1, j1, k1) = H(data);
            }
        }
    }
    PROFILE_STOP("imfilter (lambda)");
    return B;
}
* imfilter with separable filter functions *
// imfilter with separable kernels: H[d] is a 1-D kernel of odd length
// (2*N+1) that is applied along dimension d, one dimension after another, on
// a copy of A. boundary[d] is the boundary condition for dimension d and X
// the value used for fixed boundaries.
template <class TYPE>
Array<TYPE> imfilter::imfilter_separable(
    const Array<TYPE> &A, const std::vector<Array<TYPE>> &H,
    const std::vector<imfilter::BC> &boundary, const TYPE X) {
    IMFILTER_ASSERT(A.ndim() == (int)H.size());
    IMFILTER_ASSERT(A.ndim() == (int)boundary.size());
    // Half-width of each 1-D kernel
    std::vector<size_t> Nh(H.size());
    for (int d = 0; d < A.ndim(); d++) {
        IMFILTER_ASSERT(H[d].ndim() == 1);
        Nh[d] = (H[d].length() - 1) / 2;
        IMFILTER_INSIST(2 * Nh[d] + 1 == H[d].length(),
                        "Filter must be of size 2*N+1");
    }
    auto B = A;
    for (int d = 0; d < A.ndim(); d++) {
        // View the array as Ns x N x Ne with dimension d in the middle
        int N = A.size(d);
        int Ns = 1;
        int Ne = 1;
        for (int d2 = 0; d2 < d; d2++)
            Ns *= A.size(d2);
        for (int d2 = d + 1; d2 < A.ndim(); d2++)
            Ne *= A.size(d2);
        filter_direction(Ns, N, Ne, Nh[d], H[d].data(), boundary[d], X,
                         B.data());
    }
    return B;
}
// imfilter with separable filter functions: H[d] is applied along dimension
// d with a window of half-width Nh[d], one dimension after another, on a
// copy of A. Each function receives its window as an Array.
template <class TYPE>
Array<TYPE> imfilter::imfilter_separable(
    const Array<TYPE> &A, const std::vector<int> &Nh,
    std::vector<std::function<TYPE(const Array<TYPE> &)>> H,
    const std::vector<imfilter::BC> &boundary, const TYPE X) {
    PROFILE_START("imfilter_separable (lambda)");
    IMFILTER_ASSERT(A.ndim() == (int)boundary.size());
    auto B = A;
    for (int d = 0; d < A.ndim(); d++) {
        // View the array as Ns x N x Ne with dimension d in the middle
        int N = A.size(d);
        int Ns = 1;
        int Ne = 1;
        for (int d2 = 0; d2 < d; d2++)
            Ns *= A.size(d2);
        for (int d2 = d + 1; d2 < A.ndim(); d2++)
            Ne *= A.size(d2);
        filter_direction(Ns, N, Ne, Nh[d], H[d], boundary[d], X, B.data());
    }
    PROFILE_STOP("imfilter_separable (lambda)");
    return B;
}
// imfilter with separable filter functions: H[d] is applied along dimension
// d with a window of half-width Nh[d], one dimension after another, on a
// copy of A. Each function receives the window length and a pointer to the
// window values.
template <class TYPE>
Array<TYPE> imfilter::imfilter_separable(
    const Array<TYPE> &A, const std::vector<int> &Nh,
    std::vector<std::function<TYPE(int, const TYPE *)>> H,
    const std::vector<imfilter::BC> &boundary, const TYPE X) {
    PROFILE_START("imfilter_separable (function)");
    IMFILTER_ASSERT(A.ndim() == (int)boundary.size());
    auto B = A;
    for (int d = 0; d < A.ndim(); d++) {
        // View the array as Ns x N x Ne with dimension d in the middle
        int N = A.size(d);
        int Ns = 1;
        int Ne = 1;
        for (int d2 = 0; d2 < d; d2++)
            Ns *= A.size(d2);
        for (int d2 = d + 1; d2 < A.ndim(); d2++)
            Ne *= A.size(d2);
        filter_direction(Ns, N, Ne, Nh[d], H[d], boundary[d], X, B.data());
    }
    PROFILE_STOP("imfilter_separable (function)");
    return B;
}

@ -3,9 +3,14 @@
#include "common/Domain.h"
#include "analysis/runAnalysis.h"
// Free-function morphological operations (declarations only).
double MorphOpen(DoubleArray &SignDist, signed char *id,
                 std::shared_ptr<Domain> Dm, double VoidFraction,
                 signed char ErodeLabel, signed char ReplaceLabel);
double MorphDrain(DoubleArray &SignDist, signed char *id,
                  std::shared_ptr<Domain> Dm, double VoidFraction);
double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
                 std::shared_ptr<Domain> Dm, double TargetVol,
                 double WallFactor);
@ -15,84 +20,103 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array<char> &id,
* The Morphology class supports morphological operations on complex structures
class Morphology{
class Morphology {
* \brief Create a flow adaptor to operate on the LB model
* \brief Destructor
* \brief Initialize morphology structure from distance map
* @param Dm Domain structure
* @param Distance Signed distance to boundary of structure
void Initialize(std::shared_ptr <Domain> Dm, DoubleArray &Distance);
void Initialize(std::shared_ptr<Domain> Dm, DoubleArray &Distance);
* \brief Find all sites such that the reach of the signed distance at the site overlaps with a sub-domain boundary
* @param Dm Domain structure
* @param id image labels
* @param ErodeLabel label to erode based on morphological operation
* @param NewLabel label to assign based on morphological operation
int GetOverlaps(std::shared_ptr <Domain> Dm, signed char *id, const signed char ErodeLabel, const signed char NewLabel);
int GetOverlaps(std::shared_ptr<Domain> Dm, signed char *id,
const signed char ErodeLabel, const signed char NewLabel);
* data structures to store non-local morphological information
std::vector<int> xShift, yShift, zShift;
std::vector<int> sendID;
std::vector<double> morphRadius;
std::vector<double> morphRadius;
std::vector<unsigned char> localID;
std::vector<unsigned char> nonlocalID;
int sendtag,recvtag;
int sendtag, recvtag;
int sendCount, recvCount;
int sendOffset_x, sendOffset_y, sendOffset_z, sendOffset_X, sendOffset_Y, sendOffset_Z;
int sendOffset_xy, sendOffset_yz, sendOffset_xz, sendOffset_Xy, sendOffset_Yz, sendOffset_xZ;
int sendOffset_xY, sendOffset_yZ, sendOffset_Xz, sendOffset_XY, sendOffset_YZ, sendOffset_XZ;
int sendOffset_x, sendOffset_y, sendOffset_z, sendOffset_X, sendOffset_Y,
int sendOffset_xy, sendOffset_yz, sendOffset_xz, sendOffset_Xy,
sendOffset_Yz, sendOffset_xZ;
int sendOffset_xY, sendOffset_yZ, sendOffset_Xz, sendOffset_XY,
sendOffset_YZ, sendOffset_XZ;
int sendOffset_xyz, sendOffset_XYZ, sendOffset_xYz, sendOffset_XyZ;
int sendOffset_Xyz, sendOffset_xYZ, sendOffset_xyZ, sendOffset_XYz;
int recvOffset_x, recvOffset_y, recvOffset_z, recvOffset_X, recvOffset_Y, recvOffset_Z;
int recvOffset_xy, recvOffset_yz, recvOffset_xz, recvOffset_Xy, recvOffset_Yz, recvOffset_xZ;
int recvOffset_xY, recvOffset_yZ, recvOffset_Xz, recvOffset_XY, recvOffset_YZ, recvOffset_XZ;
int recvOffset_x, recvOffset_y, recvOffset_z, recvOffset_X, recvOffset_Y,
int recvOffset_xy, recvOffset_yz, recvOffset_xz, recvOffset_Xy,
recvOffset_Yz, recvOffset_xZ;
int recvOffset_xY, recvOffset_yZ, recvOffset_Xz, recvOffset_XY,
recvOffset_YZ, recvOffset_XZ;
int recvOffset_xyz, recvOffset_XYZ, recvOffset_xYz, recvOffset_XyZ;
int recvOffset_Xyz, recvOffset_xYZ, recvOffset_xyZ, recvOffset_XYz;
int recvOffset_Xyz, recvOffset_xYZ, recvOffset_xyZ, recvOffset_XYz;
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y,
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz,
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ,
int sendCount_xyz, sendCount_XYZ, sendCount_xYz, sendCount_XyZ;
int sendCount_Xyz, sendCount_xYZ, sendCount_xyZ, sendCount_XYz;
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y,
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz,
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ,
int recvCount_xyz, recvCount_XYZ, recvCount_xYz, recvCount_XyZ;
int recvCount_Xyz, recvCount_xYZ, recvCount_xyZ, recvCount_XYz;
int recvCount_Xyz, recvCount_xYZ, recvCount_xyZ, recvCount_XYz;
std::vector<char> sendList;
std::vector<char> recvList;
// Communication buffers
signed char *sendID_x, *sendID_y, *sendID_z, *sendID_X, *sendID_Y, *sendID_Z;
signed char *sendID_xy, *sendID_yz, *sendID_xz, *sendID_Xy, *sendID_Yz, *sendID_xZ;
signed char *sendID_xY, *sendID_yZ, *sendID_Xz, *sendID_XY, *sendID_YZ, *sendID_XZ;
signed char *recvID_x, *recvID_y, *recvID_z, *recvID_X, *recvID_Y, *recvID_Z;
signed char *recvID_xy, *recvID_yz, *recvID_xz, *recvID_Xy, *recvID_Yz, *recvID_xZ;
signed char *recvID_xY, *recvID_yZ, *recvID_Xz, *recvID_XY, *recvID_YZ, *recvID_XZ;
// Communication buffers
signed char *sendID_x, *sendID_y, *sendID_z, *sendID_X, *sendID_Y,
signed char *sendID_xy, *sendID_yz, *sendID_xz, *sendID_Xy, *sendID_Yz,
signed char *sendID_xY, *sendID_yZ, *sendID_Xz, *sendID_XY, *sendID_YZ,
signed char *recvID_x, *recvID_y, *recvID_z, *recvID_X, *recvID_Y,
signed char *recvID_xy, *recvID_yz, *recvID_xz, *recvID_Xy, *recvID_Yz,
signed char *recvID_xY, *recvID_yZ, *recvID_Xz, *recvID_XY, *recvID_YZ,

View File

@ -26,42 +26,42 @@
#include "models/ColorModel.h"
#include <limits.h>
// Types of analysis
enum class AnalysisType : uint64_t {
AnalyzeNone = 0,
IdentifyBlobs = 0x01,
AnalyzeNone = 0,
IdentifyBlobs = 0x01,
CopyPhaseIndicator = 0x02,
CopySimState = 0x04,
ComputeAverages = 0x08,
CreateRestart = 0x10,
WriteVis = 0x20,
ComputeSubphase = 0x40
CopySimState = 0x04,
ComputeAverages = 0x08,
CreateRestart = 0x10,
WriteVis = 0x20,
ComputeSubphase = 0x40
//! Class to run the analysis in multiple threads
class runAnalysis
class runAnalysis {
//! Constructor
runAnalysis( std::shared_ptr<Database> db, const RankInfoStruct &rank_info,
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm, std::shared_ptr<Domain> dm, int Np,
bool Regular, IntArray Map );
runAnalysis( ScaLBL_ColorModel &ColorModel);
runAnalysis(std::shared_ptr<Database> db, const RankInfoStruct &rank_info,
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm,
std::shared_ptr<Domain> dm, int Np, bool Regular, IntArray Map);
runAnalysis(ScaLBL_ColorModel &ColorModel);
//! Destructor
//! Run the next analysis
void run( int timestep, std::shared_ptr<Database> db, TwoPhase &Averages, const double *Phi,
double *Pressure, double *Velocity, double *fq, double *Den );
void run(int timestep, std::shared_ptr<Database> db, TwoPhase &Averages,
const double *Phi, double *Pressure, double *Velocity, double *fq,
double *Den);
void basic( int timestep, std::shared_ptr<Database> db, SubPhase &Averages, const double *Phi,
double *Pressure, double *Velocity, double *fq, double *Den );
void WriteVisData( int timestep, std::shared_ptr<Database> vis_db, SubPhase &Averages,
const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den );
void basic(int timestep, std::shared_ptr<Database> db, SubPhase &Averages,
const double *Phi, double *Pressure, double *Velocity,
double *fq, double *Den);
void WriteVisData(int timestep, std::shared_ptr<Database> vis_db,
SubPhase &Averages, const double *Phi, double *Pressure,
double *Velocity, double *fq, double *Den);
//! Finish all active analysis
void finish();
@ -80,27 +80,26 @@ public:
* that all threads run on independent cores
* @param[in] N_threads Number of threads, only used by some of the methods
void createThreads( const std::string &method = "default", int N_threads = 4 );
void createThreads(const std::string &method = "default",
int N_threads = 4);
// Determine the analysis to perform
AnalysisType computeAnalysisType( int timestep );
AnalysisType computeAnalysisType(int timestep);
class commWrapper
class commWrapper {
Utilities::MPI comm;
int tag;
runAnalysis *analysis;
commWrapper( int tag, const Utilities::MPI &comm, runAnalysis *analysis );
commWrapper() = delete;
commWrapper( const commWrapper &rhs ) = delete;
commWrapper &operator=( const commWrapper &rhs ) = delete;
commWrapper( commWrapper &&rhs );
commWrapper(int tag, const Utilities::MPI &comm, runAnalysis *analysis);
commWrapper() = delete;
commWrapper(const commWrapper &rhs) = delete;
commWrapper &operator=(const commWrapper &rhs) = delete;
commWrapper(commWrapper &&rhs);
@ -112,12 +111,13 @@ private:
std::array<int, 3> d_N; // Number of local cells with ghosts
int d_Np;
int d_rank;
int d_restart_interval, d_analysis_interval, d_blobid_interval, d_visualization_interval;
int d_restart_interval, d_analysis_interval, d_blobid_interval,
int d_subphase_analysis_interval;
double d_beta;
bool d_regular;
std::string format; // IO format string "silo" or "hdf5"
std::string format; // IO format string "silo" or "hdf5"
ThreadPool d_tpool;
RankInfoStruct d_rank_info;
IntArray d_Map;

View File

@ -20,316 +20,312 @@
#include "analysis/filters.h"
#include "analysis/imfilter.h"
template<class T>
inline int sign( T x )
if ( x==0 )
template <class T> inline int sign(T x) {
if (x == 0)
return 0;
return x>0 ? 1:-1;
return x > 0 ? 1 : -1;
inline float trilinear( float dx, float dy, float dz, float f1, float f2,
float f3, float f4, float f5, float f6, float f7, float f8 )
inline float trilinear(float dx, float dy, float dz, float f1, float f2,
float f3, float f4, float f5, float f6, float f7,
float f8) {
double f, dx2, dy2, dz2, h0, h1;
dx2 = 1.0 - dx;
dy2 = 1.0 - dy;
dz2 = 1.0 - dz;
h0 = ( dx * f2 + dx2 * f1 ) * dy2 + ( dx * f4 + dx2 * f3 ) * dy;
h1 = ( dx * f6 + dx2 * f5 ) * dy2 + ( dx * f8 + dx2 * f7 ) * dy;
f = h0 * dz2 + h1 * dz;
return ( f );
h0 = (dx * f2 + dx2 * f1) * dy2 + (dx * f4 + dx2 * f3) * dy;
h1 = (dx * f6 + dx2 * f5) * dy2 + (dx * f8 + dx2 * f7) * dy;
f = h0 * dz2 + h1 * dz;
return (f);
void InterpolateMesh(const Array<float> &Coarse, Array<float> &Fine) {
void InterpolateMesh( const Array<float> &Coarse, Array<float> &Fine )
// Interpolate values from a Coarse mesh to a fine one
// This routine assumes cell-centered meshes with 1 ghost cell
// Interpolate values from a Coarse mesh to a fine one
// This routine assumes cell-centered meshes with 1 ghost cell
// Fine mesh
int Nx = int(Fine.size(0)) - 2;
int Ny = int(Fine.size(1)) - 2;
int Nz = int(Fine.size(2)) - 2;
// Fine mesh
int Nx = int(Fine.size(0))-2;
int Ny = int(Fine.size(1))-2;
int Nz = int(Fine.size(2))-2;
// Coarse mesh
int nx = int(Coarse.size(0)) - 2;
int ny = int(Coarse.size(1)) - 2;
int nz = int(Coarse.size(2)) - 2;
// Coarse mesh
int nx = int(Coarse.size(0))-2;
int ny = int(Coarse.size(1))-2;
int nz = int(Coarse.size(2))-2;
// compute the stride
int hx = Nx / nx;
int hy = Ny / ny;
int hz = Nz / nz;
ASSERT(nx * hx == Nx);
ASSERT(ny * hy == Ny);
ASSERT(nz * hz == Nz);
// compute the stride
int hx = Nx/nx;
int hy = Ny/ny;
int hz = Nz/nz;
// value to map distance between meshes (since distance is in voxels)
// usually hx=hy=hz (or something very close)
// the mapping is not exact
// however, it's assumed the coarse solution will be refined
// a good guess is the goal here!
float mapvalue = sqrt(hx * hx + hy * hy + hz * hz);
// value to map distance between meshes (since distance is in voxels)
// usually hx=hy=hz (or something very close)
// the mapping is not exact
// however, it's assumed the coarse solution will be refined
// a good guess is the goal here!
float mapvalue = sqrt(hx*hx+hy*hy+hz*hz);
// Interpolate to the fine mesh
for (int k=-1; k<Nz+1; k++){
int k0 = floor((k-0.5*hz)/hz);
int k1 = k0+1;
int k2 = k0+2;
float dz = ( (k+0.5) - (k0+0.5)*hz ) / hz;
for (int j=-1; j<Ny+1; j++){
int j0 = floor((j-0.5*hy)/hy);
int j1 = j0+1;
int j2 = j0+2;
float dy = ( (j+0.5) - (j0+0.5)*hy ) / hy;
for (int i=-1; i<Nx+1; i++){
int i0 = floor((i-0.5*hx)/hx);
int i1 = i0+1;
int i2 = i0+2;
float dx = ( (i+0.5) - (i0+0.5)*hx ) / hx;
float val = trilinear( dx, dy, dz,
Coarse(i1,j1,k1), Coarse(i2,j1,k1), Coarse(i1,j2,k1), Coarse(i2,j2,k1),
Coarse(i1,j1,k2), Coarse(i2,j1,k2), Coarse(i1,j2,k2), Coarse(i2,j2,k2) );
Fine(i+1,j+1,k+1) = mapvalue*val;
// Interpolate to the fine mesh
for (int k = -1; k < Nz + 1; k++) {
int k0 = floor((k - 0.5 * hz) / hz);
int k1 = k0 + 1;
int k2 = k0 + 2;
float dz = ((k + 0.5) - (k0 + 0.5) * hz) / hz;
ASSERT(k0 >= -1 && k0 < nz + 1 && dz >= 0 && dz <= 1);
for (int j = -1; j < Ny + 1; j++) {
int j0 = floor((j - 0.5 * hy) / hy);
int j1 = j0 + 1;
int j2 = j0 + 2;
float dy = ((j + 0.5) - (j0 + 0.5) * hy) / hy;
ASSERT(j0 >= -1 && j0 < ny + 1 && dy >= 0 && dy <= 1);
for (int i = -1; i < Nx + 1; i++) {
int i0 = floor((i - 0.5 * hx) / hx);
int i1 = i0 + 1;
int i2 = i0 + 2;
float dx = ((i + 0.5) - (i0 + 0.5) * hx) / hx;
ASSERT(i0 >= -1 && i0 < nx + 1 && dx >= 0 && dx <= 1);
float val = trilinear(
dx, dy, dz, Coarse(i1, j1, k1), Coarse(i2, j1, k1),
Coarse(i1, j2, k1), Coarse(i2, j2, k1), Coarse(i1, j1, k2),
Coarse(i2, j1, k2), Coarse(i1, j2, k2), Coarse(i2, j2, k2));
Fine(i + 1, j + 1, k + 1) = mapvalue * val;
// Smooth the data using the distance
void smooth( const Array<float>& VOL, const Array<float>& Dist, float sigma, Array<float>& MultiScaleSmooth, fillHalo<float>& fillFloat )
for (size_t i=0; i<VOL.length(); i++) {
// use exponential weight based on the distance
float dst = Dist(i);
float tmp = exp(-(dst*dst)/(sigma*sigma));
float value = dst>0 ? -1:1;
MultiScaleSmooth(i) = tmp*VOL(i) + (1-tmp)*value;
void smooth(const Array<float> &VOL, const Array<float> &Dist, float sigma,
Array<float> &MultiScaleSmooth, fillHalo<float> &fillFloat) {
for (size_t i = 0; i < VOL.length(); i++) {
// use exponential weight based on the distance
float dst = Dist(i);
float tmp = exp(-(dst * dst) / (sigma * sigma));
float value = dst > 0 ? -1 : 1;
MultiScaleSmooth(i) = tmp * VOL(i) + (1 - tmp) * value;
// Segment the data
void segment( const Array<float>& data, Array<char>& ID, float tol )
for (size_t i=0; i<data.length(); i++) {
if ( data(i) > tol )
void segment(const Array<float> &data, Array<char> &ID, float tol) {
ASSERT(data.size() == ID.size());
for (size_t i = 0; i < data.length(); i++) {
if (data(i) > tol)
ID(i) = 0;
ID(i) = 1;
// Remove disconnected phases
void removeDisconnected( Array<char>& ID, const Domain& Dm )
void removeDisconnected(Array<char> &ID, const Domain &Dm) {
// Run blob identification to remove disconnected volumes
BlobIDArray GlobalBlobID;
DoubleArray SignDist(ID.size());
DoubleArray Phase(ID.size());
for (size_t i=0; i<ID.length(); i++) {
SignDist(i) = (2*ID(i)-1);
for (size_t i = 0; i < ID.length(); i++) {
SignDist(i) = (2 * ID(i) - 1);
Phase(i) = 1;
ComputeGlobalBlobIDs( ID.size(0)-2, ID.size(1)-2, ID.size(2)-2,
Dm.rank_info, Phase, SignDist, 0, 0, GlobalBlobID, Dm.Comm );
for (size_t i=0; i<ID.length(); i++) {
if ( GlobalBlobID(i) > 0 )
ComputeGlobalBlobIDs(ID.size(0) - 2, ID.size(1) - 2, ID.size(2) - 2,
Dm.rank_info, Phase, SignDist, 0, 0, GlobalBlobID,
for (size_t i = 0; i < ID.length(); i++) {
if (GlobalBlobID(i) > 0)
ID(i) = 0;
ID(i) = GlobalBlobID(i);
// Solve a level (without any coarse level information)
void solve( const Array<float>& VOL, Array<float>& Mean, Array<char>& ID,
Array<float>& Dist, Array<float>& MultiScaleSmooth, Array<float>& NonLocalMean,
fillHalo<float>& fillFloat, const Domain& Dm, int nprocx,
float threshold, float lamda, float sigsq, int depth)
void solve(const Array<float> &VOL, Array<float> &Mean, Array<char> &ID,
Array<float> &Dist, Array<float> &MultiScaleSmooth,
Array<float> &NonLocalMean, fillHalo<float> &fillFloat,
const Domain &Dm, int nprocx, float threshold, float lamda,
float sigsq, int depth) {
PROFILE_SCOPED(timer, "solve");
// Compute the median filter on the sparse array
Med3D( VOL, Mean );
fillFloat.fill( Mean );
segment( Mean, ID, threshold );
Med3D(VOL, Mean);
segment(Mean, ID, threshold);
// Compute the distance using the segmented volume
CalcDist( Dist, ID, Dm );
smooth( VOL, Dist, 2.0, MultiScaleSmooth, fillFloat );
CalcDist(Dist, ID, Dm);
smooth(VOL, Dist, 2.0, MultiScaleSmooth, fillFloat);
// Compute non-local mean
// int depth = 5;
// float sigsq=0.1;
int nlm_count = NLM3D( MultiScaleSmooth, Mean, Dist, NonLocalMean, depth, sigsq);
NULL_USE( nlm_count );
int nlm_count =
NLM3D(MultiScaleSmooth, Mean, Dist, NonLocalMean, depth, sigsq);
// Refine a solution from a coarse grid to a fine grid
void refine( const Array<float>& Dist_coarse,
const Array<float>& VOL, Array<float>& Mean, Array<char>& ID,
Array<float>& Dist, Array<float>& MultiScaleSmooth, Array<float>& NonLocalMean,
fillHalo<float>& fillFloat, const Domain& Dm, int nprocx, int level,
float threshold, float lamda, float sigsq, int depth)
int ratio[3] = { int(Dist.size(0)/Dist_coarse.size(0)),
int(Dist.size(2)/Dist_coarse.size(2)) };
void refine(const Array<float> &Dist_coarse, const Array<float> &VOL,
Array<float> &Mean, Array<char> &ID, Array<float> &Dist,
Array<float> &MultiScaleSmooth, Array<float> &NonLocalMean,
fillHalo<float> &fillFloat, const Domain &Dm, int nprocx, int level,
float threshold, float lamda, float sigsq, int depth) {
PROFILE_SCOPED(timer, "refine");
int ratio[3] = {int(Dist.size(0) / Dist_coarse.size(0)),
int(Dist.size(1) / Dist_coarse.size(1)),
int(Dist.size(2) / Dist_coarse.size(2))};
// Interpolate the distance from the coarse to fine grid
InterpolateMesh( Dist_coarse, Dist );
InterpolateMesh(Dist_coarse, Dist);
// Compute the median filter on the array and segment
Med3D( VOL, Mean );
fillFloat.fill( Mean );
segment( Mean, ID, threshold );
Med3D(VOL, Mean);
segment(Mean, ID, threshold);
// If the ID has the wrong distance, set the distance to 0 and run a simple filter to set neighbors to 0
for (size_t i=0; i<ID.length(); i++) {
char id = Dist(i)>0 ? 1:0;
if ( id != ID(i) )
for (size_t i = 0; i < ID.length(); i++) {
char id = Dist(i) > 0 ? 1 : 0;
if (id != ID(i))
Dist(i) = 0;
fillFloat.fill( Dist );
std::function<float(int,const float*)> filter_1D = []( int N, const float* data )
bool zero = data[0]==0 || data[2]==0;
return zero ? data[1]*1e-12 : data[1];
std::function<float(int, const float *)> filter_1D = [](int N,
const float *data) {
bool zero = data[0] == 0 || data[2] == 0;
return zero ? data[1] * 1e-12 : data[1];
std::vector<imfilter::BC> BC(3,imfilter::BC::replicate);
std::vector<std::function<float(int,const float*)>> filter_set(3,filter_1D);
Dist = imfilter::imfilter_separable<float>( Dist, {1,1,1}, filter_set, BC );
fillFloat.fill( Dist );
std::vector<imfilter::BC> BC(3, imfilter::BC::replicate);
std::vector<std::function<float(int, const float *)>> filter_set(3,
Dist = imfilter::imfilter_separable<float>(Dist, {1, 1, 1}, filter_set, BC);
// Smooth the volume data
float h = 2*lamda*sqrt(double(ratio[0]*ratio[0]+ratio[1]*ratio[1]+ratio[2]*ratio[2]));
smooth( VOL, Dist, h, MultiScaleSmooth, fillFloat );
float h = 2 * lamda *
sqrt(double(ratio[0] * ratio[0] + ratio[1] * ratio[1] +
ratio[2] * ratio[2]));
smooth(VOL, Dist, h, MultiScaleSmooth, fillFloat);
// Compute non-local mean
// int depth = 3;
// float sigsq = 0.1;
int nlm_count = NLM3D( MultiScaleSmooth, Mean, Dist, NonLocalMean, depth, sigsq);
NULL_USE( nlm_count );
segment( NonLocalMean, ID, 0.001 );
for (size_t i=0; i<ID.length(); i++) {
char id = Dist(i)>0 ? 1:0;
if ( id!=ID(i) || fabs(Dist(i))<1 )
Dist(i) = 2.0*ID(i)-1.0;
// int depth = 3;
// float sigsq = 0.1;
int nlm_count =
NLM3D(MultiScaleSmooth, Mean, Dist, NonLocalMean, depth, sigsq);
segment(NonLocalMean, ID, 0.001);
for (size_t i = 0; i < ID.length(); i++) {
char id = Dist(i) > 0 ? 1 : 0;
if (id != ID(i) || fabs(Dist(i)) < 1)
Dist(i) = 2.0 * ID(i) - 1.0;
// Remove disconnected domains
//removeDisconnected( ID, Dm );
// Compute the distance using the segmented volume
if ( level > 0 ) {
CalcDist( Dist, ID, Dm );
if (level > 0) {
CalcDist(Dist, ID, Dm);
// Remove regions that are likely noise by shrinking the volumes by dx,
// removing all values that are more than dx+delta from the surface, and then
// growing by dx+delta and intersecting with the original data
void filter_final( Array<char>& ID, Array<float>& Dist,
fillHalo<float>& fillFloat, const Domain& Dm,
Array<float>& Mean, Array<float>& Dist1, Array<float>& Dist2 )
int rank = Dm.Comm.getRank();
int Nx = Dm.Nx-2;
int Ny = Dm.Ny-2;
int Nz = Dm.Nz-2;
void filter_final(Array<char> &ID, Array<float> &Dist,
fillHalo<float> &fillFloat, const Domain &Dm,
Array<float> &Mean, Array<float> &Dist1,
Array<float> &Dist2) {
PROFILE_SCOPED(timer, "filter_final");
int rank = Dm.Comm.getRank();
int Nx = Dm.Nx - 2;
int Ny = Dm.Ny - 2;
int Nz = Dm.Nz - 2;
// Calculate the distance
CalcDist( Dist, ID, Dm );
CalcDist(Dist, ID, Dm);
// Compute the range to shrink the volume based on the L2 norm of the distance
Array<float> Dist0(Nx,Ny,Nz);
Array<float> Dist0(Nx, Ny, Nz);
fillFloat.copy(Dist, Dist0);
float tmp = 0;
for (size_t i=0; i<Dist0.length(); i++)
tmp += Dist0(i)*Dist0(i);
tmp = sqrt( Dm.Comm.sumReduce(tmp) / Dm.Comm.sumReduce<float>(Dist0.length()) );
const float dx1 = 0.3*tmp;
const float dx2 = 1.05*dx1;
if (rank==0)
printf(" %0.1f %0.1f %0.1f\n",tmp,dx1,dx2);
for (size_t i = 0; i < Dist0.length(); i++)
tmp += Dist0(i) * Dist0(i);
tmp =
sqrt(Dm.Comm.sumReduce(tmp) / Dm.Comm.sumReduce<float>(Dist0.length()));
const float dx1 = 0.3 * tmp;
const float dx2 = 1.05 * dx1;
if (rank == 0)
printf(" %0.1f %0.1f %0.1f\n", tmp, dx1, dx2);
// Update the IDs/Distance removing regions that are < dx of the range
Dist1 = Dist;
Dist2 = Dist;
Array<char> ID1 = ID;
Array<char> ID2 = ID;
for (size_t i=0; i<ID.length(); i++) {
ID1(i) = Dist(i)<-dx1 ? 1:0;
ID2(i) = Dist(i)> dx1 ? 1:0;
for (size_t i = 0; i < ID.length(); i++) {
ID1(i) = Dist(i) < -dx1 ? 1 : 0;
ID2(i) = Dist(i) > dx1 ? 1 : 0;
//Array<float> Dist1 = Dist;
//Array<float> Dist2 = Dist;
CalcDist( Dist1, ID1, Dm );
CalcDist( Dist2, ID2, Dm );
CalcDist(Dist1, ID1, Dm);
CalcDist(Dist2, ID2, Dm);
// Keep those regions that are within dx2 of the new volumes
Mean = Dist;
for (size_t i=0; i<ID.length(); i++) {
if ( Dist1(i)+dx2>0 && ID(i)<=0 ) {
for (size_t i = 0; i < ID.length(); i++) {
if (Dist1(i) + dx2 > 0 && ID(i) <= 0) {
Mean(i) = -1;
} else if ( Dist2(i)+dx2>0 && ID(i)>0 ) {
} else if (Dist2(i) + dx2 > 0 && ID(i) > 0) {
Mean(i) = 1;
} else {
Mean(i) = Dist(i)>0 ? 0.5:-0.5;
Mean(i) = Dist(i) > 0 ? 0.5 : -0.5;
// Find regions of uncertainty that are entirely contained within another region
fillHalo<double> fillDouble(Dm.Comm,Dm.rank_info,{Nx,Ny,Nz},{1,1,1},0,1);
fillHalo<BlobIDType> fillInt(Dm.Comm,Dm.rank_info,{Nx,Ny,Nz},{1,1,1},0,1);
fillHalo<double> fillDouble(Dm.Comm, Dm.rank_info, {Nx, Ny, Nz}, {1, 1, 1},
0, 1);
fillHalo<BlobIDType> fillInt(Dm.Comm, Dm.rank_info, {Nx, Ny, Nz}, {1, 1, 1},
0, 1);
BlobIDArray GlobalBlobID;
DoubleArray SignDist(ID.size());
for (size_t i=0; i<ID.length(); i++)
SignDist(i) = fabs(Mean(i))==1 ? -1:1;
for (size_t i = 0; i < ID.length(); i++)
SignDist(i) = fabs(Mean(i)) == 1 ? -1 : 1;
DoubleArray Phase(ID.size());
ComputeGlobalBlobIDs( Nx, Ny, Nz, Dm.rank_info, Phase, SignDist, 0, 0, GlobalBlobID, Dm.Comm );
ComputeGlobalBlobIDs(Nx, Ny, Nz, Dm.rank_info, Phase, SignDist, 0, 0,
GlobalBlobID, Dm.Comm);
int N_blobs = Dm.Comm.maxReduce(GlobalBlobID.max()+1);
std::vector<float> mean(N_blobs,0);
std::vector<int> count(N_blobs,0);
for (int k=1; k<=Nz; k++) {
for (int j=1; j<=Ny; j++) {
for (int i=1; i<=Nx; i++) {
int id = GlobalBlobID(i,j,k);
if ( id >= 0 ) {
if ( GlobalBlobID(i-1,j,k)<0 ) {
mean[id] += Mean(i-1,j,k);
int N_blobs = Dm.Comm.maxReduce(GlobalBlobID.max() + 1);
std::vector<float> mean(N_blobs, 0);
std::vector<int> count(N_blobs, 0);
for (int k = 1; k <= Nz; k++) {
for (int j = 1; j <= Ny; j++) {
for (int i = 1; i <= Nx; i++) {
int id = GlobalBlobID(i, j, k);
if (id >= 0) {
if (GlobalBlobID(i - 1, j, k) < 0) {
mean[id] += Mean(i - 1, j, k);
if ( GlobalBlobID(i+1,j,k)<0 ) {
mean[id] += Mean(i+1,j,k);
if (GlobalBlobID(i + 1, j, k) < 0) {
mean[id] += Mean(i + 1, j, k);
if ( GlobalBlobID(i,j-1,k)<0 ) {
mean[id] += Mean(i,j-1,k);
if (GlobalBlobID(i, j - 1, k) < 0) {
mean[id] += Mean(i, j - 1, k);
if ( GlobalBlobID(i,j+1,k)<0 ) {
mean[id] += Mean(i,j+1,k);
if (GlobalBlobID(i, j + 1, k) < 0) {
mean[id] += Mean(i, j + 1, k);
if ( GlobalBlobID(i,j,k-1)<0 ) {
mean[id] += Mean(i,j,k-1);
if (GlobalBlobID(i, j, k - 1) < 0) {
mean[id] += Mean(i, j, k - 1);
if ( GlobalBlobID(i,j,k+1)<0 ) {
mean[id] += Mean(i,j,k+1);
if (GlobalBlobID(i, j, k + 1) < 0) {
mean[id] += Mean(i, j, k + 1);
@ -338,16 +334,16 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
mean = Dm.Comm.sumReduce(mean);
count = Dm.Comm.sumReduce(count);
for (size_t i=0; i<mean.size(); i++)
for (size_t i = 0; i < mean.size(); i++)
mean[i] /= count[i];
/*if (rank==0) {
for (size_t i=0; i<mean.size(); i++)
printf("%i %0.4f\n",i,mean[i]);
for (size_t i=0; i<Mean.length(); i++) {
for (size_t i = 0; i < Mean.length(); i++) {
int id = GlobalBlobID(i);
if ( id >= 0 ) {
if ( fabs(mean[id]) > 0.95 ) {
if (id >= 0) {
if (fabs(mean[id]) > 0.95) {
// Isolated domain surrounded by one domain
GlobalBlobID(i) = -2;
Mean(i) = sign(mean[id]);
@ -359,53 +355,54 @@ void filter_final( Array<char>& ID, Array<float>& Dist,
// Perform the final segmentation and update the distance
segment( Mean, ID, 0.01 );
CalcDist( Dist, ID, Dm );
segment(Mean, ID, 0.01);
CalcDist(Dist, ID, Dm);
// Filter the original data
void filter_src( const Domain& Dm, Array<float>& src )
PROFILE_START("Filter source data");
int Nx = Dm.Nx-2;
int Ny = Dm.Ny-2;
int Nz = Dm.Nz-2;
fillHalo<float> fillFloat(Dm.Comm,Dm.rank_info,{Nx,Ny,Nz},{1,1,1},0,1);
void filter_src(const Domain &Dm, Array<float> &src) {
PROFILE_START("Filter source data");
int Nx = Dm.Nx - 2;
int Ny = Dm.Ny - 2;
int Nz = Dm.Nz - 2;
fillHalo<float> fillFloat(Dm.Comm, Dm.rank_info, {Nx, Ny, Nz}, {1, 1, 1}, 0,
// Perform a hot-spot filter on the data
std::vector<imfilter::BC> BC = { imfilter::BC::replicate, imfilter::BC::replicate, imfilter::BC::replicate };
std::function<float(const Array<float>&)> filter_3D = []( const Array<float>& data )
float min1 = std::min(data(0,1,1),data(2,1,1));
float min2 = std::min(data(1,0,1),data(1,2,1));
float min3 = std::min(data(1,1,0),data(1,1,2));
float max1 = std::max(data(0,1,1),data(2,1,1));
float max2 = std::max(data(1,0,1),data(1,2,1));
float max3 = std::max(data(1,1,0),data(1,1,2));
float min = std::min(min1,std::min(min2,min3));
float max = std::max(max1,std::max(max2,max3));
return std::max(std::min(data(1,1,1),max),min);
std::function<float(const Array<float>&)> filter_1D = []( const Array<float>& data )
float min = std::min(data(0),data(2));
float max = std::max(data(0),data(2));
return std::max(std::min(data(1),max),min);
std::vector<imfilter::BC> BC = {imfilter::BC::replicate,
std::function<float(const Array<float> &)> filter_3D =
[](const Array<float> &data) {
float min1 = std::min(data(0, 1, 1), data(2, 1, 1));
float min2 = std::min(data(1, 0, 1), data(1, 2, 1));
float min3 = std::min(data(1, 1, 0), data(1, 1, 2));
float max1 = std::max(data(0, 1, 1), data(2, 1, 1));
float max2 = std::max(data(1, 0, 1), data(1, 2, 1));
float max3 = std::max(data(1, 1, 0), data(1, 1, 2));
float min = std::min(min1, std::min(min2, min3));
float max = std::max(max1, std::max(max2, max3));
return std::max(std::min(data(1, 1, 1), max), min);
std::function<float(const Array<float> &)> filter_1D =
[](const Array<float> &data) {
float min = std::min(data(0), data(2));
float max = std::max(data(0), data(2));
return std::max(std::min(data(1), max), min);
//LOCVOL[0] = imfilter::imfilter<float>( LOCVOL[0], {1,1,1}, filter_3D, BC );
std::vector<std::function<float(const Array<float>&)>> filter_set(3,filter_1D);
src = imfilter::imfilter_separable<float>( src, {1,1,1}, filter_set, BC );
fillFloat.fill( src );
std::vector<std::function<float(const Array<float> &)>> filter_set(
3, filter_1D);
src = imfilter::imfilter_separable<float>(src, {1, 1, 1}, filter_set, BC);
// Perform a gaussian filter on the data
int Nh[3] = { 2, 2, 2 };
float sigma[3] = { 1.0, 1.0, 1.0 };
int Nh[3] = {2, 2, 2};
float sigma[3] = {1.0, 1.0, 1.0};
std::vector<Array<float>> H(3);
H[0] = imfilter::create_filter<float>( { Nh[0] }, "gaussian", &sigma[0] );
H[1] = imfilter::create_filter<float>( { Nh[1] }, "gaussian", &sigma[1] );
H[2] = imfilter::create_filter<float>( { Nh[2] }, "gaussian", &sigma[2] );
src = imfilter::imfilter_separable( src, H, BC );
fillFloat.fill( src );
PROFILE_STOP("Filter source data");
H[0] = imfilter::create_filter<float>({Nh[0]}, "gaussian", &sigma[0]);
H[1] = imfilter::create_filter<float>({Nh[1]}, "gaussian", &sigma[1]);
H[2] = imfilter::create_filter<float>({Nh[2]}, "gaussian", &sigma[2]);
src = imfilter::imfilter_separable(src, H, BC);
PROFILE_STOP("Filter source data");

@ -21,54 +21,46 @@
#include "common/Domain.h"
#include "common/Communication.h"
* @brief Interpolate between meshes
* @details This routine interpolates from a coarse to a fine mesh
* @param[in] Coarse Coarse mesh solution
* @param[out] Fine Fine mesh solution
void InterpolateMesh( const Array<float> &Coarse, Array<float> &Fine );
void InterpolateMesh(const Array<float> &Coarse, Array<float> &Fine);
// Smooth the data using the distance
void smooth( const Array<float>& VOL, const Array<float>& Dist, float sigma, Array<float>& MultiScaleSmooth, fillHalo<float>& fillFloat );
void smooth(const Array<float> &VOL, const Array<float> &Dist, float sigma,
Array<float> &MultiScaleSmooth, fillHalo<float> &fillFloat);
// Segment the data
void segment( const Array<float>& data, Array<char>& ID, float tol );
void segment(const Array<float> &data, Array<char> &ID, float tol);
// Remove disconnected phases
void removeDisconnected( Array<char>& ID, const Domain& Dm );
void removeDisconnected(Array<char> &ID, const Domain &Dm);
// Solve a level (without any coarse level information)
void solve( const Array<float>& VOL, Array<float>& Mean, Array<char>& ID,
Array<float>& Dist, Array<float>& MultiScaleSmooth, Array<float>& NonLocalMean,
fillHalo<float>& fillFloat, const Domain& Dm, int nprocx,
float threshold, float lamda, float sigsq, int depth);
void solve(const Array<float> &VOL, Array<float> &Mean, Array<char> &ID,
Array<float> &Dist, Array<float> &MultiScaleSmooth,
Array<float> &NonLocalMean, fillHalo<float> &fillFloat,
const Domain &Dm, int nprocx, float threshold, float lamda,
float sigsq, int depth);
// Refine a solution from a coarse grid to a fine grid
void refine( const Array<float>& Dist_coarse,
const Array<float>& VOL, Array<float>& Mean, Array<char>& ID,
Array<float>& Dist, Array<float>& MultiScaleSmooth, Array<float>& NonLocalMean,
fillHalo<float>& fillFloat, const Domain& Dm, int nprocx, int level,
float threshold, float lamda, float sigsq, int depth);
void refine(const Array<float> &Dist_coarse, const Array<float> &VOL,
Array<float> &Mean, Array<char> &ID, Array<float> &Dist,
Array<float> &MultiScaleSmooth, Array<float> &NonLocalMean,
fillHalo<float> &fillFloat, const Domain &Dm, int nprocx, int level,
float threshold, float lamda, float sigsq, int depth);
// Remove regions that are likely noise by shrinking the volumes by dx,
// removing all values that are more than dx+delta from the surface, and then
// growing by dx+delta and intersecting with the original data
void filter_final( Array<char>& ID, Array<float>& Dist,
fillHalo<float>& fillFloat, const Domain& Dm,
Array<float>& Mean, Array<float>& Dist1, Array<float>& Dist2 );
void filter_final(Array<char> &ID, Array<float> &Dist,
fillHalo<float> &fillFloat, const Domain &Dm,
Array<float> &Mean, Array<float> &Dist1, Array<float> &Dist2);
// Filter the original data
void filter_src( const Domain& Dm, Array<float>& src );
void filter_src(const Domain &Dm, Array<float> &src);

@ -27,13 +27,10 @@
#include <string>
#include <vector>
* Class Array is a multi-dimensional array class written by Mark Berrill
template<class TYPE, class FUN, class Allocator>
class Array final
template <class TYPE, class FUN, class Allocator> class Array final {
public: // Constructors / assignment operators
* Create a new empty Array
@ -44,20 +41,20 @@ public: // Constructors / assignment operators
* Create an Array with the given size
* @param N Size of the array
explicit Array( const ArraySize &N );
explicit Array(const ArraySize &N);
* Create a new 1D Array with the given number of elements
* @param N Number of elements in the array
explicit Array( size_t N );
explicit Array(size_t N);
* Create a new 2D Array with the given number of rows and columns
* @param N_rows Number of rows
* @param N_columns Number of columns
explicit Array( size_t N_rows, size_t N_columns );
explicit Array(size_t N_rows, size_t N_columns);
* Create a new 3D Array with the given number of rows and columns
@ -65,7 +62,7 @@ public: // Constructors / assignment operators
* @param N2 Number of columns
* @param N3 Number of elements in the third dimension
explicit Array( size_t N1, size_t N2, size_t N3 );
explicit Array(size_t N1, size_t N2, size_t N3);
* Create a new 4D Array with the given number of rows and columns
@ -74,7 +71,7 @@ public: // Constructors / assignment operators
* @param N3 Number of elements in the third dimension
* @param N4 Number of elements in the fourth dimension
explicit Array( size_t N1, size_t N2, size_t N3, size_t N4 );
explicit Array(size_t N1, size_t N2, size_t N3, size_t N4);
* Create a new 5D Array with the given number of elements in each dimension
@ -84,76 +81,74 @@ public: // Constructors / assignment operators
* @param N4 Number of elements in the fourth dimension
* @param N5 Number of elements in the fifth dimension
explicit Array( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 );
explicit Array(size_t N1, size_t N2, size_t N3, size_t N4, size_t N5);
* Create a multi-dimensional Array with the given number of elements
* @param N Number of elements in each dimension
* @param data Optional raw array to copy the src data
explicit Array( const std::vector<size_t> &N, const TYPE *data = nullptr );
explicit Array(const std::vector<size_t> &N, const TYPE *data = nullptr);
* Create a 1D Array using a string that mimics MATLAB
* @param range Range of the data
explicit Array( std::string range );
explicit Array(std::string range);
* Create a 1D Array with the given initializer list
* @param data Input data
Array( std::initializer_list<TYPE> data );
Array(std::initializer_list<TYPE> data);
* Create a 2D Array with the given initializer lists
* @param data Input data
Array( std::initializer_list<std::initializer_list<TYPE>> data );
Array(std::initializer_list<std::initializer_list<TYPE>> data);
* Copy constructor
* @param rhs Array to copy
Array( const Array &rhs );
Array(const Array &rhs);
* Move constructor
* @param rhs Array to move from
Array( Array &&rhs );
Array(Array &&rhs);
* Assignment operator
* @param rhs Array to copy
Array &operator=( const Array &rhs );
Array &operator=(const Array &rhs);
* Move assignment operator
* @param rhs Array to move from
Array &operator=( Array &&rhs );
Array &operator=(Array &&rhs);
* Assignment operator
* @param rhs std::vector to copy
Array &operator=( const std::vector<TYPE> &rhs );
Array &operator=(const std::vector<TYPE> &rhs);
//! Is copyable?
inline bool isCopyable() const { return d_isCopyable; }
//! Set is copyable
inline void setCopyable( bool flag ) { d_isCopyable = flag; }
inline void setCopyable(bool flag) { d_isCopyable = flag; }
//! Is fixed size?
inline bool isFixedSize() const { return d_isFixedSize; }
//! Set is fixed size
inline void setFixedSize( bool flag ) { d_isFixedSize = flag; }
inline void setFixedSize(bool flag) { d_isFixedSize = flag; }
public: // Views/copies/subset
@ -161,30 +156,29 @@ public: // Views/copies/subset
* @param N Number of elements in each dimension
* @param data Pointer to the data
static std::unique_ptr<Array> view( const ArraySize &N, std::shared_ptr<TYPE> data );
static std::unique_ptr<Array> view(const ArraySize &N,
std::shared_ptr<TYPE> data);
* Create a multi-dimensional Array view to a raw block of data
* @param N Number of elements in each dimension
* @param data Pointer to the data
static std::unique_ptr<const Array> constView( const ArraySize &N,
std::shared_ptr<const TYPE> const &data );
static std::unique_ptr<const Array>
constView(const ArraySize &N, std::shared_ptr<const TYPE> const &data);
* Make this object a view of the src
* @param src Source vector to take the view of
void view2( Array &src );
void view2(Array &src);
* Make this object a view of the data
* @param N Number of elements in each dimension
* @param data Pointer to the data
void view2( const ArraySize &N, std::shared_ptr<TYPE> data );
void view2(const ArraySize &N, std::shared_ptr<TYPE> data);
* Make this object a view of the raw data (expert use only).
@ -199,10 +193,9 @@ public: // Views/copies/subset
* @param isCopyable Once the view is created, can the array be copied
* @param isFixedSize Once the view is created, is the array size fixed
inline void viewRaw(
int ndim, const size_t *dims, TYPE *data, bool isCopyable = true, bool isFixedSize = true )
viewRaw( ArraySize( ndim, dims ), data, isCopyable, isFixedSize );
inline void viewRaw(int ndim, const size_t *dims, TYPE *data,
bool isCopyable = true, bool isFixedSize = true) {
viewRaw(ArraySize(ndim, dims), data, isCopyable, isFixedSize);
@ -217,7 +210,8 @@ public: // Views/copies/subset
* @param isCopyable Once the view is created, can the array be copied
* @param isFixedSize Once the view is created, is the array size fixed
void viewRaw( const ArraySize &N, TYPE *data, bool isCopyable = true, bool isFixedSize = true );
void viewRaw(const ArraySize &N, TYPE *data, bool isCopyable = true,
bool isFixedSize = true);
* Create an array view of the given data (expert use only).
@ -229,10 +223,9 @@ public: // Views/copies/subset
* @param N Number of elements in each dimension
* @param data Pointer to the data
static inline Array staticView( const ArraySize &N, TYPE *data )
static inline Array staticView(const ArraySize &N, TYPE *data) {
Array x;
x.viewRaw( N, data, true, true );
x.viewRaw(N, data, true, true);
return x;
@ -240,39 +233,34 @@ public: // Views/copies/subset
* Convert an array of one type to another. This may or may not allocate new memory.
* @param array Input array
template<class TYPE2>
template <class TYPE2>
static inline std::unique_ptr<Array<TYPE2, FUN, Allocator>>
convert( std::shared_ptr<Array<TYPE, FUN, Allocator>> array )
auto array2 = std::make_unique<Array<TYPE2>>( array->size() );
array2.copy( *array );
convert(std::shared_ptr<Array<TYPE, FUN, Allocator>> array) {
auto array2 = std::make_unique<Array<TYPE2>>(array->size());
return array2;
* Convert an array of one type to another. This may or may not allocate new memory.
* @param array Input array
template<class TYPE2>
template <class TYPE2>
static inline std::unique_ptr<const Array<TYPE2, FUN, Allocator>>
convert( std::shared_ptr<const Array<TYPE, FUN, Allocator>> array )
auto array2 = std::make_unique<Array<TYPE2>>( array->size() );
array2.copy( *array );
convert(std::shared_ptr<const Array<TYPE, FUN, Allocator>> array) {
auto array2 = std::make_unique<Array<TYPE2>>(array->size());
return array2;
* Copy and convert data from another array to this array
* @param array Source array
template<class TYPE2, class FUN2, class Allocator2>
void inline copy( const Array<TYPE2, FUN2, Allocator2> &array )
resize( array.size() );
copy( );
template <class TYPE2, class FUN2, class Allocator2>
void inline copy(const Array<TYPE2, FUN2, Allocator2> &array) {
@ -280,39 +268,33 @@ public: // Views/copies/subset
* Note: The current array must be allocated to the proper size first.
* @param data Source data
template<class TYPE2>
inline void copy( const TYPE2 *data );
template <class TYPE2> inline void copy(const TYPE2 *data);
* Copy and convert data from this array to a raw vector.
* @param data Destination buffer (must hold at least length() elements)
template<class TYPE2>
inline void copyTo( TYPE2 *data ) const;
template <class TYPE2> inline void copyTo(TYPE2 *data) const;
* Copy and convert data from this array to a new array
template<class TYPE2>
Array<TYPE2, FUN, std::allocator<TYPE2>> inline cloneTo() const
Array<TYPE2, FUN, std::allocator<TYPE2>> dst( this->size() );
copyTo( );
template <class TYPE2>
Array<TYPE2, FUN, std::allocator<TYPE2>> inline cloneTo() const {
Array<TYPE2, FUN, std::allocator<TYPE2>> dst(this->size());
return dst;
/*! swap the raw data pointers for the Arrays after checking for compatibility */
void swap( Array &other );
void swap(Array &other);
* Fill the array with the given value
* @param y Value to fill
inline void fill( const TYPE &y )
for ( auto &x : *this )
inline void fill(const TYPE &y) {
for (auto &x : *this)
x = y;
@ -320,67 +302,56 @@ public: // Views/copies/subset
* Scale the array by the given value
* @param y Value to scale by
template<class TYPE2>
inline void scale( const TYPE2 &y )
for ( auto &x : *this )
template <class TYPE2> inline void scale(const TYPE2 &y) {
for (auto &x : *this)
x *= y;
* Set the values of this array to pow(base, exp)
* @param base Base array
* @param exp Exponent value
void pow( const Array &base, const TYPE &exp );
void pow(const Array &base, const TYPE &exp);
//! Destructor
//! Clear the data in the array
void clear();
//! Return the number of dimensions of the Array
inline int ndim() const { return d_size.ndim(); }
//! Return the size of the Array
inline const ArraySize &size() const { return d_size; }
//! Return the size of the Array in dimension d
inline size_t size( int d ) const { return d_size[d]; }
inline size_t size(int d) const { return d_size[d]; }
//! Return the total number of elements in the Array
inline size_t length() const { return d_size.length(); }
//! Return true if the Array is empty
inline bool empty() const { return d_size.length() == 0; }
//! Return true if the Array is not empty
inline operator bool() const { return d_size.length() != 0; }
* Resize the Array
* @param N Number of elements
inline void resize( size_t N ) { resize( ArraySize( N ) ); }
inline void resize(size_t N) { resize(ArraySize(N)); }
* Resize the Array
* @param N_row Number of rows
* @param N_col Number of columns
inline void resize( size_t N_row, size_t N_col ) { resize( ArraySize( N_row, N_col ) ); }
inline void resize(size_t N_row, size_t N_col) {
resize(ArraySize(N_row, N_col));
* Resize the Array
@ -388,14 +359,15 @@ public: // Views/copies/subset
* @param N2 Number of columns
* @param N3 Number of elements in the third dimension
inline void resize( size_t N1, size_t N2, size_t N3 ) { resize( ArraySize( N1, N2, N3 ) ); }
inline void resize(size_t N1, size_t N2, size_t N3) {
resize(ArraySize(N1, N2, N3));
* Resize the Array
* @param N Number of elements in each dimension
void resize( const ArraySize &N );
void resize(const ArraySize &N);
* Resize the given dimension of the array
@ -403,87 +375,83 @@ public: // Views/copies/subset
* @param N Number of elements for the given dimension
* @param value Value to initialize new elements
void resizeDim( int dim, size_t N, const TYPE &value );
void resizeDim(int dim, size_t N, const TYPE &value);
* Reshape the Array (total size of array will not change)
* @param N Number of elements in each dimension
void reshape( const ArraySize &N );
void reshape(const ArraySize &N);
* Remove singleton dimensions.
void squeeze();
* Reshape the Array so that the number of dimensions is the
* max of ndim and the largest dim>1.
* @param ndim Desired number of dimensions
inline void setNdim( int ndim ) { d_size.setNdim( ndim ); }
inline void setNdim(int ndim) { d_size.setNdim(ndim); }
* Subset the Array
* @param index Index to subset (imin,imax,jmin,jmax,kmin,kmax,...)
Array subset( const std::vector<size_t> &index ) const;
Array subset(const std::vector<size_t> &index) const;
* Subset the Array
* @param index Index to subset (ix:kx:jx,iy:ky:jy,...)
Array subset( const std::vector<Range<size_t>> &index ) const;
Array subset(const std::vector<Range<size_t>> &index) const;
* Copy data from an array into a subset of this array
* @param index Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...)
* @param subset The subset array to copy from
void copySubset( const std::vector<size_t> &index, const Array &subset );
void copySubset(const std::vector<size_t> &index, const Array &subset);
* Copy data from an array into a subset of this array
* @param index Index of the subset
* @param subset The subset array to copy from
void copySubset( const std::vector<Range<size_t>> &index, const Array &subset );
void copySubset(const std::vector<Range<size_t>> &index,
const Array &subset);
* Add data from an array into a subset of this array
* @param index Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...)
* @param subset The subset array to add from
void addSubset( const std::vector<size_t> &index, const Array &subset );
void addSubset(const std::vector<size_t> &index, const Array &subset);
* Add data from an array into a subset of this array
* @param index Index of the subset
* @param subset The subset array to add from
void addSubset( const std::vector<Range<size_t>> &index, const Array &subset );
void addSubset(const std::vector<Range<size_t>> &index,
const Array &subset);
public: // Accessors
* Access the desired element
* @param i The row index
ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i ) { return d_data[d_size.index( i )]; }
ARRAY_ATTRIBUTE inline TYPE &operator()(size_t i) {
return d_data[d_size.index(i)];
* Access the desired element
* @param i The row index
ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i ) const
return d_data[d_size.index( i )];
ARRAY_ATTRIBUTE inline const TYPE &operator()(size_t i) const {
return d_data[d_size.index(i)];
@ -491,9 +459,8 @@ public: // Accessors
* @param i The row index
* @param j The column index
ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i, size_t j )
return d_data[d_size.index( i, j )];
ARRAY_ATTRIBUTE inline TYPE &operator()(size_t i, size_t j) {
return d_data[d_size.index(i, j)];
@ -501,9 +468,8 @@ public: // Accessors
* @param i The row index
* @param j The column index
ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i, size_t j ) const
return d_data[d_size.index( i, j )];
ARRAY_ATTRIBUTE inline const TYPE &operator()(size_t i, size_t j) const {
return d_data[d_size.index(i, j)];
@ -512,9 +478,8 @@ public: // Accessors
* @param j The column index
* @param k The third index
ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i, size_t j, size_t k )
return d_data[d_size.index( i, j, k )];
ARRAY_ATTRIBUTE inline TYPE &operator()(size_t i, size_t j, size_t k) {
return d_data[d_size.index(i, j, k)];
@ -523,9 +488,9 @@ public: // Accessors
* @param j The column index
* @param k The third index
ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const
return d_data[d_size.index( i, j, k )];
ARRAY_ATTRIBUTE inline const TYPE &operator()(size_t i, size_t j,
size_t k) const {
return d_data[d_size.index(i, j, k)];
@ -535,9 +500,9 @@ public: // Accessors
* @param i3 The third index
* @param i4 The fourth index
ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i1, size_t i2, size_t i3, size_t i4 )
return d_data[d_size.index( i1, i2, i3, i4 )];
ARRAY_ATTRIBUTE inline TYPE &operator()(size_t i1, size_t i2, size_t i3,
size_t i4) {
return d_data[d_size.index(i1, i2, i3, i4)];
@ -547,10 +512,9 @@ public: // Accessors
* @param i3 The third index
* @param i4 The fourth index
operator()( size_t i1, size_t i2, size_t i3, size_t i4 ) const
return d_data[d_size.index( i1, i2, i3, i4 )];
ARRAY_ATTRIBUTE inline const TYPE &operator()(size_t i1, size_t i2,
size_t i3, size_t i4) const {
return d_data[d_size.index(i1, i2, i3, i4)];
@ -561,9 +525,9 @@ public: // Accessors
* @param i4 The fourth index
* @param i5 The fifth index
ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 )
return d_data[d_size.index( i1, i2, i3, i4, i5 )];
ARRAY_ATTRIBUTE inline TYPE &operator()(size_t i1, size_t i2, size_t i3,
size_t i4, size_t i5) {
return d_data[d_size.index(i1, i2, i3, i4, i5)];
@ -575,17 +539,15 @@ public: // Accessors
* @param i5 The fifth index
operator()( size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
return d_data[d_size.index( i1, i2, i3, i4, i5 )];
operator()(size_t i1, size_t i2, size_t i3, size_t i4, size_t i5) const {
return d_data[d_size.index(i1, i2, i3, i4, i5)];
* Access the desired element as a raw pointer
* @param i The global index
ARRAY_ATTRIBUTE inline TYPE *ptr( size_t i )
ARRAY_ATTRIBUTE inline TYPE *ptr(size_t i) {
return i >= d_size.length() ? nullptr : &d_data[i];
@ -593,8 +555,7 @@ public: // Accessors
* Access the desired element as a raw pointer
* @param i The global index
ARRAY_ATTRIBUTE inline const TYPE *ptr( size_t i ) const
ARRAY_ATTRIBUTE inline const TYPE *ptr(size_t i) const {
return i >= d_size.length() ? nullptr : &d_data[i];
@ -622,40 +583,40 @@ public: // Accessors
//! Return the pointer to the raw data
ARRAY_ATTRIBUTE inline const TYPE *data() const { return d_data; }
public: // Operator overloading
//! Check if two matrices are equal
// Equality means the dimensions and data have to be identical
bool operator==( const Array &rhs ) const;
bool operator==(const Array &rhs) const;
//! Check if two matrices are not equal
inline bool operator!=( const Array &rhs ) const { return !this->operator==( rhs ); }
inline bool operator!=(const Array &rhs) const {
return !this->operator==(rhs);
//! Add another array
Array &operator+=( const Array &rhs );
Array &operator+=(const Array &rhs);
//! Subtract another array
Array &operator-=( const Array &rhs );
Array &operator-=(const Array &rhs);
//! Add a scalar
Array &operator+=( const TYPE &rhs );
Array &operator+=(const TYPE &rhs);
//! Subtract a scalar
Array &operator-=( const TYPE &rhs );
Array &operator-=(const TYPE &rhs);
public: // Math operations
//! Concatenates the arrays along the dimension dim.
static Array cat( const std::vector<Array> &x, int dim = 0 );
static Array cat(const std::vector<Array> &x, int dim = 0);
//! Concatenates the arrays along the dimension dim.
static Array cat( const std::initializer_list<Array> &x, int dim = 0 );
static Array cat(const std::initializer_list<Array> &x, int dim = 0);
//! Concatenates the arrays along the dimension dim.
static Array cat( size_t N_array, const Array *x, int dim );
static Array cat(size_t N_array, const Array *x, int dim);
//! Concatenates a given array with the current array
void cat( const Array &x, int dim = 0 );
void cat(const Array &x, int dim = 0);
//! Initialize the array with random values (defined from the function table)
//void rand();
@ -676,46 +637,46 @@ public: // Math operations
TYPE mean() const;
//! Return the min of all elements in a given direction
Array min( int dir ) const;
Array min(int dir) const;
//! Return the max of all elements in a given direction
Array max( int dir ) const;
Array max(int dir) const;
//! Return the sum of all elements in a given direction
Array sum( int dir ) const;
Array sum(int dir) const;
//! Return the smallest value
TYPE min( const std::vector<size_t> &index ) const;
TYPE min(const std::vector<size_t> &index) const;
//! Return the largest value
TYPE max( const std::vector<size_t> &index ) const;
TYPE max(const std::vector<size_t> &index) const;
//! Return the sum of all elements
TYPE sum( const std::vector<size_t> &index ) const;
TYPE sum(const std::vector<size_t> &index) const;
//! Return the mean of all elements
TYPE mean( const std::vector<size_t> &index ) const;
TYPE mean(const std::vector<size_t> &index) const;
//! Return the smallest value
TYPE min( const std::vector<Range<size_t>> &index ) const;
TYPE min(const std::vector<Range<size_t>> &index) const;
//! Return the largest value
TYPE max( const std::vector<Range<size_t>> &index ) const;
TYPE max(const std::vector<Range<size_t>> &index) const;
//! Return the sum of all elements
TYPE sum( const std::vector<Range<size_t>> &index ) const;
TYPE sum(const std::vector<Range<size_t>> &index) const;
//! Return the mean of all elements
TYPE mean( const std::vector<Range<size_t>> &index ) const;
TYPE mean(const std::vector<Range<size_t>> &index) const;
//! Find all elements that match the operator
std::vector<size_t> find( const TYPE &value,
std::function<bool( const TYPE &, const TYPE & )> compare ) const;
find(const TYPE &value,
std::function<bool(const TYPE &, const TYPE &)> compare) const;
//! Print an array
print( std::ostream &os, const std::string &name = "A", const std::string &prefix = "" ) const;
void print(std::ostream &os, const std::string &name = "A",
const std::string &prefix = "") const;
//! Transpose an array
Array reverseDim() const;
@ -728,7 +689,7 @@ public: // Math operations
* shiftDim shifts the dimensions to the right and pads with singletons.
* @param N Desired shift
Array shiftDim( int N ) const;
Array shiftDim(int N) const;
* @brief Permute array dimensions
@ -738,24 +699,25 @@ public: // Math operations
* needed to access any particular element are rearranged as specified.
* @param index Desired order of the subscripts
Array permute( const std::vector<uint8_t> &index ) const;
Array permute(const std::vector<uint8_t> &index) const;
//! Replicate an array a given number of times in each direction
Array repmat( const std::vector<size_t> &N ) const;
Array repmat(const std::vector<size_t> &N) const;
//! Coarsen an array using the given filter
Array coarsen( const Array &filter ) const;
Array coarsen(const Array &filter) const;
//! Coarsen an array using the given filter
Array coarsen( const std::vector<size_t> &ratio,
std::function<TYPE( const Array & )> filter ) const;
Array coarsen(const std::vector<size_t> &ratio,
std::function<TYPE(const Array &)> filter) const;
* Perform an element-wise operation y = f(x)
* @param[in] fun The function operation
* @param[in] x The input array
static Array transform( std::function<TYPE( const TYPE & )> fun, const Array &x );
static Array transform(std::function<TYPE(const TYPE &)> fun,
const Array &x);
* Perform an element-wise operation z = f(x,y)
@ -763,9 +725,8 @@ public: // Math operations
* @param[in] x The first array
* @param[in] y The second array
static Array transform( std::function<TYPE( const TYPE &, const TYPE & )> fun,
const Array &x,
const Array &y );
static Array transform(std::function<TYPE(const TYPE &, const TYPE &)> fun,
const Array &x, const Array &y);
* axpby operation: this = alpha*x + beta*this
@ -773,19 +734,21 @@ public: // Math operations
* @param[in] x x
* @param[in] beta beta
void axpby( const TYPE &alpha, const Array &x, const TYPE &beta );
void axpby(const TYPE &alpha, const Array &x, const TYPE &beta);
* Linear interpolation
* @param[in] x Position as a decimal index
inline TYPE interp( const std::vector<double> &x ) const { return interp( ); }
inline TYPE interp(const std::vector<double> &x) const {
return interp(;
* Linear interpolation
* @param[in] x Position as a decimal index
TYPE interp( const double *x ) const;
TYPE interp(const double *x) const;
* \fn equals (const Array &rhs, TYPE tol )
@ -794,7 +757,7 @@ public: // Math operations
* \param[in] tol Tolerance of comparison
* \return True iff \f$||\mathit{rhs} - x||_\infty < \mathit{tol}\f$
bool equals( const Array &rhs, TYPE tol = 0.000001 ) const;
bool equals(const Array &rhs, TYPE tol = 0.000001) const;
bool d_isCopyable; // Can the array be copied
@ -802,115 +765,106 @@ private:
ArraySize d_size; // Size of each dimension
TYPE *d_data; // Raw pointer to data in array
std::shared_ptr<TYPE> d_ptr; // Shared pointer to data in array
void allocate( const ArraySize &N );
void allocate(const ArraySize &N);
inline void checkSubsetIndex( const std::vector<Range<size_t>> &range ) const;
inline std::vector<Range<size_t>> convert( const std::vector<size_t> &index ) const;
static inline void getSubsetArrays( const std::vector<Range<size_t>> &range,
std::array<size_t, 5> &first,
std::array<size_t, 5> &last,
std::array<size_t, 5> &inc,
std::array<size_t, 5> &N );
inline void checkSubsetIndex(const std::vector<Range<size_t>> &range) const;
inline std::vector<Range<size_t>>
convert(const std::vector<size_t> &index) const;
static inline void getSubsetArrays(const std::vector<Range<size_t>> &range,
std::array<size_t, 5> &first,
std::array<size_t, 5> &last,
std::array<size_t, 5> &inc,
std::array<size_t, 5> &N);
* ostream operator *
inline std::ostream &operator<<( std::ostream &out, const ArraySize &s )
inline std::ostream &operator<<(std::ostream &out, const ArraySize &s) {
out << "[" << s[0];
for ( size_t i = 1; i < s.ndim(); i++ )
for (size_t i = 1; i < s.ndim(); i++)
out << "," << s[i];
out << "]";
return out;
* Math operations *
template<class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator> operator+(
const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
template <class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator>
operator+(const Array<TYPE, FUN, Allocator> &a,
const Array<TYPE, FUN, Allocator> &b) {
Array<TYPE, FUN, Allocator> c;
const auto &op = []( const TYPE &a, const TYPE &b ) { return a + b; };
FUN::transform( op, a, b, c );
const auto &op = [](const TYPE &a, const TYPE &b) { return a + b; };
FUN::transform(op, a, b, c);
return c;
template<class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator> operator-(
const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
template <class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator>
operator-(const Array<TYPE, FUN, Allocator> &a,
const Array<TYPE, FUN, Allocator> &b) {
Array<TYPE, FUN, Allocator> c;
const auto &op = []( const TYPE &a, const TYPE &b ) { return a - b; };
FUN::transform( op, a, b, c );
const auto &op = [](const TYPE &a, const TYPE &b) { return a - b; };
FUN::transform(op, a, b, c);
return c;
template<class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator> operator*(
const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
template <class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator>
operator*(const Array<TYPE, FUN, Allocator> &a,
const Array<TYPE, FUN, Allocator> &b) {
Array<TYPE, FUN, Allocator> c;
FUN::multiply( a, b, c );
FUN::multiply(a, b, c);
return c;
template<class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator> operator*(
const Array<TYPE, FUN, Allocator> &a, const std::vector<TYPE> &b )
template <class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator>
operator*(const Array<TYPE, FUN, Allocator> &a, const std::vector<TYPE> &b) {
Array<TYPE, FUN, Allocator> b2, c;
b2.viewRaw( { b.size() }, const_cast<TYPE *>( ) );
FUN::multiply( a, b2, c );
b2.viewRaw({b.size()}, const_cast<TYPE *>(;
FUN::multiply(a, b2, c);
return c;
template<class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator> operator*( const TYPE &a,
const Array<TYPE, FUN, Allocator> &b )
template <class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator>
operator*(const TYPE &a, const Array<TYPE, FUN, Allocator> &b) {
auto c = b;
c.scale( a );
return c;
template<class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator> operator*( const Array<TYPE, FUN, Allocator> &a,
const TYPE &b )
template <class TYPE, class FUN, class Allocator>
inline Array<TYPE, FUN, Allocator>
operator*(const Array<TYPE, FUN, Allocator> &a, const TYPE &b) {
auto c = a;
c.scale( b );
return c;
* Copy array *
template<class TYPE, class FUN, class Allocator>
template<class TYPE2>
inline void Array<TYPE, FUN, Allocator>::copy( const TYPE2 *data )
if ( std::is_same<TYPE, TYPE2>::value ) {
std::copy( data, data + d_size.length(), d_data );
template <class TYPE, class FUN, class Allocator>
template <class TYPE2>
inline void Array<TYPE, FUN, Allocator>::copy(const TYPE2 *data) {
if (std::is_same<TYPE, TYPE2>::value) {
std::copy(data, data + d_size.length(), d_data);
} else {
for ( size_t i = 0; i < d_size.length(); i++ )
d_data[i] = static_cast<TYPE>( data[i] );
for (size_t i = 0; i < d_size.length(); i++)
d_data[i] = static_cast<TYPE>(data[i]);
template<class TYPE, class FUN, class Allocator>
template<class TYPE2>
inline void Array<TYPE, FUN, Allocator>::copyTo( TYPE2 *data ) const
if ( std::is_same<TYPE, TYPE2>::value ) {
std::copy( d_data, d_data + d_size.length(), data );
template <class TYPE, class FUN, class Allocator>
template <class TYPE2>
inline void Array<TYPE, FUN, Allocator>::copyTo(TYPE2 *data) const {
if (std::is_same<TYPE, TYPE2>::value) {
std::copy(d_data, d_data + d_size.length(), data);
} else {
for ( size_t i = 0; i < d_size.length(); i++ )
data[i] = static_cast<TYPE2>( d_data[i] );
for (size_t i = 0; i < d_size.length(); i++)
data[i] = static_cast<TYPE2>(d_data[i]);
* Convenience typedefs *
* Copy array *
@ -918,5 +872,4 @@ inline void Array<TYPE, FUN, Allocator>::copyTo( TYPE2 *data ) const
typedef Array<double> DoubleArray;
typedef Array<int> IntArray;

File diff suppressed because it is too large Load Diff

View File

@ -11,47 +11,41 @@
#include <initializer_list>
#include <vector>
#if defined( __CUDA_ARCH__ )
#if defined(__CUDA_ARCH__)
#include <cuda.h>
#define HOST_DEVICE __host__ __device__
#if defined( USING_GCC ) || defined( USING_CLANG )
#define ARRAY_ATTRIBUTE HOST_DEVICE __attribute__( ( always_inline ) )
#if defined(USING_GCC) || defined(USING_CLANG)
#define ARRAY_ATTRIBUTE HOST_DEVICE __attribute__((always_inline))
#if ( defined( DEBUG ) || defined( _DEBUG ) ) && !defined( NDEBUG )
#define CHECK_ARRAY_LENGTH( i, length ) \
do { \
if ( i >= length ) \
throw std::out_of_range( "Index exceeds array bounds" ); \
} while ( 0 )
#if (defined(DEBUG) || defined(_DEBUG)) && !defined(NDEBUG)
#define CHECK_ARRAY_LENGTH(i, length) \
do { \
if (i >= length) \
throw std::out_of_range("Index exceeds array bounds"); \
} while (0)
#define CHECK_ARRAY_LENGTH( i, length ) \
do { \
} while ( 0 )
#define CHECK_ARRAY_LENGTH(i, length) \
do { \
} while (0)
// Forward declarations
class FunctionTable;
template<class TYPE, class FUN = FunctionTable, class Allocator = std::allocator<TYPE>>
template <class TYPE, class FUN = FunctionTable,
class Allocator = std::allocator<TYPE>>
class Array;
//! Simple range class
template<class TYPE = size_t>
class Range final
template <class TYPE = size_t> class Range final {
//! Empty constructor
Range() : i( 0 ), j( -1 ), k( 1 ) {}
Range() : i(0), j(-1), k(1) {}
* Create a range i:k:j (or i:j)
@ -59,26 +53,23 @@ public:
* @param j_ Ending value
* @param k_ Increment value
Range( const TYPE &i_, const TYPE &j_, const TYPE &k_ = 1 )
: i( i_ ), j( j_ ), k( k_ )
Range(const TYPE &i_, const TYPE &j_, const TYPE &k_ = 1)
: i(i_), j(j_), k(k_) {}
//! Get the number of values in the range
size_t size() const
if ( std::is_integral<TYPE>::value ) {
return ( static_cast<int64_t>( j ) - static_cast<int64_t>( i ) ) /
static_cast<int64_t>( k );
} else if ( std::is_floating_point<TYPE>::value ) {
double tmp = static_cast<double>( ( j - i ) ) / static_cast<double>( k );
return static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
} else if ( std::is_same<TYPE, std::complex<float>>::value ||
std::is_same<TYPE, std::complex<double>>::value ) {
double tmp = std::real( ( j - i ) / ( k ) );
return static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
size_t size() const {
if (std::is_integral<TYPE>::value) {
return (static_cast<int64_t>(j) - static_cast<int64_t>(i)) /
} else if (std::is_floating_point<TYPE>::value) {
double tmp = static_cast<double>((j - i)) / static_cast<double>(k);
return static_cast<size_t>(floor(tmp + 1e-12) + 1);
} else if (std::is_same<TYPE, std::complex<float>>::value ||
std::is_same<TYPE, std::complex<double>>::value) {
double tmp = std::real((j - i) / (k));
return static_cast<size_t>(floor(tmp + 1e-12) + 1);
} else {
ERROR( "Unsupported type for range" );
ERROR("Unsupported type for range");
@ -86,29 +77,25 @@ public:
TYPE i, j, k;
//! Simple class to store the array dimensions
class ArraySize final
class ArraySize final {
//! Empty constructor
ArraySize() : d_ndim( 1 ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 } {}
ArraySize() : d_ndim(1), d_length(0), d_N{0, 1, 1, 1, 1} {}
* Create the vector size
* @param N1 Number of elements in the first dimension
ArraySize( size_t N1 ) : d_ndim( 1 ), d_length( N1 ), d_N{ N1, 1, 1, 1, 1 } {}
ArraySize(size_t N1) : d_ndim(1), d_length(N1), d_N{N1, 1, 1, 1, 1} {}
* Create the vector size
* @param N1 Number of elements in the first dimension
* @param N2 Number of elements in the second dimension
ArraySize( size_t N1, size_t N2 )
: d_ndim( 2 ), d_length( N1 * N2 ), d_N{ N1, N2, 1, 1, 1 }
ArraySize(size_t N1, size_t N2)
: d_ndim(2), d_length(N1 * N2), d_N{N1, N2, 1, 1, 1} {}
* Create the vector size
@ -116,10 +103,8 @@ public:
* @param N2 Number of elements in the second dimension
* @param N3 Number of elements in the third dimension
ArraySize( size_t N1, size_t N2, size_t N3 )
: d_ndim( 3 ), d_length( N1 * N2 * N3 ), d_N{ N1, N2, N3, 1, 1 }
ArraySize(size_t N1, size_t N2, size_t N3)
: d_ndim(3), d_length(N1 * N2 * N3), d_N{N1, N2, N3, 1, 1} {}
* Create the vector size
@ -128,10 +113,8 @@ public:
* @param N3 Number of elements in the third dimension
* @param N4 Number of elements in the fourth dimension
ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 )
: d_ndim( 4 ), d_length( N1 * N2 * N3 * N4 ), d_N{ N1, N2, N3, N4, 1 }
ArraySize(size_t N1, size_t N2, size_t N3, size_t N4)
: d_ndim(4), d_length(N1 * N2 * N3 * N4), d_N{N1, N2, N3, N4, 1} {}
* Create the vector size
@ -141,9 +124,8 @@ public:
* @param N4 Number of elements in the fourth dimension
* @param N5 Number of elements in the fifth dimension
ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 )
: d_ndim( 5 ), d_length( N1 * N2 * N3 * N4 * N5 ), d_N{ N1, N2, N3, N4, N5 }
ArraySize(size_t N1, size_t N2, size_t N3, size_t N4, size_t N5)
: d_ndim(5), d_length(N1 * N2 * N3 * N4 * N5), d_N{N1, N2, N3, N4, N5} {
@ -151,40 +133,37 @@ public:
* @param N Size of the array
* @param ndim Number of dimensions
ArraySize( std::initializer_list<size_t> N, int ndim = -1 )
: d_ndim( N.size() ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 }
if ( ndim >= 0 )
ArraySize(std::initializer_list<size_t> N, int ndim = -1)
: d_ndim(N.size()), d_length(0), d_N{0, 1, 1, 1, 1} {
if (ndim >= 0)
d_ndim = ndim;
if ( d_ndim > 5 )
throw std::out_of_range( "Maximum number of dimensions exceeded" );
if (d_ndim > 5)
throw std::out_of_range("Maximum number of dimensions exceeded");
auto it = N.begin();
for ( size_t i = 0; i < d_ndim; i++, ++it )
for (size_t i = 0; i < d_ndim; i++, ++it)
d_N[i] = *it;
d_length = 1;
for ( unsigned long i : d_N )
for (unsigned long i : d_N)
d_length *= i;
if ( d_ndim == 0 )
if (d_ndim == 0)
d_length = 0;
* Create from raw pointer
* @param ndim Number of dimensions
* @param dims Dimensions
ArraySize( size_t ndim, const size_t *dims )
: d_ndim( ndim ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 }
if ( d_ndim > 5 )
throw std::out_of_range( "Maximum number of dimensions exceeded" );
for ( size_t i = 0; i < ndim; i++ )
ArraySize(size_t ndim, const size_t *dims)
: d_ndim(ndim), d_length(0), d_N{0, 1, 1, 1, 1} {
if (d_ndim > 5)
throw std::out_of_range("Maximum number of dimensions exceeded");
for (size_t i = 0; i < ndim; i++)
d_N[i] = dims[i];
d_length = 1;
for ( unsigned long i : d_N )
for (unsigned long i : d_N)
d_length *= i;
if ( d_ndim == 0 )
if (d_ndim == 0)
d_length = 0;
@ -192,28 +171,27 @@ public:
* Create from std::array
* @param N Size of the array
template<std::size_t NDIM>
ArraySize( const std::array<size_t, NDIM> &N ) : ArraySize( NDIM, )
template <std::size_t NDIM>
ArraySize(const std::array<size_t, NDIM> &N) : ArraySize(NDIM, {}
* Create from std::vector
* @param N Size of the array
inline ArraySize( const std::vector<size_t> &N ) : ArraySize( N.size(), ) {}
inline ArraySize(const std::vector<size_t> &N)
: ArraySize(N.size(), {}
// Copy/assignment constructors
ArraySize( ArraySize &&rhs ) = default;
ArraySize( const ArraySize &rhs ) = default;
ArraySize &operator=( ArraySize &&rhs ) = default;
ArraySize &operator=( const ArraySize &rhs ) = default;
ArraySize(ArraySize &&rhs) = default;
ArraySize(const ArraySize &rhs) = default;
ArraySize &operator=(ArraySize &&rhs) = default;
ArraySize &operator=(const ArraySize &rhs) = default;
* Access the ith dimension
* @param i Index to access
ARRAY_ATTRIBUTE size_t operator[]( size_t i ) const { return d_N[i]; }
ARRAY_ATTRIBUTE size_t operator[](size_t i) const { return d_N[i]; }
//! Return the number of dimensions
ARRAY_ATTRIBUTE uint8_t ndim() const { return d_ndim; }
@ -225,13 +203,12 @@ public:
ARRAY_ATTRIBUTE size_t length() const { return d_length; }
//! Resize the dimension
void resize( uint8_t dim, size_t N )
if ( dim >= d_ndim )
throw std::out_of_range( "Invalid dimension" );
void resize(uint8_t dim, size_t N) {
if (dim >= d_ndim)
throw std::out_of_range("Invalid dimension");
d_N[dim] = N;
d_length = 1;
for ( unsigned long i : d_N )
for (unsigned long i : d_N)
d_length *= i;
@ -240,16 +217,15 @@ public:
* max of ndim and the largest dim>1.
* @param ndim Desired number of dimensions
void setNdim( uint8_t ndim ) { d_ndim = std::max( ndim, d_ndim ); }
void setNdim(uint8_t ndim) { d_ndim = std::max(ndim, d_ndim); }
* Remove singleton dimensions
void squeeze()
void squeeze() {
d_ndim = 0;
for ( uint8_t i = 0; i < maxDim(); i++ ) {
if ( d_N[i] != 1 )
for (uint8_t i = 0; i < maxDim(); i++) {
if (d_N[i] != 1)
d_N[d_ndim++] = d_N[i];
@ -261,71 +237,65 @@ public:
const size_t *end() const { return d_N + d_ndim; }
// Check if two array sizes are equal
ARRAY_ATTRIBUTE bool operator==( const ArraySize &rhs ) const
return d_ndim == rhs.d_ndim && memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
ARRAY_ATTRIBUTE bool operator==(const ArraySize &rhs) const {
return d_ndim == rhs.d_ndim && memcmp(d_N, rhs.d_N, sizeof(d_N)) == 0;
// Check if two array sizes are equal (ignoring the dimension)
ARRAY_ATTRIBUTE bool approxEqual( const ArraySize &rhs ) const
return ( length() == 0 && rhs.length() == 0 ) || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
ARRAY_ATTRIBUTE bool approxEqual(const ArraySize &rhs) const {
return (length() == 0 && rhs.length() == 0) ||
memcmp(d_N, rhs.d_N, sizeof(d_N)) == 0;
//! Check if two matrices are not equal
ARRAY_ATTRIBUTE bool operator!=( const ArraySize &rhs ) const
return d_ndim != rhs.d_ndim || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) != 0;
ARRAY_ATTRIBUTE bool operator!=(const ArraySize &rhs) const {
return d_ndim != rhs.d_ndim || memcmp(d_N, rhs.d_N, sizeof(d_N)) != 0;
//! Maximum supported dimension
ARRAY_ATTRIBUTE static uint8_t maxDim() { return 5; }
//! Get the index
ARRAY_ATTRIBUTE size_t index( size_t i ) const
CHECK_ARRAY_LENGTH( i, d_length );
ARRAY_ATTRIBUTE size_t index(size_t i) const {
CHECK_ARRAY_LENGTH(i, d_length);
return i;
//! Get the index
ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2 ) const
ARRAY_ATTRIBUTE size_t index(size_t i1, size_t i2) const {
size_t index = i1 + i2 * d_N[0];
CHECK_ARRAY_LENGTH( index, d_length );
CHECK_ARRAY_LENGTH(index, d_length);
return index;
//! Get the index
ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3 ) const
size_t index = i1 + d_N[0] * ( i2 + d_N[1] * i3 );
CHECK_ARRAY_LENGTH( index, d_length );
ARRAY_ATTRIBUTE size_t index(size_t i1, size_t i2, size_t i3) const {
size_t index = i1 + d_N[0] * (i2 + d_N[1] * i3);
CHECK_ARRAY_LENGTH(index, d_length);
return index;
//! Get the index
ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const
size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * i4 ) );
CHECK_ARRAY_LENGTH( index, d_length );
ARRAY_ATTRIBUTE size_t index(size_t i1, size_t i2, size_t i3,
size_t i4) const {
size_t index = i1 + d_N[0] * (i2 + d_N[1] * (i3 + d_N[2] * i4));
CHECK_ARRAY_LENGTH(index, d_length);
return index;
//! Get the index
index( size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * ( i4 + d_N[3] * i5 ) ) );
CHECK_ARRAY_LENGTH( index, d_length );
ARRAY_ATTRIBUTE size_t index(size_t i1, size_t i2, size_t i3, size_t i4,
size_t i5) const {
size_t index =
i1 + d_N[0] * (i2 + d_N[1] * (i3 + d_N[2] * (i4 + d_N[3] * i5)));
CHECK_ARRAY_LENGTH(index, d_length);
return index;
//! Get the index
size_t index( const std::array<size_t, 5> &i ) const
size_t index(const std::array<size_t, 5> &i) const {
size_t j = 0;
for ( size_t m = 0, N = 1; m < 5; m++ ) {
for (size_t m = 0, N = 1; m < 5; m++) {
j += i[m] * N;
N *= d_N[m];
@ -333,12 +303,11 @@ public:
//! Get the index
size_t index( std::initializer_list<size_t> i ) const
size_t index(std::initializer_list<size_t> i) const {
size_t N = 1;
size_t j = 0;
size_t m = 0;
for ( size_t k : i ) {
for (size_t k : i) {
j += k * N;
N *= d_N[m++];
@ -346,33 +315,31 @@ public:
//! Convert the index to ijk values
std::array<size_t, 5> ijk( size_t index ) const
CHECK_ARRAY_LENGTH( index, d_length );
std::array<size_t, 5> ijk(size_t index) const {
CHECK_ARRAY_LENGTH(index, d_length);
size_t i0 = index % d_N[0];
index = index / d_N[0];
index = index / d_N[0];
size_t i1 = index % d_N[1];
index = index / d_N[1];
index = index / d_N[1];
size_t i2 = index % d_N[2];
index = index / d_N[2];
index = index / d_N[2];
size_t i3 = index % d_N[3];
index = index / d_N[3];
return { i0, i1, i2, i3, index };
index = index / d_N[3];
return {i0, i1, i2, i3, index};
//! Convert the index to ijk values
void ijk( size_t index, size_t *x ) const
CHECK_ARRAY_LENGTH( index, d_length );
x[0] = index % d_N[0];
void ijk(size_t index, size_t *x) const {
CHECK_ARRAY_LENGTH(index, d_length);
x[0] = index % d_N[0];
index = index / d_N[0];
x[1] = index % d_N[1];
x[1] = index % d_N[1];
index = index / d_N[1];
x[2] = index % d_N[2];
x[2] = index % d_N[2];
index = index / d_N[2];
x[3] = index % d_N[3];
x[3] = index % d_N[3];
index = index / d_N[3];
x[4] = index;
x[4] = index;
@ -381,51 +348,42 @@ private:
size_t d_N[5];
// Function to concatenate dimensions of two array sizes
inline ArraySize cat( const ArraySize &x, const ArraySize &y )
if ( x.ndim() + y.ndim() > 5 )
throw std::out_of_range( "Maximum number of dimensions exceeded" );
size_t N[5] = { 0 };
for ( int i = 0; i < x.ndim(); i++ )
inline ArraySize cat(const ArraySize &x, const ArraySize &y) {
if (x.ndim() + y.ndim() > 5)
throw std::out_of_range("Maximum number of dimensions exceeded");
size_t N[5] = {0};
for (int i = 0; i < x.ndim(); i++)
N[i] = x[i];
for ( int i = 0; i < y.ndim(); i++ )
for (int i = 0; i < y.ndim(); i++)
N[i + x.ndim()] = y[i];
return ArraySize( x.ndim() + y.ndim(), N );
return ArraySize(x.ndim() + y.ndim(), N);
// Operator overloads
inline ArraySize operator*( size_t v, const ArraySize &x )
size_t N[5] = { v * x[0], v * x[1], v * x[2], v * x[3], v * x[4] };
return ArraySize( x.ndim(), N );
inline ArraySize operator*(size_t v, const ArraySize &x) {
size_t N[5] = {v * x[0], v * x[1], v * x[2], v * x[3], v * x[4]};
return ArraySize(x.ndim(), N);
inline ArraySize operator*( const ArraySize &x, size_t v )
size_t N[5] = { v * x[0], v * x[1], v * x[2], v * x[3], v * x[4] };
return ArraySize( x.ndim(), N );
inline ArraySize operator*(const ArraySize &x, size_t v) {
size_t N[5] = {v * x[0], v * x[1], v * x[2], v * x[3], v * x[4]};
return ArraySize(x.ndim(), N);
inline ArraySize operator-( const ArraySize &x, size_t v )
size_t N[5] = { x[0] - v, x[1] - v, x[2] - v, x[3] - v, x[4] - v };
return ArraySize( x.ndim(), N );
inline ArraySize operator-(const ArraySize &x, size_t v) {
size_t N[5] = {x[0] - v, x[1] - v, x[2] - v, x[3] - v, x[4] - v};
return ArraySize(x.ndim(), N);
inline ArraySize operator+( const ArraySize &x, size_t v )
size_t N[5] = { x[0] + v, x[1] + v, x[2] + v, x[3] + v, x[4] + v };
return ArraySize( x.ndim(), N );
inline ArraySize operator+(const ArraySize &x, size_t v) {
size_t N[5] = {x[0] + v, x[1] + v, x[2] + v, x[3] + v, x[4] + v};
return ArraySize(x.ndim(), N);
inline ArraySize operator+( size_t v, const ArraySize &x )
size_t N[5] = { x[0] + v, x[1] + v, x[2] + v, x[3] + v, x[4] + v };
return ArraySize( x.ndim(), N );
inline ArraySize operator+(size_t v, const ArraySize &x) {
size_t N[5] = {x[0] + v, x[1] + v, x[2] + v, x[3] + v, x[4] + v};
return ArraySize(x.ndim(), N);
#if defined( USING_ICC )
#if defined(USING_ICC)

View File

@ -16,100 +16,92 @@
#include "common/Communication.h"
* Structure to store the rank info *
int RankInfoStruct::getRankForBlock( int i, int j, int k ) const
int i2 = (i+nx)%nx;
int j2 = (j+ny)%ny;
int k2 = (k+nz)%nz;
return i2 + j2*nx + k2*nx*ny;
int RankInfoStruct::getRankForBlock(int i, int j, int k) const {
int i2 = (i + nx) % nx;
int j2 = (j + ny) % ny;
int k2 = (k + nz) % nz;
return i2 + j2 * nx + k2 * nx * ny;
RankInfoStruct::RankInfoStruct() {
nx = 0;
ny = 0;
nz = 0;
ix = -1;
jy = -1;
kz = -1;
for (int i=-1; i<=1; i++) {
for (int j=-1; j<=1; j++) {
for (int k=-1; k<=1; k++) {
rank[i+1][j+1][k+1] = -1;
for (int i = -1; i <= 1; i++) {
for (int j = -1; j <= 1; j++) {
for (int k = -1; k <= 1; k++) {
rank[i + 1][j + 1][k + 1] = -1;
RankInfoStruct::RankInfoStruct( int rank0, int nprocx, int nprocy, int nprocz )
RankInfoStruct::RankInfoStruct(int rank0, int nprocx, int nprocy, int nprocz) {
memset(this, 0, sizeof(RankInfoStruct));
nx = nprocx;
ny = nprocy;
nz = nprocz;
if ( rank0 >= nprocx * nprocy * nprocz ) {
ix = -1;
jy = -1;
kz = -1;
for (int i=-1; i<=1; i++) {
for (int j=-1; j<=1; j++) {
for (int k=-1; k<=1; k++) {
rank[i+1][j+1][k+1] = -1;
if (rank0 >= nprocx * nprocy * nprocz) {
ix = -1;
jy = -1;
kz = -1;
for (int i = -1; i <= 1; i++) {
for (int j = -1; j <= 1; j++) {
for (int k = -1; k <= 1; k++) {
rank[i + 1][j + 1][k + 1] = -1;
} else {
ix = rank0%nprocx;
jy = (rank0/nprocx)%nprocy;
kz = rank0/(nprocx*nprocy);
for (int i=-1; i<=1; i++) {
for (int j=-1; j<=1; j++) {
for (int k=-1; k<=1; k++) {
rank[i+1][j+1][k+1] = getRankForBlock(ix+i,jy+j,kz+k);
ix = rank0 % nprocx;
jy = (rank0 / nprocx) % nprocy;
kz = rank0 / (nprocx * nprocy);
for (int i = -1; i <= 1; i++) {
for (int j = -1; j <= 1; j++) {
for (int k = -1; k <= 1; k++) {
rank[i + 1][j + 1][k + 1] =
getRankForBlock(ix + i, jy + j, kz + k);
ASSERT(rank[1][1][1] == rank0);
* Deprecated functions *
void InitializeRanks( const int rank, const int nprocx, const int nprocy, const int nprocz,
int& iproc, int& jproc, int& kproc,
int& rank_x, int& rank_y, int& rank_z,
int& rank_X, int& rank_Y, int& rank_Z,
int& rank_xy, int& rank_XY, int& rank_xY, int& rank_Xy,
int& rank_xz, int& rank_XZ, int& rank_xZ, int& rank_Xz,
int& rank_yz, int& rank_YZ, int& rank_yZ, int& rank_Yz )
const RankInfoStruct data(rank,nprocx,nprocy,nprocz);
iproc = data.ix;
void InitializeRanks(const int rank, const int nprocx, const int nprocy,
const int nprocz, int &iproc, int &jproc, int &kproc,
int &rank_x, int &rank_y, int &rank_z, int &rank_X,
int &rank_Y, int &rank_Z, int &rank_xy, int &rank_XY,
int &rank_xY, int &rank_Xy, int &rank_xz, int &rank_XZ,
int &rank_xZ, int &rank_Xz, int &rank_yz, int &rank_YZ,
int &rank_yZ, int &rank_Yz) {
const RankInfoStruct data(rank, nprocx, nprocy, nprocz);
iproc = data.ix;
jproc = data.jy;
kproc =;
rank_X = data.rank[2][1][1];
rank_x = data.rank[0][1][1];
rank_Y = data.rank[1][2][1];
rank_y = data.rank[1][0][1];
rank_Z = data.rank[1][1][2];
rank_z = data.rank[1][1][0];
rank_XY = data.rank[2][2][1];
rank_xy = data.rank[0][0][1];
rank_Xy = data.rank[2][0][1];
rank_xY = data.rank[0][2][1];
rank_XZ = data.rank[2][1][2];
rank_xz = data.rank[0][1][0];
rank_Xz = data.rank[2][1][0];
rank_xZ = data.rank[0][1][2];
rank_YZ = data.rank[1][2][2];
rank_yz = data.rank[1][0][0];
rank_Yz = data.rank[1][2][0];
rank_yZ = data.rank[1][0][2];
rank_X = data.rank[2][1][1];
rank_x = data.rank[0][1][1];
rank_Y = data.rank[1][2][1];
rank_y = data.rank[1][0][1];
rank_Z = data.rank[1][1][2];
rank_z = data.rank[1][1][0];
rank_XY = data.rank[2][2][1];
rank_xy = data.rank[0][0][1];
rank_Xy = data.rank[2][0][1];
rank_xY = data.rank[0][2][1];
rank_XZ = data.rank[2][1][2];
rank_xz = data.rank[0][1][0];
rank_Xz = data.rank[2][1][0];
rank_xZ = data.rank[0][1][2];
rank_YZ = data.rank[1][2][2];
rank_yz = data.rank[1][0][0];
rank_Yz = data.rank[1][2][0];
rank_yZ = data.rank[1][0][2];

View File

@ -32,38 +32,35 @@
using namespace std;
* @brief Rank info structure
* @details Structure used to hold the ranks for the current process and it's neighbors
struct RankInfoStruct {
int nx; //!< The number of processors in the x direction
int ny; //!< The number of processors in the y direction
int nz; //!< The number of processors in the z direction
int ix; //!< The index of the current process in the x direction
int jy; //!< The index of the current process in the y direction
int kz; //!< The index of the current process in the z direction
int rank[3][3][3]; //!< The rank for the neighbor [i][j][k]
int nx; //!< The number of processors in the x direction
int ny; //!< The number of processors in the y direction
int nz; //!< The number of processors in the z direction
int ix; //!< The index of the current process in the x direction
int jy; //!< The index of the current process in the y direction
int kz; //!< The index of the current process in the z direction
int rank[3][3][3]; //!< The rank for the neighbor [i][j][k]
RankInfoStruct( int rank, int nprocx, int nprocy, int nprocz );
int getRankForBlock( int i, int j, int k ) const;
RankInfoStruct(int rank, int nprocx, int nprocy, int nprocz);
int getRankForBlock(int i, int j, int k) const;
//! Redistribute domain data (dst may be smaller than the src)
template<class TYPE>
Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src_data,
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, const Utilities::MPI& comm );
template <class TYPE>
redistribute(const RankInfoStruct &src_rank, const Array<TYPE> &src_data,
const RankInfoStruct &dst_rank, std::array<int, 3> dst_size,
const Utilities::MPI &comm);
* @brief Communicate halo
* @details Fill the halo cells in an array from the neighboring processes
template<class TYPE>
class fillHalo
template <class TYPE> class fillHalo {
* @brief Default constructor
@ -76,37 +73,36 @@ public:
* @param[in] fill Fill {faces,edges,corners}
* @param[in] periodic Periodic dimensions
fillHalo( const Utilities::MPI& comm, const RankInfoStruct& info,
std::array<int,3> n, std::array<int,3> ng, int tag, int depth,
std::array<bool,3> fill = {true,true,true},
std::array<bool,3> periodic = {true,true,true} );
fillHalo(const Utilities::MPI &comm, const RankInfoStruct &info,
std::array<int, 3> n, std::array<int, 3> ng, int tag, int depth,
std::array<bool, 3> fill = {true, true, true},
std::array<bool, 3> periodic = {true, true, true});
//! Destructor
~fillHalo( );
fillHalo() = delete;
fillHalo(const fillHalo&) = delete;
fillHalo& operator=(const fillHalo&) = delete;
fillHalo(const fillHalo &) = delete;
fillHalo &operator=(const fillHalo &) = delete;
* @brief Communicate the halos
* @param[in] array The array on which we fill the halos
void fill( Array<TYPE>& array );
void fill(Array<TYPE> &array);
* @brief Copy data from the src array to the dst array
* @param[in] src The src array with or without halos
* @param[in] dst The dst array with or without halos
template<class TYPE1, class TYPE2>
void copy( const Array<TYPE1>& src, Array<TYPE2>& dst );
template <class TYPE1, class TYPE2>
void copy(const Array<TYPE1> &src, Array<TYPE2> &dst);
Utilities::MPI comm;
RankInfoStruct info;
std::array<int,3> n, ng;
std::array<int, 3> n, ng;
int depth;
bool fill_pattern[3][3][3];
int tag[3][3][3];
@ -114,266 +110,286 @@ private:
TYPE *mem;
TYPE *send[3][3][3], *recv[3][3][3];
MPI_Request send_req[3][3][3], recv_req[3][3][3];
void pack( const Array<TYPE>& array, int i, int j, int k, TYPE *buffer );
void unpack( Array<TYPE>& array, int i, int j, int k, const TYPE *buffer );
void pack(const Array<TYPE> &array, int i, int j, int k, TYPE *buffer);
void unpack(Array<TYPE> &array, int i, int j, int k, const TYPE *buffer);
inline void PackMeshData(const int *list, int count, double *sendbuf, double *data){
// Fill in the phase ID values from neighboring processors
// This packs up the values that need to be sent from one processor to another
int idx,n;
for (idx=0; idx<count; idx++){
n = list[idx];
sendbuf[idx] = data[n];
inline void PackMeshData(const int *list, int count, double *sendbuf,
double *data) {
// Fill in the phase ID values from neighboring processors
// This packs up the values that need to be sent from one processor to another
int idx, n;
for (idx = 0; idx < count; idx++) {
n = list[idx];
sendbuf[idx] = data[n];
inline void UnpackMeshData(const int *list, int count, double *recvbuf, double *data){
// Fill in the phase ID values from neighboring processors
// This unpacks the values once they have been recieved from neighbors
int idx,n;
inline void UnpackMeshData(const int *list, int count, double *recvbuf,
double *data) {
// Fill in the phase ID values from neighboring processors
// This unpacks the values once they have been recieved from neighbors
int idx, n;
for (idx=0; idx<count; idx++){
n = list[idx];
data[n] = recvbuf[idx];
for (idx = 0; idx < count; idx++) {
n = list[idx];
data[n] = recvbuf[idx];
// Initialize the ranks (this is deprecated, see RankInfoStruct)
void InitializeRanks( const int rank, const int nprocx, const int nprocy, const int nprocz,
int& iproc, int& jproc, int& kproc,
int& rank_x, int& rank_y, int& rank_z,
int& rank_X, int& rank_Y, int& rank_Z,
int& rank_xy, int& rank_XY, int& rank_xY, int& rank_Xy,
int& rank_xz, int& rank_XZ, int& rank_xZ, int& rank_Xz,
int& rank_yz, int& rank_YZ, int& rank_yZ, int& rank_Yz );
void InitializeRanks(const int rank, const int nprocx, const int nprocy,
const int nprocz, int &iproc, int &jproc, int &kproc,
int &rank_x, int &rank_y, int &rank_z, int &rank_X,
int &rank_Y, int &rank_Z, int &rank_xy, int &rank_XY,
int &rank_xY, int &rank_Xy, int &rank_xz, int &rank_XZ,
int &rank_xZ, int &rank_Xz, int &rank_yz, int &rank_YZ,
int &rank_yZ, int &rank_Yz);
inline void CommunicateSendRecvCounts( const Utilities::MPI& comm, int sendtag, int recvtag,
int rank_x, int rank_y, int rank_z,
int rank_X, int rank_Y, int rank_Z,
int rank_xy, int rank_XY, int rank_xY, int rank_Xy,
int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz,
int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz,
int sendCount_x, int sendCount_y, int sendCount_z,
int sendCount_X, int sendCount_Y, int sendCount_Z,
int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy,
int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz,
int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz,
int& recvCount_x, int& recvCount_y, int& recvCount_z,
int& recvCount_X, int& recvCount_Y, int& recvCount_Z,
int& recvCount_xy, int& recvCount_XY, int& recvCount_xY, int& recvCount_Xy,
int& recvCount_xz, int& recvCount_XZ, int& recvCount_xZ, int& recvCount_Xz,
int& recvCount_yz, int& recvCount_YZ, int& recvCount_yZ, int& recvCount_Yz )
MPI_Request req1[18], req2[18];
req1[0] = comm.Isend(&sendCount_x,1,rank_x,sendtag+0);
req2[0] = comm.Irecv(&recvCount_X,1,rank_X,recvtag+0);
req1[1] = comm.Isend(&sendCount_X,1,rank_X,sendtag+1);
req2[1] = comm.Irecv(&recvCount_x,1,rank_x,recvtag+1);
req1[2] = comm.Isend(&sendCount_y,1,rank_y,sendtag+2);
req2[2] = comm.Irecv(&recvCount_Y,1,rank_Y,recvtag+2);
req1[3] = comm.Isend(&sendCount_Y,1,rank_Y,sendtag+3);
req2[3] = comm.Irecv(&recvCount_y,1,rank_y,recvtag+3);
req1[4] = comm.Isend(&sendCount_z,1,rank_z,sendtag+4);
req2[4] = comm.Irecv(&recvCount_Z,1,rank_Z,recvtag+4);
req1[5] = comm.Isend(&sendCount_Z,1,rank_Z,sendtag+5);
req2[5] = comm.Irecv(&recvCount_z,1,rank_z,recvtag+5);
inline void CommunicateSendRecvCounts(
const Utilities::MPI &comm, int sendtag, int recvtag, int rank_x,
int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy,
int rank_XY, int rank_xY, int rank_Xy, int rank_xz, int rank_XZ,
int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ,
int rank_Yz, int sendCount_x, int sendCount_y, int sendCount_z,
int sendCount_X, int sendCount_Y, int sendCount_Z, int sendCount_xy,
int sendCount_XY, int sendCount_xY, int sendCount_Xy, int sendCount_xz,
int sendCount_XZ, int sendCount_xZ, int sendCount_Xz, int sendCount_yz,
int sendCount_YZ, int sendCount_yZ, int sendCount_Yz, int &recvCount_x,
int &recvCount_y, int &recvCount_z, int &recvCount_X, int &recvCount_Y,
int &recvCount_Z, int &recvCount_xy, int &recvCount_XY, int &recvCount_xY,
int &recvCount_Xy, int &recvCount_xz, int &recvCount_XZ, int &recvCount_xZ,
int &recvCount_Xz, int &recvCount_yz, int &recvCount_YZ, int &recvCount_yZ,
int &recvCount_Yz) {
MPI_Request req1[18], req2[18];
req1[0] = comm.Isend(&sendCount_x, 1, rank_x, sendtag + 0);
req2[0] = comm.Irecv(&recvCount_X, 1, rank_X, recvtag + 0);
req1[1] = comm.Isend(&sendCount_X, 1, rank_X, sendtag + 1);
req2[1] = comm.Irecv(&recvCount_x, 1, rank_x, recvtag + 1);
req1[2] = comm.Isend(&sendCount_y, 1, rank_y, sendtag + 2);
req2[2] = comm.Irecv(&recvCount_Y, 1, rank_Y, recvtag + 2);
req1[3] = comm.Isend(&sendCount_Y, 1, rank_Y, sendtag + 3);
req2[3] = comm.Irecv(&recvCount_y, 1, rank_y, recvtag + 3);
req1[4] = comm.Isend(&sendCount_z, 1, rank_z, sendtag + 4);
req2[4] = comm.Irecv(&recvCount_Z, 1, rank_Z, recvtag + 4);
req1[5] = comm.Isend(&sendCount_Z, 1, rank_Z, sendtag + 5);
req2[5] = comm.Irecv(&recvCount_z, 1, rank_z, recvtag + 5);
req1[6] = comm.Isend(&sendCount_xy,1,rank_xy,sendtag+6);
req2[6] = comm.Irecv(&recvCount_XY,1,rank_XY,recvtag+6);
req1[7] = comm.Isend(&sendCount_XY,1,rank_XY,sendtag+7);
req2[7] = comm.Irecv(&recvCount_xy,1,rank_xy,recvtag+7);
req1[8] = comm.Isend(&sendCount_Xy,1,rank_Xy,sendtag+8);
req2[8] = comm.Irecv(&recvCount_xY,1,rank_xY,recvtag+8);
req1[9] = comm.Isend(&sendCount_xY,1,rank_xY,sendtag+9);
req2[9] = comm.Irecv(&recvCount_Xy,1,rank_Xy,recvtag+9);
req1[6] = comm.Isend(&sendCount_xy, 1, rank_xy, sendtag + 6);
req2[6] = comm.Irecv(&recvCount_XY, 1, rank_XY, recvtag + 6);
req1[7] = comm.Isend(&sendCount_XY, 1, rank_XY, sendtag + 7);
req2[7] = comm.Irecv(&recvCount_xy, 1, rank_xy, recvtag + 7);
req1[8] = comm.Isend(&sendCount_Xy, 1, rank_Xy, sendtag + 8);
req2[8] = comm.Irecv(&recvCount_xY, 1, rank_xY, recvtag + 8);
req1[9] = comm.Isend(&sendCount_xY, 1, rank_xY, sendtag + 9);
req2[9] = comm.Irecv(&recvCount_Xy, 1, rank_Xy, recvtag + 9);
req1[10] = comm.Isend(&sendCount_xz,1,rank_xz,sendtag+10);
req2[10] = comm.Irecv(&recvCount_XZ,1,rank_XZ,recvtag+10);
req1[11] = comm.Isend(&sendCount_XZ,1,rank_XZ,sendtag+11);
req2[11] = comm.Irecv(&recvCount_xz,1,rank_xz,recvtag+11);
req1[12] = comm.Isend(&sendCount_Xz,1,rank_Xz,sendtag+12);
req2[12] = comm.Irecv(&recvCount_xZ,1,rank_xZ,recvtag+12);
req1[13] = comm.Isend(&sendCount_xZ,1,rank_xZ,sendtag+13);
req2[13] = comm.Irecv(&recvCount_Xz,1,rank_Xz,recvtag+13);
req1[10] = comm.Isend(&sendCount_xz, 1, rank_xz, sendtag + 10);
req2[10] = comm.Irecv(&recvCount_XZ, 1, rank_XZ, recvtag + 10);
req1[11] = comm.Isend(&sendCount_XZ, 1, rank_XZ, sendtag + 11);
req2[11] = comm.Irecv(&recvCount_xz, 1, rank_xz, recvtag + 11);
req1[12] = comm.Isend(&sendCount_Xz, 1, rank_Xz, sendtag + 12);
req2[12] = comm.Irecv(&recvCount_xZ, 1, rank_xZ, recvtag + 12);
req1[13] = comm.Isend(&sendCount_xZ, 1, rank_xZ, sendtag + 13);
req2[13] = comm.Irecv(&recvCount_Xz, 1, rank_Xz, recvtag + 13);
req1[14] = comm.Isend(&sendCount_yz,1,rank_yz,sendtag+14);
req2[14] = comm.Irecv(&recvCount_YZ,1,rank_YZ,recvtag+14);
req1[15] = comm.Isend(&sendCount_YZ,1,rank_YZ,sendtag+15);
req2[15] = comm.Irecv(&recvCount_yz,1,rank_yz,recvtag+15);
req1[16] = comm.Isend(&sendCount_Yz,1,rank_Yz,sendtag+16);
req2[16] = comm.Irecv(&recvCount_yZ,1,rank_yZ,recvtag+16);
req1[17] = comm.Isend(&sendCount_yZ,1,rank_yZ,sendtag+17);
req2[17] = comm.Irecv(&recvCount_Yz,1,rank_Yz,recvtag+17);
comm.waitAll( 18, req1 );
comm.waitAll( 18, req2 );
req1[14] = comm.Isend(&sendCount_yz, 1, rank_yz, sendtag + 14);
req2[14] = comm.Irecv(&recvCount_YZ, 1, rank_YZ, recvtag + 14);
req1[15] = comm.Isend(&sendCount_YZ, 1, rank_YZ, sendtag + 15);
req2[15] = comm.Irecv(&recvCount_yz, 1, rank_yz, recvtag + 15);
req1[16] = comm.Isend(&sendCount_Yz, 1, rank_Yz, sendtag + 16);
req2[16] = comm.Irecv(&recvCount_yZ, 1, rank_yZ, recvtag + 16);
req1[17] = comm.Isend(&sendCount_yZ, 1, rank_yZ, sendtag + 17);
req2[17] = comm.Irecv(&recvCount_Yz, 1, rank_Yz, recvtag + 17);
comm.waitAll(18, req1);
comm.waitAll(18, req2);
inline void CommunicateRecvLists( const Utilities::MPI& comm, int sendtag, int recvtag,
int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z,
int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy,
int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz,
int *sendList_yz, int *sendList_YZ, int *sendList_yZ, int *sendList_Yz,
int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X, int sendCount_Y, int sendCount_Z,
int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy,
int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz,
int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz,
int *recvList_x, int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y, int *recvList_Z,
int *recvList_xy, int *recvList_XY, int *recvList_xY, int *recvList_Xy,
int *recvList_xz, int *recvList_XZ, int *recvList_xZ, int *recvList_Xz,
int *recvList_yz, int *recvList_YZ, int *recvList_yZ, int *recvList_Yz,
int recvCount_x, int recvCount_y, int recvCount_z, int recvCount_X, int recvCount_Y, int recvCount_Z,
int recvCount_xy, int recvCount_XY, int recvCount_xY, int recvCount_Xy,
int recvCount_xz, int recvCount_XZ, int recvCount_xZ, int recvCount_Xz,
int recvCount_yz, int recvCount_YZ, int recvCount_yZ, int recvCount_Yz,
int rank_x, int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY,
int rank_Xy, int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz)
MPI_Request req1[18], req2[18];
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_x,sendtag);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_X,recvtag);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_X,sendtag);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_x,recvtag);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_y,sendtag);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_Y,recvtag);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_Y,sendtag);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_y,recvtag);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_z,sendtag);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_Z,recvtag);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_Z,sendtag);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_z,recvtag);
// Exchanges the 18 per-direction lists with the neighboring ranks.
//
// For every direction suffix (x, X, y, Y, z, Z, xy, XY, xY, Xy, xz, XZ, xZ, Xz,
// yz, YZ, yZ, Yz) a non-blocking send (comm.Isend) of sendList_<d> with
// sendCount_<d> entries goes to rank_<d>, and is paired with a non-blocking
// receive (comm.Irecv) into the case-swapped counterpart list
// (x <-> X, Xy <-> xY, ...) from the case-swapped rank.
// Send requests are stored in req1, receive requests in req2, and both arrays
// are completed with comm.waitAll before returning.
//
// comm      communicator wrapper used for Isend / Irecv / waitAll
// sendtag   tag passed to every Isend
// recvtag   tag passed to every Irecv
//
// NOTE(review): in this text the statements for request slots 6-17 and the two
// waitAll calls appear twice (once without spaces, once formatted), and the
// closing brace is not visible. This looks like old and new lines of a diff
// interleaved -- confirm against the real file.
inline void CommunicateRecvLists(
const Utilities::MPI &comm, int sendtag, int recvtag, int *sendList_x,
int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y,
int *sendList_Z, int *sendList_xy, int *sendList_XY, int *sendList_xY,
int *sendList_Xy, int *sendList_xz, int *sendList_XZ, int *sendList_xZ,
int *sendList_Xz, int *sendList_yz, int *sendList_YZ, int *sendList_yZ,
int *sendList_Yz, int sendCount_x, int sendCount_y, int sendCount_z,
int sendCount_X, int sendCount_Y, int sendCount_Z, int sendCount_xy,
int sendCount_XY, int sendCount_xY, int sendCount_Xy, int sendCount_xz,
int sendCount_XZ, int sendCount_xZ, int sendCount_Xz, int sendCount_yz,
int sendCount_YZ, int sendCount_yZ, int sendCount_Yz, int *recvList_x,
int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y,
int *recvList_Z, int *recvList_xy, int *recvList_XY, int *recvList_xY,
int *recvList_Xy, int *recvList_xz, int *recvList_XZ, int *recvList_xZ,
int *recvList_Xz, int *recvList_yz, int *recvList_YZ, int *recvList_yZ,
int *recvList_Yz, int recvCount_x, int recvCount_y, int recvCount_z,
int recvCount_X, int recvCount_Y, int recvCount_Z, int recvCount_xy,
int recvCount_XY, int recvCount_xY, int recvCount_Xy, int recvCount_xz,
int recvCount_XZ, int recvCount_xZ, int recvCount_Xz, int recvCount_yz,
int recvCount_YZ, int recvCount_yZ, int recvCount_Yz, int rank_x,
int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy,
int rank_XY, int rank_xY, int rank_Xy, int rank_xz, int rank_XZ,
int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ,
int rank_Yz) {
// One request slot per direction: req1 = sends, req2 = receives.
MPI_Request req1[18], req2[18];
// Slots 0-5: the six single-axis directions.
req1[0] = comm.Isend(sendList_x, sendCount_x, rank_x, sendtag);
req2[0] = comm.Irecv(recvList_X, recvCount_X, rank_X, recvtag);
req1[1] = comm.Isend(sendList_X, sendCount_X, rank_X, sendtag);
req2[1] = comm.Irecv(recvList_x, recvCount_x, rank_x, recvtag);
req1[2] = comm.Isend(sendList_y, sendCount_y, rank_y, sendtag);
req2[2] = comm.Irecv(recvList_Y, recvCount_Y, rank_Y, recvtag);
req1[3] = comm.Isend(sendList_Y, sendCount_Y, rank_Y, sendtag);
req2[3] = comm.Irecv(recvList_y, recvCount_y, rank_y, recvtag);
req1[4] = comm.Isend(sendList_z, sendCount_z, rank_z, sendtag);
req2[4] = comm.Irecv(recvList_Z, recvCount_Z, rank_Z, recvtag);
req1[5] = comm.Isend(sendList_Z, sendCount_Z, rank_Z, sendtag);
req2[5] = comm.Irecv(recvList_z, recvCount_z, rank_z, recvtag);
// Slots 6-9: the four xy directions.
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_xy,sendtag);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_XY,recvtag);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_XY,sendtag);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_xy,recvtag);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy,sendtag);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_xY,recvtag);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_xY,sendtag);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy,recvtag);
req1[6] = comm.Isend(sendList_xy, sendCount_xy, rank_xy, sendtag);
req2[6] = comm.Irecv(recvList_XY, recvCount_XY, rank_XY, recvtag);
req1[7] = comm.Isend(sendList_XY, sendCount_XY, rank_XY, sendtag);
req2[7] = comm.Irecv(recvList_xy, recvCount_xy, rank_xy, recvtag);
req1[8] = comm.Isend(sendList_Xy, sendCount_Xy, rank_Xy, sendtag);
req2[8] = comm.Irecv(recvList_xY, recvCount_xY, rank_xY, recvtag);
req1[9] = comm.Isend(sendList_xY, sendCount_xY, rank_xY, sendtag);
req2[9] = comm.Irecv(recvList_Xy, recvCount_Xy, rank_Xy, recvtag);
// Slots 10-13: the four xz directions.
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_xz,sendtag);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ,recvtag);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ,sendtag);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_xz,recvtag);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_Xz,sendtag);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ,recvtag);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ,sendtag);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz,recvtag);
req1[10] = comm.Isend(sendList_xz, sendCount_xz, rank_xz, sendtag);
req2[10] = comm.Irecv(recvList_XZ, recvCount_XZ, rank_XZ, recvtag);
req1[11] = comm.Isend(sendList_XZ, sendCount_XZ, rank_XZ, sendtag);
req2[11] = comm.Irecv(recvList_xz, recvCount_xz, rank_xz, recvtag);
req1[12] = comm.Isend(sendList_Xz, sendCount_Xz, rank_Xz, sendtag);
req2[12] = comm.Irecv(recvList_xZ, recvCount_xZ, rank_xZ, recvtag);
req1[13] = comm.Isend(sendList_xZ, sendCount_xZ, rank_xZ, sendtag);
req2[13] = comm.Irecv(recvList_Xz, recvCount_Xz, rank_Xz, recvtag);
// Slots 14-17: the four yz directions.
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_yz,sendtag);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ,recvtag);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ,sendtag);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_yz,recvtag);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz,sendtag);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ,recvtag);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ,sendtag);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz,recvtag);
comm.waitAll( 18, req1 );
comm.waitAll( 18, req2 );
req1[14] = comm.Isend(sendList_yz, sendCount_yz, rank_yz, sendtag);
req2[14] = comm.Irecv(recvList_YZ, recvCount_YZ, rank_YZ, recvtag);
req1[15] = comm.Isend(sendList_YZ, sendCount_YZ, rank_YZ, sendtag);
req2[15] = comm.Irecv(recvList_yz, recvCount_yz, rank_yz, recvtag);
req1[16] = comm.Isend(sendList_Yz, sendCount_Yz, rank_Yz, sendtag);
req2[16] = comm.Irecv(recvList_yZ, recvCount_yZ, rank_yZ, recvtag);
req1[17] = comm.Isend(sendList_yZ, sendCount_yZ, rank_yZ, sendtag);
req2[17] = comm.Irecv(recvList_Yz, recvCount_Yz, rank_Yz, recvtag);
// Complete all sends, then all receives.
comm.waitAll(18, req1);
comm.waitAll(18, req2);
inline void CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& comm,
double *sendbuf_x,double *sendbuf_y,double *sendbuf_z,double *sendbuf_X,double *sendbuf_Y,double *sendbuf_Z,
double *sendbuf_xy,double *sendbuf_XY,double *sendbuf_xY,double *sendbuf_Xy,
double *sendbuf_xz,double *sendbuf_XZ,double *sendbuf_xZ,double *sendbuf_Xz,
double *sendbuf_yz,double *sendbuf_YZ,double *sendbuf_yZ,double *sendbuf_Yz,
double *recvbuf_x,double *recvbuf_y,double *recvbuf_z,double *recvbuf_X,double *recvbuf_Y,double *recvbuf_Z,
double *recvbuf_xy,double *recvbuf_XY,double *recvbuf_xY,double *recvbuf_Xy,
double *recvbuf_xz,double *recvbuf_XZ,double *recvbuf_xZ,double *recvbuf_Xz,
double *recvbuf_yz,double *recvbuf_YZ,double *recvbuf_yZ,double *recvbuf_Yz,
int *sendList_x,int *sendList_y,int *sendList_z,int *sendList_X,int *sendList_Y,int *sendList_Z,
int *sendList_xy,int *sendList_XY,int *sendList_xY,int *sendList_Xy,
int *sendList_xz,int *sendList_XZ,int *sendList_xZ,int *sendList_Xz,
int *sendList_yz,int *sendList_YZ,int *sendList_yZ,int *sendList_Yz,
int sendCount_x,int sendCount_y,int sendCount_z,int sendCount_X,int sendCount_Y,int sendCount_Z,
int sendCount_xy,int sendCount_XY,int sendCount_xY,int sendCount_Xy,
int sendCount_xz,int sendCount_XZ,int sendCount_xZ,int sendCount_Xz,
int sendCount_yz,int sendCount_YZ,int sendCount_yZ,int sendCount_Yz,
int *recvList_x,int *recvList_y,int *recvList_z,int *recvList_X,int *recvList_Y,int *recvList_Z,
int *recvList_xy,int *recvList_XY,int *recvList_xY,int *recvList_Xy,
int *recvList_xz,int *recvList_XZ,int *recvList_xZ,int *recvList_Xz,
int *recvList_yz,int *recvList_YZ,int *recvList_yZ,int *recvList_Yz,
int recvCount_x,int recvCount_y,int recvCount_z,int recvCount_X,int recvCount_Y,int recvCount_Z,
int recvCount_xy,int recvCount_XY,int recvCount_xY,int recvCount_Xy,
int recvCount_xz,int recvCount_XZ,int recvCount_xZ,int recvCount_Xz,
int recvCount_yz,int recvCount_YZ,int recvCount_yZ,int recvCount_Yz,
int rank_x,int rank_y,int rank_z,int rank_X,int rank_Y,int rank_Z,int rank_xy,int rank_XY,int rank_xY,
int rank_Xy,int rank_xz,int rank_XZ,int rank_xZ,int rank_Xz,int rank_yz,int rank_YZ,int rank_yZ,int rank_Yz)
int sendtag, recvtag;
sendtag = recvtag = 7;
inline void CommunicateMeshHalo(
DoubleArray &Mesh, const Utilities::MPI &comm, double *sendbuf_x,
double *sendbuf_y, double *sendbuf_z, double *sendbuf_X, double *sendbuf_Y,
double *sendbuf_Z, double *sendbuf_xy, double *sendbuf_XY,
double *sendbuf_xY, double *sendbuf_Xy, double *sendbuf_xz,
double *sendbuf_XZ, double *sendbuf_xZ, double *sendbuf_Xz,
double *sendbuf_yz, double *sendbuf_YZ, double *sendbuf_yZ,
double *sendbuf_Yz, double *recvbuf_x, double *recvbuf_y, double *recvbuf_z,
double *recvbuf_X, double *recvbuf_Y, double *recvbuf_Z, double *recvbuf_xy,
double *recvbuf_XY, double *recvbuf_xY, double *recvbuf_Xy,
double *recvbuf_xz, double *recvbuf_XZ, double *recvbuf_xZ,
double *recvbuf_Xz, double *recvbuf_yz, double *recvbuf_YZ,
double *recvbuf_yZ, double *recvbuf_Yz, int *sendList_x, int *sendList_y,
int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z,
int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy,
int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz,
int *sendList_yz, int *sendList_YZ, int *sendList_yZ, int *sendList_Yz,
int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X,
int sendCount_Y, int sendCount_Z, int sendCount_xy, int sendCount_XY,
int sendCount_xY, int sendCount_Xy, int sendCount_xz, int sendCount_XZ,
int sendCount_xZ, int sendCount_Xz, int sendCount_yz, int sendCount_YZ,
int sendCount_yZ, int sendCount_Yz, int *recvList_x, int *recvList_y,
int *recvList_z, int *recvList_X, int *recvList_Y, int *recvList_Z,
int *recvList_xy, int *recvList_XY, int *recvList_xY, int *recvList_Xy,
int *recvList_xz, int *recvList_XZ, int *recvList_xZ, int *recvList_Xz,
int *recvList_yz, int *recvList_YZ, int *recvList_yZ, int *recvList_Yz,
int recvCount_x, int recvCount_y, int recvCount_z, int recvCount_X,
int recvCount_Y, int recvCount_Z, int recvCount_xy, int recvCount_XY,
int recvCount_xY, int recvCount_Xy, int recvCount_xz, int recvCount_XZ,
int recvCount_xZ, int recvCount_Xz, int recvCount_yz, int recvCount_YZ,
int recvCount_yZ, int recvCount_Yz, int rank_x, int rank_y, int rank_z,
int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY,
int rank_Xy, int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz,
int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz) {
int sendtag, recvtag;
sendtag = recvtag = 7;
double *MeshData =;
PackMeshData(sendList_x, sendCount_x ,sendbuf_x, MeshData);
PackMeshData(sendList_X, sendCount_X ,sendbuf_X, MeshData);
PackMeshData(sendList_y, sendCount_y ,sendbuf_y, MeshData);
PackMeshData(sendList_Y, sendCount_Y ,sendbuf_Y, MeshData);
PackMeshData(sendList_z, sendCount_z ,sendbuf_z, MeshData);
PackMeshData(sendList_Z, sendCount_Z ,sendbuf_Z, MeshData);
PackMeshData(sendList_xy, sendCount_xy ,sendbuf_xy, MeshData);
PackMeshData(sendList_Xy, sendCount_Xy ,sendbuf_Xy, MeshData);
PackMeshData(sendList_xY, sendCount_xY ,sendbuf_xY, MeshData);
PackMeshData(sendList_XY, sendCount_XY ,sendbuf_XY, MeshData);
PackMeshData(sendList_xz, sendCount_xz ,sendbuf_xz, MeshData);
PackMeshData(sendList_Xz, sendCount_Xz ,sendbuf_Xz, MeshData);
PackMeshData(sendList_xZ, sendCount_xZ ,sendbuf_xZ, MeshData);
PackMeshData(sendList_XZ, sendCount_XZ ,sendbuf_XZ, MeshData);
PackMeshData(sendList_yz, sendCount_yz ,sendbuf_yz, MeshData);
PackMeshData(sendList_Yz, sendCount_Yz ,sendbuf_Yz, MeshData);
PackMeshData(sendList_yZ, sendCount_yZ ,sendbuf_yZ, MeshData);
PackMeshData(sendList_YZ, sendCount_YZ ,sendbuf_YZ, MeshData);
UnpackMeshData(recvList_x, recvCount_x ,recvbuf_x, MeshData);
UnpackMeshData(recvList_X, recvCount_X ,recvbuf_X, MeshData);
UnpackMeshData(recvList_y, recvCount_y ,recvbuf_y, MeshData);
UnpackMeshData(recvList_Y, recvCount_Y ,recvbuf_Y, MeshData);
UnpackMeshData(recvList_z, recvCount_z ,recvbuf_z, MeshData);
UnpackMeshData(recvList_Z, recvCount_Z ,recvbuf_Z, MeshData);
UnpackMeshData(recvList_xy, recvCount_xy ,recvbuf_xy, MeshData);
UnpackMeshData(recvList_Xy, recvCount_Xy ,recvbuf_Xy, MeshData);
UnpackMeshData(recvList_xY, recvCount_xY ,recvbuf_xY, MeshData);
UnpackMeshData(recvList_XY, recvCount_XY ,recvbuf_XY, MeshData);
UnpackMeshData(recvList_xz, recvCount_xz ,recvbuf_xz, MeshData);
UnpackMeshData(recvList_Xz, recvCount_Xz ,recvbuf_Xz, MeshData);
UnpackMeshData(recvList_xZ, recvCount_xZ ,recvbuf_xZ, MeshData);
UnpackMeshData(recvList_XZ, recvCount_XZ ,recvbuf_XZ, MeshData);
UnpackMeshData(recvList_yz, recvCount_yz ,recvbuf_yz, MeshData);
UnpackMeshData(recvList_Yz, recvCount_Yz ,recvbuf_Yz, MeshData);
UnpackMeshData(recvList_yZ, recvCount_yZ ,recvbuf_yZ, MeshData);
UnpackMeshData(recvList_YZ, recvCount_YZ ,recvbuf_YZ, MeshData);
PackMeshData(sendList_x, sendCount_x, sendbuf_x, MeshData);
PackMeshData(sendList_X, sendCount_X, sendbuf_X, MeshData);
PackMeshData(sendList_y, sendCount_y, sendbuf_y, MeshData);
PackMeshData(sendList_Y, sendCount_Y, sendbuf_Y, MeshData);
PackMeshData(sendList_z, sendCount_z, sendbuf_z, MeshData);
PackMeshData(sendList_Z, sendCount_Z, sendbuf_Z, MeshData);
PackMeshData(sendList_xy, sendCount_xy, sendbuf_xy, MeshData);
PackMeshData(sendList_Xy, sendCount_Xy, sendbuf_Xy, MeshData);
PackMeshData(sendList_xY, sendCount_xY, sendbuf_xY, MeshData);
PackMeshData(sendList_XY, sendCount_XY, sendbuf_XY, MeshData);
PackMeshData(sendList_xz, sendCount_xz, sendbuf_xz, MeshData);
PackMeshData(sendList_Xz, sendCount_Xz, sendbuf_Xz, MeshData);
PackMeshData(sendList_xZ, sendCount_xZ, sendbuf_xZ, MeshData);
PackMeshData(sendList_XZ, sendCount_XZ, sendbuf_XZ, MeshData);
PackMeshData(sendList_yz, sendCount_yz, sendbuf_yz, MeshData);
PackMeshData(sendList_Yz, sendCount_Yz, sendbuf_Yz, MeshData);
PackMeshData(sendList_yZ, sendCount_yZ, sendbuf_yZ, MeshData);
PackMeshData(sendList_YZ, sendCount_YZ, sendbuf_YZ, MeshData);
comm.sendrecv(sendbuf_x, sendCount_x, rank_x, sendtag, recvbuf_X,
recvCount_X, rank_X, recvtag);
comm.sendrecv(sendbuf_X, sendCount_X, rank_X, sendtag, recvbuf_x,
recvCount_x, rank_x, recvtag);
comm.sendrecv(sendbuf_y, sendCount_y, rank_y, sendtag, recvbuf_Y,
recvCount_Y, rank_Y, recvtag);
comm.sendrecv(sendbuf_Y, sendCount_Y, rank_Y, sendtag, recvbuf_y,
recvCount_y, rank_y, recvtag);
comm.sendrecv(sendbuf_z, sendCount_z, rank_z, sendtag, recvbuf_Z,
recvCount_Z, rank_Z, recvtag);
comm.sendrecv(sendbuf_Z, sendCount_Z, rank_Z, sendtag, recvbuf_z,
recvCount_z, rank_z, recvtag);
comm.sendrecv(sendbuf_xy, sendCount_xy, rank_xy, sendtag, recvbuf_XY,
recvCount_XY, rank_XY, recvtag);
comm.sendrecv(sendbuf_XY, sendCount_XY, rank_XY, sendtag, recvbuf_xy,
recvCount_xy, rank_xy, recvtag);
comm.sendrecv(sendbuf_Xy, sendCount_Xy, rank_Xy, sendtag, recvbuf_xY,
recvCount_xY, rank_xY, recvtag);
comm.sendrecv(sendbuf_xY, sendCount_xY, rank_xY, sendtag, recvbuf_Xy,
recvCount_Xy, rank_Xy, recvtag);
comm.sendrecv(sendbuf_xz, sendCount_xz, rank_xz, sendtag, recvbuf_XZ,
recvCount_XZ, rank_XZ, recvtag);
comm.sendrecv(sendbuf_XZ, sendCount_XZ, rank_XZ, sendtag, recvbuf_xz,
recvCount_xz, rank_xz, recvtag);
comm.sendrecv(sendbuf_Xz, sendCount_Xz, rank_Xz, sendtag, recvbuf_xZ,
recvCount_xZ, rank_xZ, recvtag);
comm.sendrecv(sendbuf_xZ, sendCount_xZ, rank_xZ, sendtag, recvbuf_Xz,
recvCount_Xz, rank_Xz, recvtag);
comm.sendrecv(sendbuf_yz, sendCount_yz, rank_yz, sendtag, recvbuf_YZ,
recvCount_YZ, rank_YZ, recvtag);
comm.sendrecv(sendbuf_YZ, sendCount_YZ, rank_YZ, sendtag, recvbuf_yz,
recvCount_yz, rank_yz, recvtag);
comm.sendrecv(sendbuf_Yz, sendCount_Yz, rank_Yz, sendtag, recvbuf_yZ,
recvCount_yZ, rank_yZ, recvtag);
comm.sendrecv(sendbuf_yZ, sendCount_yZ, rank_yZ, sendtag, recvbuf_Yz,
recvCount_Yz, rank_Yz, recvtag);
UnpackMeshData(recvList_x, recvCount_x, recvbuf_x, MeshData);
UnpackMeshData(recvList_X, recvCount_X, recvbuf_X, MeshData);
UnpackMeshData(recvList_y, recvCount_y, recvbuf_y, MeshData);
UnpackMeshData(recvList_Y, recvCount_Y, recvbuf_Y, MeshData);
UnpackMeshData(recvList_z, recvCount_z, recvbuf_z, MeshData);
UnpackMeshData(recvList_Z, recvCount_Z, recvbuf_Z, MeshData);
UnpackMeshData(recvList_xy, recvCount_xy, recvbuf_xy, MeshData);
UnpackMeshData(recvList_Xy, recvCount_Xy, recvbuf_Xy, MeshData);
UnpackMeshData(recvList_xY, recvCount_xY, recvbuf_xY, MeshData);
UnpackMeshData(recvList_XY, recvCount_XY, recvbuf_XY, MeshData);
UnpackMeshData(recvList_xz, recvCount_xz, recvbuf_xz, MeshData);
UnpackMeshData(recvList_Xz, recvCount_Xz, recvbuf_Xz, MeshData);
UnpackMeshData(recvList_xZ, recvCount_xZ, recvbuf_xZ, MeshData);
UnpackMeshData(recvList_XZ, recvCount_XZ, recvbuf_XZ, MeshData);
UnpackMeshData(recvList_yz, recvCount_yz, recvbuf_yz, MeshData);
UnpackMeshData(recvList_Yz, recvCount_Yz, recvbuf_Yz, MeshData);
UnpackMeshData(recvList_yZ, recvCount_yZ, recvbuf_yZ, MeshData);
UnpackMeshData(recvList_YZ, recvCount_YZ, recvbuf_YZ, MeshData);
#include "common/Communication.hpp"

View File

@ -37,101 +37,116 @@
#include "common/MPI.h"
#include "common/Utilities.h"
* Redistribute data between two grids *
template<class TYPE>
Array<TYPE> redistribute( const RankInfoStruct& src_rank, const Array<TYPE>& src_data,
const RankInfoStruct& dst_rank, std::array<int,3> dst_size, const Utilities::MPI& comm )
if ( comm.getSize() == 1 ) {
return src_data.subset( { 0, (size_t) dst_size[0]-1, 0, (size_t) dst_size[1]-1, 0, (size_t) dst_size[2]-1 } );
template <class TYPE>
redistribute(const RankInfoStruct &src_rank, const Array<TYPE> &src_data,
const RankInfoStruct &dst_rank, std::array<int, 3> dst_size,
const Utilities::MPI &comm) {
if (comm.getSize() == 1) {
return src_data.subset({0, (size_t)dst_size[0] - 1, 0,
(size_t)dst_size[1] - 1, 0,
(size_t)dst_size[2] - 1});
// Get the src size
std::array<int,3> src_size;
int size0[3] = { (int) src_data.size(0), (int) src_data.size(1), (int) src_data.size(2) };
comm.maxReduce( size0,, 3 );
if ( !src_data.empty() )
ASSERT( src_size[0] == size0[0] && src_size[1] == size0[1] && src_size[2] == size0[2] );
std::array<int, 3> src_size;
int size0[3] = {(int)src_data.size(0), (int)src_data.size(1),
comm.maxReduce(size0,, 3);
if (!src_data.empty())
ASSERT(src_size[0] == size0[0] && src_size[1] == size0[1] &&
src_size[2] == size0[2]);
// Check that dst_size matches on all ranks
comm.maxReduce(, size0, 3 );
ASSERT( dst_size[0] == size0[0] && dst_size[1] == size0[1] && dst_size[2] == size0[2] );
comm.maxReduce(, size0, 3);
ASSERT(dst_size[0] == size0[0] && dst_size[1] == size0[1] &&
dst_size[2] == size0[2]);
// Function to get overlap range
auto calcOverlap = []( int i1[3], int i2[3], int j1[3], int j2[3] ) {
auto calcOverlap = [](int i1[3], int i2[3], int j1[3], int j2[3]) {
std::vector<size_t> index;
if ( i1[0] > j2[0] || i2[0] < j1[0] || i1[1] > j2[1] || i2[1] < j1[1] || i1[2] > j2[2] || i2[2] < j1[2] )
if (i1[0] > j2[0] || i2[0] < j1[0] || i1[1] > j2[1] || i2[1] < j1[1] ||
i1[2] > j2[2] || i2[2] < j1[2])
return index;
index.resize( 6 );
index[0] = std::max( j1[0] - i1[0], 0 );
index[1] = std::min( j2[0] - i1[0], i2[0] - i1[0] );
index[2] = std::max( j1[1] - i1[1], 0 );
index[3] = std::min( j2[1] - i1[1], i2[1] - i1[1] );
index[4] = std::max( j1[2] - i1[2], 0 );
index[5] = std::min( j2[2] - i1[2], i2[2] - i1[2] );
index[0] = std::max(j1[0] - i1[0], 0);
index[1] = std::min(j2[0] - i1[0], i2[0] - i1[0]);
index[2] = std::max(j1[1] - i1[1], 0);
index[3] = std::min(j2[1] - i1[1], i2[1] - i1[1]);
index[4] = std::max(j1[2] - i1[2], 0);
index[5] = std::min(j2[2] - i1[2], i2[2] - i1[2]);
return index;
// Pack and send my data to the appropriate ranks (including myself)
std::vector<int> send_rank;
std::vector<Array<TYPE>> send_data;
if ( !src_data.empty() ) {
int i1[3] = { src_size[0] * src_rank.ix, src_size[1] * src_rank.jy, src_size[2] * };
int i2[3] = { i1[0] + src_size[0] - 1, i1[1] + src_size[1] - 1, i1[2] + src_size[2] - 1 };
for ( int i=0; i<dst_rank.nx; i++ ) {
for ( int j=0; j<dst_rank.ny; j++ ) {
for ( int k=0; k<; k++ ) {
int j1[3] = { i * dst_size[0], j * dst_size[1], k * dst_size[2] };
int j2[3] = { j1[0] + dst_size[0] - 1, j1[1] + dst_size[1] - 1, j1[2] + dst_size[2] - 1 };
auto index = calcOverlap( i1, i2, j1, j2 );
if ( index.empty() )
if (!src_data.empty()) {
int i1[3] = {src_size[0] * src_rank.ix, src_size[1] * src_rank.jy,
src_size[2] *};
int i2[3] = {i1[0] + src_size[0] - 1, i1[1] + src_size[1] - 1,
i1[2] + src_size[2] - 1};
for (int i = 0; i < dst_rank.nx; i++) {
for (int j = 0; j < dst_rank.ny; j++) {
for (int k = 0; k <; k++) {
int j1[3] = {i * dst_size[0], j * dst_size[1],
k * dst_size[2]};
int j2[3] = {j1[0] + dst_size[0] - 1,
j1[1] + dst_size[1] - 1,
j1[2] + dst_size[2] - 1};
auto index = calcOverlap(i1, i2, j1, j2);
if (index.empty())
send_rank.push_back( dst_rank.getRankForBlock(i,j,k) );
send_data.push_back( src_data.subset( index ) );
send_rank.push_back(dst_rank.getRankForBlock(i, j, k));
std::vector<MPI_Request> send_request( send_rank.size() );
for (size_t i=0; i<send_rank.size(); i++)
send_request[i] = comm.Isend( send_data[i].data(), send_data[i].length(), send_rank[i], 5462 );
std::vector<MPI_Request> send_request(send_rank.size());
for (size_t i = 0; i < send_rank.size(); i++)
send_request[i] = comm.Isend(send_data[i].data(), send_data[i].length(),
send_rank[i], 5462);
// Unpack data from the appropriate ranks (including myself)
Array<TYPE> dst_data( dst_size[0], dst_size[1], dst_size[2] );
int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy, dst_size[2] * };
int i2[3] = { i1[0] + dst_size[0] - 1, i1[1] + dst_size[1] - 1, i1[2] + dst_size[2] - 1 };
for ( int i=0; i<src_rank.nx; i++ ) {
for ( int j=0; j<src_rank.ny; j++ ) {
for ( int k=0; k<; k++ ) {
int j1[3] = { i * src_size[0], j * src_size[1], k * src_size[2] };
int j2[3] = { j1[0] + src_size[0] - 1, j1[1] + src_size[1] - 1, j1[2] + src_size[2] - 1 };
auto index = calcOverlap( i1, i2, j1, j2 );
if ( index.empty() )
Array<TYPE> dst_data(dst_size[0], dst_size[1], dst_size[2]);
int i1[3] = {dst_size[0] * dst_rank.ix, dst_size[1] * dst_rank.jy,
dst_size[2] *};
int i2[3] = {i1[0] + dst_size[0] - 1, i1[1] + dst_size[1] - 1,
i1[2] + dst_size[2] - 1};
for (int i = 0; i < src_rank.nx; i++) {
for (int j = 0; j < src_rank.ny; j++) {
for (int k = 0; k <; k++) {
int j1[3] = {i * src_size[0], j * src_size[1], k * src_size[2]};
int j2[3] = {j1[0] + src_size[0] - 1, j1[1] + src_size[1] - 1,
j1[2] + src_size[2] - 1};
auto index = calcOverlap(i1, i2, j1, j2);
if (index.empty())
int rank = src_rank.getRankForBlock(i,j,k);
Array<TYPE> data( index[1] - index[0] + 1, index[3] - index[2] + 1, index[5] - index[4] + 1 );
comm.recv(, data.length(), rank, 5462 );
dst_data.copySubset( index, data );
int rank = src_rank.getRankForBlock(i, j, k);
Array<TYPE> data(index[1] - index[0] + 1,
index[3] - index[2] + 1,
index[5] - index[4] + 1);
comm.recv(, data.length(), rank, 5462);
dst_data.copySubset(index, data);
// Free data
comm.waitAll( send_request.size(), );
return dst_data;
* Structure to fill halo cells *
template<class TYPE>
fillHalo<TYPE>::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& info_,
std::array<int,3> n_, std::array<int,3> ng_, int tag0, int depth_,
std::array<bool,3> fill, std::array<bool,3> periodic ):
comm(comm_), info(info_), n(n_), ng(ng_), depth(depth_)
template <class TYPE>
fillHalo<TYPE>::fillHalo(const Utilities::MPI &comm_,
const RankInfoStruct &info_, std::array<int, 3> n_,
std::array<int, 3> ng_, int tag0, int depth_,
std::array<bool, 3> fill, std::array<bool, 3> periodic)
: comm(comm_), info(info_), n(n_), ng(ng_), depth(depth_) {
// Set the fill pattern
if ( fill[0] ) {
memset(fill_pattern, 0, sizeof(fill_pattern));
if (fill[0]) {
fill_pattern[0][1][1] = true;
fill_pattern[2][1][1] = true;
fill_pattern[1][0][1] = true;
@ -139,7 +154,7 @@ fillHalo<TYPE>::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& inf
fill_pattern[1][1][0] = true;
fill_pattern[1][1][2] = true;
if ( fill[1] ) {
if (fill[1]) {
fill_pattern[0][0][1] = true;
fill_pattern[0][2][1] = true;
fill_pattern[2][0][1] = true;
@ -153,7 +168,7 @@ fillHalo<TYPE>::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& inf
fill_pattern[1][2][0] = true;
fill_pattern[1][2][2] = true;
if ( fill[2] ) {
if (fill[2]) {
fill_pattern[0][0][0] = true;
fill_pattern[0][0][2] = true;
fill_pattern[0][2][0] = true;
@ -164,238 +179,233 @@ fillHalo<TYPE>::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& inf
fill_pattern[2][2][2] = true;
// Remove communication for non-periodic directions
if ( !periodic[0] && info.ix==0 ) {
for (int j=0; j<3; j++) {
for (int k=0; k<3; k++)
if (!periodic[0] && info.ix == 0) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++)
fill_pattern[0][j][k] = false;
if ( !periodic[0] && info.ix==info.nx-1 ) {
for (int j=0; j<3; j++) {
for (int k=0; k<3; k++)
if (!periodic[0] && info.ix == info.nx - 1) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++)
fill_pattern[2][j][k] = false;
if ( !periodic[1] && info.jy==0 ) {
for (int i=0; i<3; i++) {
for (int k=0; k<3; k++)
if (!periodic[1] && info.jy == 0) {
for (int i = 0; i < 3; i++) {
for (int k = 0; k < 3; k++)
fill_pattern[i][0][k] = false;
if ( !periodic[1] && info.jy==info.ny-1 ) {
for (int i=0; i<3; i++) {
for (int k=0; k<3; k++)
if (!periodic[1] && info.jy == info.ny - 1) {
for (int i = 0; i < 3; i++) {
for (int k = 0; k < 3; k++)
fill_pattern[i][2][k] = false;
if ( !periodic[2] && ) {
for (int i=0; i<3; i++) {
for (int j=0; j<3; j++)
if (!periodic[2] && == 0) {
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++)
fill_pattern[i][j][0] = false;
if ( !periodic[2] && ) {
for (int i=0; i<3; i++) {
for (int j=0; j<3; j++)
if (!periodic[2] && == - 1) {
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++)
fill_pattern[i][j][2] = false;
// Determine the number of elements for each send/recv
for (int i=0; i<3; i++) {
int ni = (i-1)==0 ? n[0]:ng[0];
for (int j=0; j<3; j++) {
int nj = (j-1)==0 ? n[1]:ng[1];
for (int k=0; k<3; k++) {
int nk = (k-1)==0 ? n[2]:ng[2];
if ( fill_pattern[i][j][k] )
N_send_recv[i][j][k] = ni*nj*nk;
for (int i = 0; i < 3; i++) {
int ni = (i - 1) == 0 ? n[0] : ng[0];
for (int j = 0; j < 3; j++) {
int nj = (j - 1) == 0 ? n[1] : ng[1];
for (int k = 0; k < 3; k++) {
int nk = (k - 1) == 0 ? n[2] : ng[2];
if (fill_pattern[i][j][k])
N_send_recv[i][j][k] = ni * nj * nk;
N_send_recv[i][j][k] = 0;
// Create send/recv buffers
size_t N_mem=0;
for (int i=0; i<3; i++) {
for (int j=0; j<3; j++) {
for (int k=0; k<3; k++)
size_t N_mem = 0;
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++)
N_mem += N_send_recv[i][j][k];
mem = new TYPE[2*depth*N_mem];
mem = new TYPE[2 * depth * N_mem];
size_t index = 0;
for (int i=0; i<3; i++) {
for (int j=0; j<3; j++) {
for (int k=0; k<3; k++) {
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++) {
send[i][j][k] = &mem[index];
index += depth*N_send_recv[i][j][k];
index += depth * N_send_recv[i][j][k];
recv[i][j][k] = &mem[index];
index += depth*N_send_recv[i][j][k];
index += depth * N_send_recv[i][j][k];
// Create the tags
for (int i=0; i<3; i++) {
for (int j=0; j<3; j++) {
for (int k=0; k<3; k++) {
tag[i][j][k] = tag0 + i + j*3 + k*9;
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++) {
tag[i][j][k] = tag0 + i + j * 3 + k * 9;
template<class TYPE>
fillHalo<TYPE>::~fillHalo( )
delete [] mem;
template<class TYPE>
void fillHalo<TYPE>::fill( Array<TYPE>& data )
// Destructor: frees the array pointed to by `mem` with delete[].
template <class TYPE> fillHalo<TYPE>::~fillHalo() { delete[] mem; }
// Fills the halo cells of `data` by exchanging with the neighboring ranks.
//
// `data` must have extent n[d] + 2 * ng[d] in each of its first three
// dimensions, be 3- or 4-dimensional, and have data.size(3) <= depth (all
// checked with ASSERT). The exchange runs over the 3x3x3 grid of directions
// in four passes: post the receives, pack and post the sends, wait for each
// receive and unpack it, then wait for the sends.
//
// NOTE(review): in this text each `if (!fill_pattern[i][j][k])` is followed
// directly by the communication call and the loop-closing braces are missing;
// old and new diff lines also appear interleaved. Presumably the guarded
// statement skips directions that are not filled -- confirm against the real
// file.
template <class TYPE> void fillHalo<TYPE>::fill(Array<TYPE> &data) {
// Number of entries along the 4th dimension actually present in `data`.
int depth2 = data.size(3);
ASSERT((int)data.size(0) == n[0] + 2 * ng[0]);
ASSERT((int)data.size(1) == n[1] + 2 * ng[1]);
ASSERT((int)data.size(2) == n[2] + 2 * ng[2]);
ASSERT(depth2 <= depth);
ASSERT(data.ndim() == 3 || data.ndim() == 4);
// Start the receives
for (int i=0; i<3; i++) {
for (int j=0; j<3; j++) {
for (int k=0; k<3; k++) {
if ( !fill_pattern[i][j][k] )
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++) {
if (!fill_pattern[i][j][k])
// The receive uses the mirrored tag [2-i][2-j][2-k], while the send
// below uses tag[i][j][k].
recv_req[i][j][k] = comm.Irecv( recv[i][j][k], depth2*N_send_recv[i][j][k],
info.rank[i][j][k], tag[2-i][2-j][2-k] );
recv_req[i][j][k] =
comm.Irecv(recv[i][j][k], depth2 * N_send_recv[i][j][k],
info.rank[i][j][k], tag[2 - i][2 - j][2 - k]);
// Pack the src data and start the sends
for (int i=0; i<3; i++) {
for (int j=0; j<3; j++) {
for (int k=0; k<3; k++) {
if ( !fill_pattern[i][j][k] )
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++) {
if (!fill_pattern[i][j][k])
// Loop indices 0..2 are converted to direction offsets -1..1 for pack().
pack( data, i-1, j-1, k-1, send[i][j][k] );
send_req[i][j][k] = comm.Isend( send[i][j][k], depth2*N_send_recv[i][j][k],
info.rank[i][j][k], tag[i][j][k] );
pack(data, i - 1, j - 1, k - 1, send[i][j][k]);
send_req[i][j][k] =
comm.Isend(send[i][j][k], depth2 * N_send_recv[i][j][k],
info.rank[i][j][k], tag[i][j][k]);
// Recv the dst data and unpack (we receive in reverse order to match the sends)
for (int i=2; i>=0; i--) {
for (int j=2; j>=0; j--) {
for (int k=2; k>=0; k--) {
if ( !fill_pattern[i][j][k] )
for (int i = 2; i >= 0; i--) {
for (int j = 2; j >= 0; j--) {
for (int k = 2; k >= 0; k--) {
if (!fill_pattern[i][j][k])
comm.wait( recv_req[i][j][k] );
unpack( data, i-1, j-1, k-1, recv[i][j][k] );
unpack(data, i - 1, j - 1, k - 1, recv[i][j][k]);
// Wait until all sends have completed
for (int i=0; i<3; i++) {
for (int j=0; j<3; j++) {
for (int k=0; k<3; k++) {
if ( !fill_pattern[i][j][k] )
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
for (int k = 0; k < 3; k++) {
if (!fill_pattern[i][j][k])
comm.wait( send_req[i][j][k] );
// Copies one region of `data` into the contiguous send `buffer`.
//
// i0, j0, k0 are the per-axis direction offsets; each is compared against 0
// and -1 below, and any other value takes the remaining branch.
// Per axis:
//   extent (ni/nj/nk): n[d] when the offset is 0, ng[d] otherwise.
//   start  (is/js/ks): ng[d] when the offset is 0 or -1, n[d] otherwise.
// Buffer layout: i varies fastest, then j, then k, then d, where d runs over
// data.size(3) entries.
//
// NOTE(review): this text contains two copies of most lines (unformatted and
// formatted) and no closing braces, which looks like an interleaved diff --
// confirm against the real file.
template<class TYPE>
void fillHalo<TYPE>::pack( const Array<TYPE>& data, int i0, int j0, int k0, TYPE *buffer )
template <class TYPE>
void fillHalo<TYPE>::pack(const Array<TYPE> &data, int i0, int j0, int k0,
TYPE *buffer) {
int depth2 = data.size(3);
int ni = i0==0 ? n[0]:ng[0];
int nj = j0==0 ? n[1]:ng[1];
int nk = k0==0 ? n[2]:ng[2];
int is = i0==0 ? ng[0]:((i0==-1)?ng[0]:n[0]);
int js = j0==0 ? ng[1]:((j0==-1)?ng[1]:n[1]);
int ks = k0==0 ? ng[2]:((k0==-1)?ng[2]:n[2]);
for (int d=0; d<depth2; d++) {
for (int k=0; k<nk; k++) {
for (int j=0; j<nj; j++) {
for (int i=0; i<ni; i++) {
buffer[i+j*ni+k*ni*nj+d*ni*nj*nk] = data(i+is,j+js,k+ks,d);
// Extent of the region along each axis.
int ni = i0 == 0 ? n[0] : ng[0];
int nj = j0 == 0 ? n[1] : ng[1];
int nk = k0 == 0 ? n[2] : ng[2];
// First index of the region along each axis.
int is = i0 == 0 ? ng[0] : ((i0 == -1) ? ng[0] : n[0]);
int js = j0 == 0 ? ng[1] : ((j0 == -1) ? ng[1] : n[1]);
int ks = k0 == 0 ? ng[2] : ((k0 == -1) ? ng[2] : n[2]);
for (int d = 0; d < depth2; d++) {
for (int k = 0; k < nk; k++) {
for (int j = 0; j < nj; j++) {
for (int i = 0; i < ni; i++) {
buffer[i + j * ni + k * ni * nj + d * ni * nj * nk] =
data(i + is, j + js, k + ks, d);
// Copies a received contiguous `buffer` into one region of `data`.
//
// i0, j0, k0 are the per-axis direction offsets; each is compared against 0
// and -1 below, and any other value takes the remaining branch.
// Per axis:
//   extent (ni/nj/nk): n[d] when the offset is 0, ng[d] otherwise.
//   start  (is/js/ks): ng[d] when the offset is 0, 0 when it is -1,
//                      n[d] + ng[d] otherwise.
// The buffer is read with i varying fastest, then j, then k, then d, where d
// runs over data.size(3) entries.
//
// NOTE(review): this text contains two copies of most lines (unformatted and
// formatted) and no closing braces, which looks like an interleaved diff --
// confirm against the real file.
template<class TYPE>
void fillHalo<TYPE>::unpack( Array<TYPE>& data, int i0, int j0, int k0, const TYPE *buffer )
template <class TYPE>
void fillHalo<TYPE>::unpack(Array<TYPE> &data, int i0, int j0, int k0,
const TYPE *buffer) {
int depth2 = data.size(3);
int ni = i0==0 ? n[0]:ng[0];
int nj = j0==0 ? n[1]:ng[1];
int nk = k0==0 ? n[2]:ng[2];
int is = i0==0 ? ng[0]:((i0==-1)?0:n[0]+ng[0]);
int js = j0==0 ? ng[1]:((j0==-1)?0:n[1]+ng[1]);
int ks = k0==0 ? ng[2]:((k0==-1)?0:n[2]+ng[2]);
for (int d=0; d<depth2; d++) {
for (int k=0; k<nk; k++) {
for (int j=0; j<nj; j++) {
for (int i=0; i<ni; i++) {
data(i+is,j+js,k+ks,d) = buffer[i+j*ni+k*ni*nj+d*ni*nj*nk];
// Extent of the region along each axis.
int ni = i0 == 0 ? n[0] : ng[0];
int nj = j0 == 0 ? n[1] : ng[1];
int nk = k0 == 0 ? n[2] : ng[2];
// First destination index of the region along each axis.
int is = i0 == 0 ? ng[0] : ((i0 == -1) ? 0 : n[0] + ng[0]);
int js = j0 == 0 ? ng[1] : ((j0 == -1) ? 0 : n[1] + ng[1]);
int ks = k0 == 0 ? ng[2] : ((k0 == -1) ? 0 : n[2] + ng[2]);
for (int d = 0; d < depth2; d++) {
for (int k = 0; k < nk; k++) {
for (int j = 0; j < nj; j++) {
for (int i = 0; i < ni; i++) {
data(i + is, j + js, k + ks, d) =
buffer[i + j * ni + k * ni * nj + d * ni * nj * nk];
* Function to remove the ghost halo *
// Copies `src` into `dst`, where each array either includes the halo cells
// (extent n[d] + 2 * ng[d]) or excludes them (extent n[d]).
//
// Whether an array has halos is decided from its size(0) alone; the sizes of
// all three dimensions are then checked with ASSERT. Three cases follow:
//   - same halo state on both sides: element-wise copy over src.length();
//   - src has halos, dst does not:   the interior of src (offset by ng[]) is
//                                    copied into dst;
//   - dst has halos, src does not:   src is copied into the interior of dst.
//
// NOTE(review): this text contains two copies of most lines (unformatted and
// formatted) and is missing closing braces, which looks like an interleaved
// diff -- confirm against the real file.
template<class TYPE>
template<class TYPE1, class TYPE2>
void fillHalo<TYPE>::copy( const Array<TYPE1>& src, Array<TYPE2>& dst )
template <class TYPE>
template <class TYPE1, class TYPE2>
void fillHalo<TYPE>::copy(const Array<TYPE1> &src, Array<TYPE2> &dst) {
ASSERT( (int)src.size(0)==n[0] || (int)src.size(0)==n[0]+2*ng[0] );
ASSERT( (int)dst.size(0)==n[0] || (int)dst.size(0)==n[0]+2*ng[0] );
bool src_halo = (int)src.size(0)==n[0]+2*ng[0];
bool dst_halo = (int)dst.size(0)==n[0]+2*ng[0];
if ( src_halo ) {
// Size of the first dimension must match one of the two supported layouts.
ASSERT((int)src.size(0) == n[0] || (int)src.size(0) == n[0] + 2 * ng[0]);
ASSERT((int)dst.size(0) == n[0] || (int)dst.size(0) == n[0] + 2 * ng[0]);
bool src_halo = (int)src.size(0) == n[0] + 2 * ng[0];
bool dst_halo = (int)dst.size(0) == n[0] + 2 * ng[0];
// Verify the remaining dimensions of src agree with the detected layout.
if (src_halo) {
ASSERT((int)src.size(0) == n[0] + 2 * ng[0]);
ASSERT((int)src.size(1) == n[1] + 2 * ng[1]);
ASSERT((int)src.size(2) == n[2] + 2 * ng[2]);
} else {
ASSERT((int)src.size(0) == n[0]);
ASSERT((int)src.size(1) == n[1]);
ASSERT((int)src.size(2) == n[2]);
if ( dst_halo ) {
// Verify the remaining dimensions of dst agree with the detected layout.
if (dst_halo) {
ASSERT((int)dst.size(0) == n[0] + 2 * ng[0]);
ASSERT((int)dst.size(1) == n[1] + 2 * ng[1]);
ASSERT((int)dst.size(2) == n[2] + 2 * ng[2]);
} else {
ASSERT((int)dst.size(0) == n[0]);
ASSERT((int)dst.size(1) == n[1]);
ASSERT((int)dst.size(2) == n[2]);
if ( src_halo == dst_halo ) {
if (src_halo == dst_halo) {
// Src and dst halos match
for (size_t i=0; i<src.length(); i++)
for (size_t i = 0; i < src.length(); i++)
dst(i) = src(i);
} else if ( src_halo && !dst_halo ) {
} else if (src_halo && !dst_halo) {
// Src has halos
for (int k=0; k<n[2]; k++) {
for (int j=0; j<n[1]; j++) {
for (int i=0; i<n[0]; i++) {
dst(i,j,k) = src(i+ng[0],j+ng[1],k+ng[2]);
for (int k = 0; k < n[2]; k++) {
for (int j = 0; j < n[1]; j++) {
for (int i = 0; i < n[0]; i++) {
dst(i, j, k) = src(i + ng[0], j + ng[1], k + ng[2]);
} else if ( !src_halo && dst_halo ) {
} else if (!src_halo && dst_halo) {
// Dst has halos
for (int k=0; k<n[2]; k++) {
for (int j=0; j<n[1]; j++) {
for (int i=0; i<n[0]; i++) {
dst(i+ng[0],j+ng[1],k+ng[2]) = src(i,j,k);
for (int k = 0; k < n[2]; k++) {
for (int j = 0; j < n[1]; j++) {
for (int i = 0; i < n[0]; i++) {
dst(i + ng[0], j + ng[1], k + ng[2]) = src(i, j, k);
@ -404,5 +414,4 @@ void fillHalo<TYPE>::copy( const Array<TYPE1>& src, Array<TYPE2>& dst )

View File

@ -24,303 +24,268 @@
#include <string>
#include <tuple>
* Constructors/destructor *
Database::Database() = default;
Database::Database() = default;
Database::~Database() = default;
Database::Database( const Database& rhs ) : KeyData( rhs )
Database::Database(const Database &rhs) : KeyData(rhs) {
for ( const auto& tmp : rhs.d_data )
putData( tmp.first, tmp.second->clone() );
for (const auto &tmp : rhs.d_data)
putData(tmp.first, tmp.second->clone());
Database& Database::operator=( const Database& rhs )
if ( this == &rhs )
Database &Database::operator=(const Database &rhs) {
if (this == &rhs)
return *this;
for ( const auto& tmp : rhs.d_data )
putData( tmp.first, tmp.second->clone() );
for (const auto &tmp : rhs.d_data)
putData(tmp.first, tmp.second->clone());
return *this;
Database::Database( Database&& rhs ) { std::swap( d_data, rhs.d_data ); }
Database& Database::operator=( Database&& rhs )
if ( this != &rhs )
std::swap( d_data, rhs.d_data );
Database::Database(Database &&rhs) { std::swap(d_data, rhs.d_data); }
Database &Database::operator=(Database &&rhs) {
if (this != &rhs)
std::swap(d_data, rhs.d_data);
return *this;
* Clone the database *
std::shared_ptr<KeyData> Database::clone() const { return cloneDatabase(); }
std::shared_ptr<Database> Database::cloneDatabase() const
std::shared_ptr<Database> Database::cloneDatabase() const {
auto db = std::make_shared<Database>();
for ( const auto& tmp : d_data )
db->putData( tmp.first, tmp.second->clone() );
for (const auto &tmp : d_data)
db->putData(tmp.first, tmp.second->clone());
return db;
* Get the data object *
bool Database::keyExists( const std::string& key ) const
return d_data.find( key ) != d_data.end();
bool Database::keyExists(const std::string &key) const {
return d_data.find(key) != d_data.end();
std::shared_ptr<KeyData> Database::getData( const std::string& key )
auto it = d_data.find( key );
if ( it == d_data.end() ) {
std::shared_ptr<KeyData> Database::getData(const std::string &key) {
auto it = d_data.find(key);
if (it == d_data.end()) {
char msg[1000];
sprintf( msg, "Variable %s was not found in database", key.c_str() );
ERROR( msg );
sprintf(msg, "Variable %s was not found in database", key.c_str());
return it->second;
std::shared_ptr<const KeyData> Database::getData( const std::string& key ) const
return const_cast<Database*>( this )->getData( key );
std::shared_ptr<const KeyData> Database::getData(const std::string &key) const {
return const_cast<Database *>(this)->getData(key);
bool Database::isDatabase( const std::string& key ) const
auto ptr = getData( key );
auto ptr2 = std::dynamic_pointer_cast<const Database>( ptr );
bool Database::isDatabase(const std::string &key) const {
auto ptr = getData(key);
auto ptr2 = std::dynamic_pointer_cast<const Database>(ptr);
return ptr2 != nullptr;
std::shared_ptr<Database> Database::getDatabase( const std::string& key )
std::shared_ptr<KeyData> ptr = getData( key );
std::shared_ptr<Database> ptr2 = std::dynamic_pointer_cast<Database>( ptr );
if ( ptr2 == nullptr ) {
std::shared_ptr<Database> Database::getDatabase(const std::string &key) {
std::shared_ptr<KeyData> ptr = getData(key);
std::shared_ptr<Database> ptr2 = std::dynamic_pointer_cast<Database>(ptr);
if (ptr2 == nullptr) {
char msg[1000];
sprintf( msg, "Variable %s is not a database", key.c_str() );
ERROR( msg );
sprintf(msg, "Variable %s is not a database", key.c_str());
return ptr2;
std::shared_ptr<const Database> Database::getDatabase( const std::string& key ) const
return const_cast<Database*>( this )->getDatabase( key );
std::shared_ptr<const Database>
Database::getDatabase(const std::string &key) const {
return const_cast<Database *>(this)->getDatabase(key);
std::vector<std::string> Database::getAllKeys() const
std::vector<std::string> Database::getAllKeys() const {
std::vector<std::string> keys;
keys.reserve( d_data.size() );
for ( const auto& it : d_data )
keys.push_back( it.first );
for (const auto &it : d_data)
return keys;
void Database::putDatabase( const std::string& key, std::shared_ptr<Database> db )
d_data[key] = std::move( db );
void Database::putDatabase(const std::string &key,
std::shared_ptr<Database> db) {
d_data[key] = std::move(db);
void Database::putData( const std::string& key, std::shared_ptr<KeyData> data )
d_data[key] = std::move( data );
void Database::putData(const std::string &key, std::shared_ptr<KeyData> data) {
d_data[key] = std::move(data);
* Is the data of the given type *
bool Database::isType<double>( const std::string& key ) const
auto type = getData( key )->type();
template <> bool Database::isType<double>(const std::string &key) const {
auto type = getData(key)->type();
return type == "double";
bool Database::isType<float>( const std::string& key ) const
auto type = getData( key )->type();
template <> bool Database::isType<float>(const std::string &key) const {
auto type = getData(key)->type();
return type == "double";
bool Database::isType<int>( const std::string& key ) const
template <> bool Database::isType<int>(const std::string &key) const {
bool pass = true;
auto type = getData( key )->type();
if ( type == "double" ) {
auto data = getVector<double>( key );
for ( auto tmp : data )
pass = pass && static_cast<double>( static_cast<int>( tmp ) ) == tmp;
auto type = getData(key)->type();
if (type == "double") {
auto data = getVector<double>(key);
for (auto tmp : data)
pass = pass && static_cast<double>(static_cast<int>(tmp)) == tmp;
} else {
pass = false;
return pass;
bool Database::isType<std::string>( const std::string& key ) const
auto type = getData( key )->type();
template <> bool Database::isType<std::string>(const std::string &key) const {
auto type = getData(key)->type();
return type == "string";
bool Database::isType<bool>( const std::string& key ) const
auto type = getData( key )->type();
template <> bool Database::isType<bool>(const std::string &key) const {
auto type = getData(key)->type();
return type == "bool";
* Get a vector *
std::vector<std::string> Database::getVector<std::string>(
const std::string& key, const Units& ) const
std::shared_ptr<const KeyData> ptr = getData( key );
if ( std::dynamic_pointer_cast<const EmptyKeyData>( ptr ) )
template <>
Database::getVector<std::string>(const std::string &key, const Units &) const {
std::shared_ptr<const KeyData> ptr = getData(key);
if (std::dynamic_pointer_cast<const EmptyKeyData>(ptr))
return std::vector<std::string>();
const auto* ptr2 = dynamic_cast<const KeyDataString*>( ptr.get() );
if ( ptr2 == nullptr ) {
ERROR( "Key '" + key + "' is not a string" );
const auto *ptr2 = dynamic_cast<const KeyDataString *>(ptr.get());
if (ptr2 == nullptr) {
ERROR("Key '" + key + "' is not a string");
return ptr2->d_data;
std::vector<bool> Database::getVector<bool>( const std::string& key, const Units& ) const
std::shared_ptr<const KeyData> ptr = getData( key );
if ( std::dynamic_pointer_cast<const EmptyKeyData>( ptr ) )
template <>
std::vector<bool> Database::getVector<bool>(const std::string &key,
const Units &) const {
std::shared_ptr<const KeyData> ptr = getData(key);
if (std::dynamic_pointer_cast<const EmptyKeyData>(ptr))
return std::vector<bool>();
const auto* ptr2 = dynamic_cast<const KeyDataBool*>( ptr.get() );
if ( ptr2 == nullptr ) {
ERROR( "Key '" + key + "' is not a bool" );
const auto *ptr2 = dynamic_cast<const KeyDataBool *>(ptr.get());
if (ptr2 == nullptr) {
ERROR("Key '" + key + "' is not a bool");
return ptr2->d_data;
template<class TYPE>
std::vector<TYPE> Database::getVector( const std::string& key, const Units& unit ) const
std::shared_ptr<const KeyData> ptr = getData( key );
if ( std::dynamic_pointer_cast<const EmptyKeyData>( ptr ) )
template <class TYPE>
std::vector<TYPE> Database::getVector(const std::string &key,
const Units &unit) const {
std::shared_ptr<const KeyData> ptr = getData(key);
if (std::dynamic_pointer_cast<const EmptyKeyData>(ptr))
return std::vector<TYPE>();
std::vector<TYPE> data;
if ( std::dynamic_pointer_cast<const KeyDataDouble>( ptr ) ) {
const auto* ptr2 = dynamic_cast<const KeyDataDouble*>( ptr.get() );
const std::vector<double>& data2 = ptr2->d_data;
double factor = 1;
if ( !unit.isNull() ) {
INSIST( !ptr2->d_unit.isNull(), "Field " + key + " must have units" );
factor = ptr2->d_unit.convert( unit );
INSIST( factor != 0, "Unit conversion failed" );
if (std::dynamic_pointer_cast<const KeyDataDouble>(ptr)) {
const auto *ptr2 = dynamic_cast<const KeyDataDouble *>(ptr.get());
const std::vector<double> &data2 = ptr2->d_data;
double factor = 1;
if (!unit.isNull()) {
INSIST(!ptr2->d_unit.isNull(), "Field " + key + " must have units");
factor = ptr2->d_unit.convert(unit);
INSIST(factor != 0, "Unit conversion failed");
data.resize( data2.size() );
for ( size_t i = 0; i < data2.size(); i++ )
data[i] = static_cast<TYPE>( factor * data2[i] );
} else if ( std::dynamic_pointer_cast<const KeyDataString>( ptr ) ) {
ERROR( "Converting std::string to another type" );
} else if ( std::dynamic_pointer_cast<const KeyDataBool>( ptr ) ) {
ERROR( "Converting std::bool to another type" );
for (size_t i = 0; i < data2.size(); i++)
data[i] = static_cast<TYPE>(factor * data2[i]);
} else if (std::dynamic_pointer_cast<const KeyDataString>(ptr)) {
ERROR("Converting std::string to another type");
} else if (std::dynamic_pointer_cast<const KeyDataBool>(ptr)) {
ERROR("Converting std::bool to another type");
} else {
ERROR( "Unable to convert data format" );
ERROR("Unable to convert data format");
return data;
* Put a vector *
void Database::putVector<std::string>(
const std::string& key, const std::vector<std::string>& data, const Units& )
std::shared_ptr<KeyDataString> ptr( new KeyDataString() );
template <>
void Database::putVector<std::string>(const std::string &key,
const std::vector<std::string> &data,
const Units &) {
std::shared_ptr<KeyDataString> ptr(new KeyDataString());
ptr->d_data = data;
d_data[key] = ptr;
void Database::putVector<bool>(
const std::string& key, const std::vector<bool>& data, const Units& )
std::shared_ptr<KeyDataBool> ptr( new KeyDataBool() );
template <>
void Database::putVector<bool>(const std::string &key,
const std::vector<bool> &data, const Units &) {
std::shared_ptr<KeyDataBool> ptr(new KeyDataBool());
ptr->d_data = data;
d_data[key] = ptr;
template<class TYPE>
void Database::putVector( const std::string& key, const std::vector<TYPE>& data, const Units& unit )
std::shared_ptr<KeyDataDouble> ptr( new KeyDataDouble() );
template <class TYPE>
void Database::putVector(const std::string &key, const std::vector<TYPE> &data,
const Units &unit) {
std::shared_ptr<KeyDataDouble> ptr(new KeyDataDouble());
ptr->d_unit = unit;
ptr->d_data.resize( data.size() );
for ( size_t i = 0; i < data.size(); i++ )
ptr->d_data[i] = static_cast<double>( data[i] );
for (size_t i = 0; i < data.size(); i++)
ptr->d_data[i] = static_cast<double>(data[i]);
d_data[key] = ptr;
* Print the database *
void Database::print( std::ostream& os, const std::string& indent ) const
for ( const auto& it : d_data ) {
void Database::print(std::ostream &os, const std::string &indent) const {
for (const auto &it : d_data) {
os << indent << it.first;
if ( dynamic_cast<const Database*>( it.second.get() ) ) {
const auto* db = dynamic_cast<const Database*>( it.second.get() );
if (dynamic_cast<const Database *>(it.second.get())) {
const auto *db = dynamic_cast<const Database *>(it.second.get());
os << " {\n";
db->print( os, indent + " " );
db->print(os, indent + " ");
os << indent << "}\n";
} else {
os << " = ";
it.second->print( os, "" );
it.second->print(os, "");
std::string Database::print( const std::string& indent ) const
std::string Database::print(const std::string &indent) const {
std::stringstream ss;
print( ss, indent );
print(ss, indent);
return ss.str();
* Read input database file *
Database::Database( const std::string& filename )
Database::Database(const std::string &filename) {
// Read the input file into memory
FILE* fid = fopen( filename.c_str(), "rb" );
if ( fid == nullptr )
ERROR( "Error opening file " + filename );
fseek( fid, 0, SEEK_END );
size_t bytes = ftell( fid );
rewind( fid );
auto* buffer = new char[bytes + 4];
size_t result = fread( buffer, 1, bytes, fid );
fclose( fid );
if ( result != bytes )
ERROR( "Error reading file " + filename );
FILE *fid = fopen(filename.c_str(), "rb");
if (fid == nullptr)
ERROR("Error opening file " + filename);
fseek(fid, 0, SEEK_END);
size_t bytes = ftell(fid);
auto *buffer = new char[bytes + 4];
size_t result = fread(buffer, 1, bytes, fid);
if (result != bytes)
ERROR("Error reading file " + filename);
buffer[bytes + 0] = '\n';
buffer[bytes + 1] = '}';
buffer[bytes + 2] = '\n';
buffer[bytes + 3] = 0;
// Create the database entries
loadDatabase( buffer, *this );
loadDatabase(buffer, *this);
// Free temporary memory
delete[] buffer;
std::shared_ptr<Database> Database::createFromString( const std::string& data )
std::shared_ptr<Database> db( new Database() );
auto* buffer = new char[data.size() + 4];
memcpy( buffer,, data.size() );
std::shared_ptr<Database> Database::createFromString(const std::string &data) {
std::shared_ptr<Database> db(new Database());
auto *buffer = new char[data.size() + 4];
memcpy(buffer,, data.size());
buffer[data.size() + 0] = '\n';
buffer[data.size() + 1] = '}';
buffer[data.size() + 2] = '\n';
buffer[data.size() + 3] = 0;
loadDatabase( buffer, *db );
loadDatabase(buffer, *db);
delete[] buffer;
return db;
@ -335,279 +300,285 @@ enum class token_type {
inline size_t length( token_type type )
inline size_t length(token_type type) {
size_t len = 0;
if ( type == token_type::newline || type == token_type::quote || type == token_type::equal ||
type == token_type::bracket || type == token_type::end_bracket ||
type == token_type::end ) {
if (type == token_type::newline || type == token_type::quote ||
type == token_type::equal || type == token_type::bracket ||
type == token_type::end_bracket || type == token_type::end) {
len = 1;
} else if ( type == token_type::line_comment || type == token_type::block_start ||
type == token_type::block_stop ) {
} else if (type == token_type::line_comment ||
type == token_type::block_start ||
type == token_type::block_stop) {
len = 2;
return len;
inline std::tuple<size_t, token_type> find_next_token( const char* buffer )
inline std::tuple<size_t, token_type> find_next_token(const char *buffer) {
size_t i = 0;
while ( true ) {
if ( buffer[i] == '\n' || buffer[i] == '\r' ) {
return std::pair<size_t, token_type>( i + 1, token_type::newline );
} else if ( buffer[i] == 0 ) {
return std::pair<size_t, token_type>( i + 1, token_type::end );
} else if ( buffer[i] == '"' ) {
return std::pair<size_t, token_type>( i + 1, token_type::quote );
} else if ( buffer[i] == '=' ) {
return std::pair<size_t, token_type>( i + 1, token_type::equal );
} else if ( buffer[i] == '{' ) {
return std::pair<size_t, token_type>( i + 1, token_type::bracket );
} else if ( buffer[i] == '}' ) {
return std::pair<size_t, token_type>( i + 1, token_type::end_bracket );
} else if ( buffer[i] == '/' ) {
if ( buffer[i + 1] == '/' ) {
return std::pair<size_t, token_type>( i + 2, token_type::line_comment );
} else if ( buffer[i + 1] == '*' ) {
return std::pair<size_t, token_type>( i + 2, token_type::block_start );
while (true) {
if (buffer[i] == '\n' || buffer[i] == '\r') {
return std::pair<size_t, token_type>(i + 1, token_type::newline);
} else if (buffer[i] == 0) {
return std::pair<size_t, token_type>(i + 1, token_type::end);
} else if (buffer[i] == '"') {
return std::pair<size_t, token_type>(i + 1, token_type::quote);
} else if (buffer[i] == '=') {
return std::pair<size_t, token_type>(i + 1, token_type::equal);
} else if (buffer[i] == '{') {
return std::pair<size_t, token_type>(i + 1, token_type::bracket);
} else if (buffer[i] == '}') {
return std::pair<size_t, token_type>(i + 1,
} else if (buffer[i] == '/') {
if (buffer[i + 1] == '/') {
return std::pair<size_t, token_type>(i + 2,
} else if (buffer[i + 1] == '*') {
return std::pair<size_t, token_type>(i + 2,
} else if ( buffer[i] == '*' ) {
if ( buffer[i + 1] == '/' )
return std::pair<size_t, token_type>( i + 2, token_type::block_stop );
} else if (buffer[i] == '*') {
if (buffer[i + 1] == '/')
return std::pair<size_t, token_type>(i + 2,
return std::pair<size_t, token_type>( 0, token_type::end );
return std::pair<size_t, token_type>(0, token_type::end);
inline std::string deblank( const std::string& str )
inline std::string deblank(const std::string &str) {
size_t i1 = 0xFFFFFFF, i2 = 0;
for ( size_t i = 0; i < str.size(); i++ ) {
if ( str[i] != ' ' ) {
i1 = std::min( i1, i );
i2 = std::max( i2, i );
for (size_t i = 0; i < str.size(); i++) {
if (str[i] != ' ') {
i1 = std::min(i1, i);
i2 = std::max(i2, i);
return i1 <= i2 ? str.substr( i1, i2 - i1 + 1 ) : std::string();
return i1 <= i2 ? str.substr(i1, i2 - i1 + 1) : std::string();
size_t skip_comment( const char* buffer )
auto tmp = find_next_token( buffer );
const token_type end_comment = ( std::get<1>( tmp ) == token_type::line_comment ) ?
token_type::newline :
size_t skip_comment(const char *buffer) {
auto tmp = find_next_token(buffer);
const token_type end_comment =
(std::get<1>(tmp) == token_type::line_comment) ? token_type::newline
: token_type::block_stop;
size_t pos = 0;
while ( std::get<1>( tmp ) != end_comment ) {
if ( std::get<1>( tmp ) == token_type::end )
ERROR( "Encountered end of file before block comment end" );
pos += std::get<0>( tmp );
tmp = find_next_token( &buffer[pos] );
while (std::get<1>(tmp) != end_comment) {
if (std::get<1>(tmp) == token_type::end)
ERROR("Encountered end of file before block comment end");
pos += std::get<0>(tmp);
tmp = find_next_token(&buffer[pos]);
pos += std::get<0>( tmp );
pos += std::get<0>(tmp);
return pos;
inline std::string lower( const std::string& str )
std::string tmp( str );
std::transform( tmp.begin(), tmp.end(), tmp.begin(), ::tolower );
inline std::string lower(const std::string &str) {
std::string tmp(str);
std::transform(tmp.begin(), tmp.end(), tmp.begin(), ::tolower);
return tmp;
static std::tuple<size_t, std::shared_ptr<KeyData>> read_value(
const char* buffer, const std::string& key )
static std::tuple<size_t, std::shared_ptr<KeyData>>
read_value(const char *buffer, const std::string &key) {
// Get the value as a std::string
size_t pos = 0;
token_type type = token_type::end;
std::tie( pos, type ) = find_next_token( &buffer[pos] );
size_t len = pos - length( type );
while ( type != token_type::newline ) {
if ( type == token_type::quote ) {
size_t i = 0;
std::tie( i, type ) = find_next_token( &buffer[pos] );
size_t pos = 0;
token_type type = token_type::end;
std::tie(pos, type) = find_next_token(&buffer[pos]);
size_t len = pos - length(type);
while (type != token_type::newline) {
if (type == token_type::quote) {
size_t i = 0;
std::tie(i, type) = find_next_token(&buffer[pos]);
pos += i;
while ( type != token_type::quote ) {
ASSERT( type != token_type::end );
std::tie( i, type ) = find_next_token( &buffer[pos] );
while (type != token_type::quote) {
ASSERT(type != token_type::end);
std::tie(i, type) = find_next_token(&buffer[pos]);
pos += i;
} else if ( type == token_type::line_comment || type == token_type::block_start ) {
len = pos - length( type );
pos += skip_comment( &buffer[pos - length( type )] ) - length( type );
} else if (type == token_type::line_comment ||
type == token_type::block_start) {
len = pos - length(type);
pos += skip_comment(&buffer[pos - length(type)]) - length(type);
size_t i = 0;
std::tie( i, type ) = find_next_token( &buffer[pos] );
size_t i = 0;
std::tie(i, type) = find_next_token(&buffer[pos]);
pos += i;
len = pos - length( type );
len = pos - length(type);
const std::string value = deblank( std::string( buffer, len ) );
const std::string value = deblank(std::string(buffer, len));
// Split the value to an array of values
std::vector<std::string> values;
size_t i0 = 0, i = 0, count = 0;
for ( ; i < value.size(); i++ ) {
if ( value[i] == '"' ) {
for (; i < value.size(); i++) {
if (value[i] == '"') {
} else if ( value[i] == ',' && count % 2 == 0 ) {
values.push_back( deblank( value.substr( i0, i - i0 ) ) );
} else if (value[i] == ',' && count % 2 == 0) {
values.push_back(deblank(value.substr(i0, i - i0)));
i0 = i + 1;
values.push_back( deblank( value.substr( i0 ) ) );
// Convert the string value to the database value
std::shared_ptr<KeyData> data;
if ( value.empty() ) {
data.reset( new EmptyKeyData() );
} else if ( value.find( '"' ) != std::string::npos ) {
auto* data2 = new KeyDataString();
data.reset( data2 );
data2->d_data.resize( values.size() );
for ( size_t i = 0; i < values.size(); i++ ) {
ASSERT( values[i].size() >= 2 );
ASSERT( values[i][0] == '"' && values[i][values[i].size() - 1] == '"' );
data2->d_data[i] = values[i].substr( 1, values[i].size() - 2 );
if (value.empty()) {
data.reset(new EmptyKeyData());
} else if (value.find('"') != std::string::npos) {
auto *data2 = new KeyDataString();
for (size_t i = 0; i < values.size(); i++) {
ASSERT(values[i].size() >= 2);
ASSERT(values[i][0] == '"' &&
values[i][values[i].size() - 1] == '"');
data2->d_data[i] = values[i].substr(1, values[i].size() - 2);
} else if ( lower( value ) == "true" || lower( value ) == "false" ) {
auto* data2 = new KeyDataBool();
data.reset( data2 );
data2->d_data.resize( values.size() );
for ( size_t i = 0; i < values.size(); i++ ) {
ASSERT( values[i].size() >= 2 );
if ( lower( values[i] ) != "true" && lower( values[i] ) != "false" )
ERROR( "Error converting " + key + " to logical array" );
data2->d_data[i] = lower( values[i] ) == "true";
} else if (lower(value) == "true" || lower(value) == "false") {
auto *data2 = new KeyDataBool();
for (size_t i = 0; i < values.size(); i++) {
ASSERT(values[i].size() >= 2);
if (lower(values[i]) != "true" && lower(values[i]) != "false")
ERROR("Error converting " + key + " to logical array");
data2->d_data[i] = lower(values[i]) == "true";
} else { // if ( value.find('.')!=std::string::npos || value.find('e')!=std::string::npos ) {
auto* data2 = new KeyDataDouble();
data.reset( data2 );
data2->d_data.resize( values.size(), 0 );
for ( size_t i = 0; i < values.size(); i++ ) {
auto *data2 = new KeyDataDouble();
data2->d_data.resize(values.size(), 0);
for (size_t i = 0; i < values.size(); i++) {
Units unit;
std::tie( data2->d_data[i], unit ) = KeyDataDouble::read( values[i] );
if ( !unit.isNull() )
std::tie(data2->d_data[i], unit) = KeyDataDouble::read(values[i]);
if (!unit.isNull())
data2->d_unit = unit;
//} else {
// ERROR("Unable to determine data type: "+value);
return std::tuple<size_t, std::shared_ptr<KeyData>>( pos, data );
return std::tuple<size_t, std::shared_ptr<KeyData>>(pos, data);
size_t Database::loadDatabase( const char* buffer, Database& db )
size_t Database::loadDatabase(const char *buffer, Database &db) {
size_t pos = 0;
while ( true ) {
while (true) {
size_t i;
token_type type;
std::tie( i, type ) = find_next_token( &buffer[pos] );
const std::string key =
deblank( std::string( &buffer[pos], std::max<int>( i - length( type ), 1 ) - 1 ) );
if ( type == token_type::line_comment || type == token_type::block_start ) {
std::tie(i, type) = find_next_token(&buffer[pos]);
const std::string key = deblank(
std::string(&buffer[pos], std::max<int>(i - length(type), 1) - 1));
if (type == token_type::line_comment ||
type == token_type::block_start) {
// Comment
INSIST( key.empty(), "Key should be empty: " + key );
pos += skip_comment( &buffer[pos] );
} else if ( type == token_type::newline ) {
INSIST( key.empty(), "Key should be empty: " + key );
INSIST(key.empty(), "Key should be empty: " + key);
pos += skip_comment(&buffer[pos]);
} else if (type == token_type::newline) {
INSIST(key.empty(), "Key should be empty: " + key);
pos += i;
} else if ( type == token_type::equal ) {
} else if (type == token_type::equal) {
// Reading key/value pair
ASSERT( !key.empty() );
pos += i;
std::shared_ptr<KeyData> data;
std::tie( i, data ) = read_value( &buffer[pos], key );
ASSERT( data.get() != nullptr );
std::tie(i, data) = read_value(&buffer[pos], key);
ASSERT(data.get() != nullptr);
db.d_data[key] = data;
pos += i;
} else if ( type == token_type::bracket ) {
} else if (type == token_type::bracket) {
// Read database
ASSERT( !key.empty() );
pos += i;
std::shared_ptr<Database> database( new Database() );
pos += loadDatabase( &buffer[pos], *database );
std::shared_ptr<Database> database(new Database());
pos += loadDatabase(&buffer[pos], *database);
db.d_data[key] = database;
} else if ( type == token_type::end_bracket ) {
} else if (type == token_type::end_bracket) {
// Finished with the database
pos += i;
} else {
ERROR( "Error loading data" );
ERROR("Error loading data");
return pos;
* Data type helper functions *
void KeyDataDouble::print( std::ostream& os, const std::string& indent ) const
void KeyDataDouble::print(std::ostream &os, const std::string &indent) const {
os << indent;
for ( size_t i = 0; i < d_data.size(); i++ ) {
if ( i > 0 )
for (size_t i = 0; i < d_data.size(); i++) {
if (i > 0)
os << ", ";
if ( d_data[i] != d_data[i] ) {
if (d_data[i] != d_data[i]) {
os << "nan";
} else if ( d_data[i] == std::numeric_limits<double>::infinity() ) {
} else if (d_data[i] == std::numeric_limits<double>::infinity()) {
os << "inf";
} else if ( d_data[i] == -std::numeric_limits<double>::infinity() ) {
} else if (d_data[i] == -std::numeric_limits<double>::infinity()) {
os << "-inf";
} else {
os << std::setprecision( 12 ) << d_data[i];
os << std::setprecision(12) << d_data[i];
if ( !d_unit.isNull() )
if (!d_unit.isNull())
os << " " << d_unit.str();
os << std::endl;
std::tuple<double, Units> KeyDataDouble::read( const std::string& str )
std::string tmp = deblank( str );
size_t index = tmp.find( " " );
if ( index != std::string::npos ) {
return std::make_tuple(
readValue( tmp.substr( 0, index ) ), Units( tmp.substr( index + 1 ) ) );
std::tuple<double, Units> KeyDataDouble::read(const std::string &str) {
std::string tmp = deblank(str);
size_t index = tmp.find(" ");
if (index != std::string::npos) {
return std::make_tuple(readValue(tmp.substr(0, index)),
Units(tmp.substr(index + 1)));
} else {
return std::make_tuple( readValue( tmp ), Units() );
return std::make_tuple(readValue(tmp), Units());
double KeyDataDouble::readValue( const std::string& str )
const std::string tmp = lower( str );
double data = 0;
if ( tmp == "inf" || tmp == "infinity" ) {
double KeyDataDouble::readValue(const std::string &str) {
const std::string tmp = lower(str);
double data = 0;
if (tmp == "inf" || tmp == "infinity") {
data = std::numeric_limits<double>::infinity();
} else if ( tmp == "-inf" || tmp == "-infinity" ) {
} else if (tmp == "-inf" || tmp == "-infinity") {
data = -std::numeric_limits<double>::infinity();
} else if ( tmp == "nan" ) {
} else if (tmp == "nan") {
data = std::numeric_limits<double>::quiet_NaN();
} else if ( tmp.find( '/' ) != std::string::npos ) {
ERROR( "Error reading value" );
} else if (tmp.find('/') != std::string::npos) {
ERROR("Error reading value");
} else {
char* pos = nullptr;
data = strtod( tmp.c_str(), &pos );
if ( static_cast<size_t>( pos - tmp.c_str() ) == tmp.size() + 1 )
ERROR( "Error reading value" );
char *pos = nullptr;
data = strtod(tmp.c_str(), &pos);
if (static_cast<size_t>(pos - tmp.c_str()) == tmp.size() + 1)
ERROR("Error reading value");
return data;
* Instantiations *
template std::vector<char> Database::getVector<char>( const std::string&, const Units& ) const;
template std::vector<int> Database::getVector<int>( const std::string&, const Units& ) const;
template std::vector<size_t> Database::getVector<size_t>( const std::string&, const Units& ) const;
template std::vector<float> Database::getVector<float>( const std::string&, const Units& ) const;
template std::vector<double> Database::getVector<double>( const std::string&, const Units& ) const;
template void Database::putVector<char>(
const std::string&, const std::vector<char>&, const Units& );
template void Database::putVector<int>( const std::string&, const std::vector<int>&, const Units& );
template void Database::putVector<size_t>(
const std::string&, const std::vector<size_t>&, const Units& );
template void Database::putVector<float>(
const std::string&, const std::vector<float>&, const Units& );
template void Database::putVector<double>(
const std::string&, const std::vector<double>&, const Units& );
template bool Database::isType<int>( const std::string& ) const;
template bool Database::isType<float>( const std::string& ) const;
template bool Database::isType<double>( const std::string& ) const;
template bool Database::isType<std::string>( const std::string& ) const;
template std::vector<char> Database::getVector<char>(const std::string &,
const Units &) const;
template std::vector<int> Database::getVector<int>(const std::string &,
const Units &) const;
template std::vector<size_t> Database::getVector<size_t>(const std::string &,
const Units &) const;
template std::vector<float> Database::getVector<float>(const std::string &,
const Units &) const;
template std::vector<double> Database::getVector<double>(const std::string &,
const Units &) const;
template void Database::putVector<char>(const std::string &,
const std::vector<char> &,
const Units &);
template void Database::putVector<int>(const std::string &,
const std::vector<int> &, const Units &);
template void Database::putVector<size_t>(const std::string &,
const std::vector<size_t> &,
const Units &);
template void Database::putVector<float>(const std::string &,
const std::vector<float> &,
const Units &);
template void Database::putVector<double>(const std::string &,
const std::vector<double> &,
const Units &);
template bool Database::isType<int>(const std::string &) const;
template bool Database::isType<float>(const std::string &) const;
template bool Database::isType<double>(const std::string &) const;
template bool Database::isType<std::string>(const std::string &) const;

View File

@ -26,17 +26,13 @@
#include "common/Units.h"
inline bool exists( const std::string& filename )
std::ifstream domain( filename );
return domain.good();
inline bool exists(const std::string &filename) {
std::ifstream domain(filename);
return domain.good();
//! Base class to hold data of a given type
class KeyData
class KeyData {
//! Empty constructor
KeyData() {}
@ -47,19 +43,18 @@ public:
//! Copy the data
virtual std::shared_ptr<KeyData> clone() const = 0;
//! Print the data to a stream
virtual void print( std::ostream& os, const std::string& indent = "" ) const = 0;
virtual void print(std::ostream &os,
const std::string &indent = "") const = 0;
//! Return the native data type
virtual std::string type() const = 0;
KeyData( const KeyData& ) {}
KeyData& operator=( const KeyData& );
KeyData(const KeyData &) {}
KeyData &operator=(const KeyData &);
//! Class to a database
class Database : public KeyData
class Database : public KeyData {
//! Empty constructor
@ -68,25 +63,25 @@ public:
* Open an database file.
* @param filename Name of input file to open
explicit Database( const std::string& filename );
explicit Database(const std::string &filename);
* Create database from string
* @param data String containing the database data
static std::shared_ptr<Database> createFromString( const std::string& data );
static std::shared_ptr<Database> createFromString(const std::string &data);
//! Copy constructor
Database( const Database& );
Database(const Database &);
//! Assignment operator
Database& operator=( const Database& );
Database &operator=(const Database &);
//! Move constructor
Database( Database&& rhs );
Database(Database &&rhs);
//! Move assignment operator
Database& operator=( Database&& rhs );
Database &operator=(Database &&rhs);
//! Destructor
virtual ~Database();
@ -97,25 +92,21 @@ public:
//! Copy the data
std::shared_ptr<Database> cloneDatabase() const;
* Return true if the specified key exists in the database and false
* otherwise.
* @param[in] key Key name to lookup.
bool keyExists( const std::string& key ) const;
bool keyExists(const std::string &key) const;
* Return all keys in the database.
std::vector<std::string> getAllKeys() const;
//! Return the number of entries in the database
size_t size() const { return d_data.size(); }
* Get the scalar entry from the database with the specified key
* name. If the specified key does not exist in the database or
@ -125,14 +116,14 @@ public:
* @param[in] key Key name in database.
* @param[in] unit Desired units
template<class TYPE>
inline TYPE getScalar( const std::string& key, const Units& unit = Units() ) const;
template <class TYPE>
inline TYPE getScalar(const std::string &key,
const Units &unit = Units()) const;
/// @copydoc Database::getScalar(const std::string&,const Units&) const
template<class TYPE>
inline TYPE getScalar( const std::string& key, const std::string& unit ) const;
template <class TYPE>
inline TYPE getScalar(const std::string &key,
const std::string &unit) const;
* Get the scalar entry from the database with the specified key
@ -143,16 +134,14 @@ public:
* @param[in] value Default value
* @param[in] unit Desired units
template<class TYPE>
inline TYPE getWithDefault(
const std::string& key, const TYPE& value, const Units& unit = Units() ) const;
template <class TYPE>
inline TYPE getWithDefault(const std::string &key, const TYPE &value,
const Units &unit = Units()) const;
/// @copydoc Database::getWithDefault(const std::string&,const TYPE&,const Units&) const
template<class TYPE>
inline TYPE getWithDefault(
const std::string& key, const TYPE& value, const std::string& unit ) const;
template <class TYPE>
inline TYPE getWithDefault(const std::string &key, const TYPE &value,
const std::string &unit) const;
* Put the scalar entry into the database with the specified key name.
@ -160,9 +149,9 @@ public:
* @param value Value to store
* @param unit Desired units
template<class TYPE>
inline void putScalar( const std::string& key, const TYPE& value, const Units& unit = Units() );
template <class TYPE>
inline void putScalar(const std::string &key, const TYPE &value,
const Units &unit = Units());
* Put the scalar entry into the database with the specified key name.
@ -170,9 +159,9 @@ public:
* @param value Value to store
* @param unit Desired units
template<class TYPE>
inline void putScalar( const std::string& key, const TYPE& value, const std::string& unit );
template <class TYPE>
inline void putScalar(const std::string &key, const TYPE &value,
const std::string &unit);
* Get the vector entries from the database with the specified key
@ -183,14 +172,14 @@ public:
* @param key Key name in database.
* @param unit Desired units
template<class TYPE>
std::vector<TYPE> getVector( const std::string& key, const Units& unit = Units() ) const;
template <class TYPE>
std::vector<TYPE> getVector(const std::string &key,
const Units &unit = Units()) const;
/// @copydoc Database::getVector(const std::string&,const Units&) const
template<class TYPE>
inline std::vector<TYPE> getVector( const std::string& key, const std::string& unit ) const;
template <class TYPE>
inline std::vector<TYPE> getVector(const std::string &key,
const std::string &unit) const;
* Put the vector entries into the database with the specified key
@ -202,16 +191,14 @@ public:
* @param data Data to store
* @param unit Desired units
template<class TYPE>
void putVector(
const std::string& key, const std::vector<TYPE>& data, const Units& unit = Units() );
template <class TYPE>
void putVector(const std::string &key, const std::vector<TYPE> &data,
const Units &unit = Units());
/// @copydoc Database::putVector(const std::string&,const std::vector<TYPE>&,const Units&)
template<class TYPE>
inline void putVector(
const std::string& key, const std::vector<TYPE>& data, const std::string& unit );
template <class TYPE>
inline void putVector(const std::string &key, const std::vector<TYPE> &data,
const std::string &unit);
* Get the data for a key in the database. If the specified key
@ -220,7 +207,7 @@ public:
* @param key Key name in database.
std::shared_ptr<KeyData> getData( const std::string& key );
std::shared_ptr<KeyData> getData(const std::string &key);
* Get the data for a key in the database. If the specified key
@ -229,8 +216,7 @@ public:
* @param key Key name in database.
std::shared_ptr<const KeyData> getData( const std::string& key ) const;
std::shared_ptr<const KeyData> getData(const std::string &key) const;
* Put the data for a key in the database.
@ -238,17 +224,13 @@ public:
* @param key Key name in database.
* @param data Data to store
void putData( const std::string& key, std::shared_ptr<KeyData> data );
void putData(const std::string &key, std::shared_ptr<KeyData> data);
// Check if the key is a database object
bool isDatabase( const std::string& key ) const;
bool isDatabase(const std::string &key) const;
// Check if the entry can be stored as the given type
template<class TYPE>
bool isType( const std::string& key ) const;
template <class TYPE> bool isType(const std::string &key) const;
* Get the database for a key in the database. If the specified key
@ -257,7 +239,7 @@ public:
* @param key Key name in database.
std::shared_ptr<Database> getDatabase( const std::string& key );
std::shared_ptr<Database> getDatabase(const std::string &key);
* Get the database for a key in the database. If the specified key
@ -266,8 +248,7 @@ public:
* @param key Key name in database.
std::shared_ptr<const Database> getDatabase( const std::string& key ) const;
std::shared_ptr<const Database> getDatabase(const std::string &key) const;
* Get the database for a key in the database. If the specified key
@ -277,36 +258,32 @@ public:
* @param key Key name in database.
* @param db Database to store
void putDatabase( const std::string& key, std::shared_ptr<Database> db );
void putDatabase(const std::string &key, std::shared_ptr<Database> db);
* Print the data to a stream
* @param os Output stream
* @param indent Indenting to use before each line
virtual void print( std::ostream& os, const std::string& indent = "" ) const override;
virtual void print(std::ostream &os,
const std::string &indent = "") const override;
//! Print the type
virtual std::string type() const override { return "database"; }
* Print the data to a string
* @return Output string
std::string print( const std::string& indent = "" ) const;
std::string print(const std::string &indent = "") const;
std::map<std::string, std::shared_ptr<KeyData>> d_data;
// Function to load a database from a buffer
static size_t loadDatabase( const char* buffer, Database& db );
static size_t loadDatabase(const char *buffer, Database &db);
#include "common/Database.hpp"

View File

@ -38,66 +38,58 @@
#include <tuple>
* Basic classes for primitive data types *
class EmptyKeyData : public KeyData
class EmptyKeyData : public KeyData {
EmptyKeyData() {}
virtual ~EmptyKeyData() {}
virtual std::shared_ptr<KeyData> clone() const override
virtual std::shared_ptr<KeyData> clone() const override {
return std::make_shared<EmptyKeyData>();
virtual void print( std::ostream& os, const std::string& = "" ) const override
virtual void print(std::ostream &os,
const std::string & = "") const override {
os << std::endl;
virtual std::string type() const override { return ""; }
class KeyDataDouble : public KeyData
class KeyDataDouble : public KeyData {
KeyDataDouble() {}
explicit KeyDataDouble( const std::vector<double>& data, const Units& unit )
: d_data( data ), d_unit( unit )
explicit KeyDataDouble(const std::vector<double> &data, const Units &unit)
: d_data(data), d_unit(unit) {}
virtual ~KeyDataDouble() {}
virtual std::shared_ptr<KeyData> clone() const override
return std::make_shared<KeyDataDouble>( d_data, d_unit );
virtual std::shared_ptr<KeyData> clone() const override {
return std::make_shared<KeyDataDouble>(d_data, d_unit);
virtual void print( std::ostream& os, const std::string& indent = "" ) const override;
virtual void print(std::ostream &os,
const std::string &indent = "") const override;
virtual std::string type() const override { return "double"; }
static std::tuple<double, Units> read( const std::string& );
static double readValue( const std::string& );
static std::tuple<double, Units> read(const std::string &);
static double readValue(const std::string &);
std::vector<double> d_data;
Units d_unit;
class KeyDataBool : public KeyData
class KeyDataBool : public KeyData {
KeyDataBool() {}
explicit KeyDataBool( const std::vector<bool>& data ) : d_data( data ) {}
explicit KeyDataBool(const std::vector<bool> &data) : d_data(data) {}
virtual ~KeyDataBool() {}
virtual std::shared_ptr<KeyData> clone() const override
return std::make_shared<KeyDataBool>( d_data );
virtual std::shared_ptr<KeyData> clone() const override {
return std::make_shared<KeyDataBool>(d_data);
virtual void print( std::ostream& os, const std::string& indent = "" ) const override
virtual void print(std::ostream &os,
const std::string &indent = "") const override {
os << indent;
for ( size_t i = 0; i < d_data.size(); i++ ) {
if ( i > 0 ) {
for (size_t i = 0; i < d_data.size(); i++) {
if (i > 0) {
os << ", ";
if ( d_data[i] ) {
if (d_data[i]) {
os << "true";
} else {
os << "false";
@ -108,21 +100,20 @@ public:
virtual std::string type() const override { return "bool"; }
std::vector<bool> d_data;
class KeyDataString : public KeyData
class KeyDataString : public KeyData {
KeyDataString() {}
explicit KeyDataString( const std::vector<std::string>& data ) : d_data( data ) {}
explicit KeyDataString(const std::vector<std::string> &data)
: d_data(data) {}
virtual ~KeyDataString() {}
virtual std::shared_ptr<KeyData> clone() const override
return std::make_shared<KeyDataString>( d_data );
virtual std::shared_ptr<KeyData> clone() const override {
return std::make_shared<KeyDataString>(d_data);
virtual void print( std::ostream& os, const std::string& indent = "" ) const override
virtual void print(std::ostream &os,
const std::string &indent = "") const override {
os << indent;
for ( size_t i = 0; i < d_data.size(); i++ ) {
if ( i > 0 ) {
for (size_t i = 0; i < d_data.size(); i++) {
if (i > 0) {
os << ", ";
os << '"' << d_data[i] << '"';
@ -133,73 +124,66 @@ public:
std::vector<std::string> d_data;
* Get a vector *
template<class TYPE>
inline std::vector<TYPE> Database::getVector(
const std::string& key, const std::string& unit ) const
return getVector<TYPE>( key, Units( unit ) );
template <class TYPE>
inline std::vector<TYPE> Database::getVector(const std::string &key,
const std::string &unit) const {
return getVector<TYPE>(key, Units(unit));
template<class TYPE>
inline void Database::putVector(
const std::string& key, const std::vector<TYPE>& data, const std::string& unit )
putVector<TYPE>( key, data, Units( unit ) );
template <class TYPE>
inline void Database::putVector(const std::string &key,
const std::vector<TYPE> &data,
const std::string &unit) {
putVector<TYPE>(key, data, Units(unit));
* Get a scalar *
template<class TYPE>
inline TYPE Database::getScalar( const std::string& key, const Units& unit ) const
const std::vector<TYPE>& data = getVector<TYPE>( key, unit );
if ( data.size() != 1 ) {
template <class TYPE>
inline TYPE Database::getScalar(const std::string &key,
const Units &unit) const {
const std::vector<TYPE> &data = getVector<TYPE>(key, unit);
if (data.size() != 1) {
char msg[1000];
sprintf( msg, "Variable %s is not a scalar", key.c_str() );
ERROR( msg );
sprintf(msg, "Variable %s is not a scalar", key.c_str());
return data[0];
template<class TYPE>
inline TYPE Database::getWithDefault(
const std::string& key, const TYPE& value, const Units& unit ) const
if ( !keyExists( key ) )
template <class TYPE>
inline TYPE Database::getWithDefault(const std::string &key, const TYPE &value,
const Units &unit) const {
if (!keyExists(key))
return value;
return getScalar<TYPE>( key, unit );
return getScalar<TYPE>(key, unit);
template<class TYPE>
inline void Database::putScalar( const std::string& key, const TYPE& data, const Units& unit )
putVector<TYPE>( key, std::vector<TYPE>( 1, data ), unit );
template <class TYPE>
inline void Database::putScalar(const std::string &key, const TYPE &data,
const Units &unit) {
putVector<TYPE>(key, std::vector<TYPE>(1, data), unit);
template<class TYPE>
inline TYPE Database::getScalar( const std::string& key, const std::string& unit ) const
return getScalar<TYPE>( key, Units( unit ) );
template <class TYPE>
inline TYPE Database::getScalar(const std::string &key,
const std::string &unit) const {
return getScalar<TYPE>(key, Units(unit));
template<class TYPE>
inline TYPE Database::getWithDefault(
const std::string& key, const TYPE& value, const std::string& unit ) const
return getWithDefault<TYPE>( key, value, Units( unit ) );
template <class TYPE>
inline TYPE Database::getWithDefault(const std::string &key, const TYPE &value,
const std::string &unit) const {
return getWithDefault<TYPE>(key, value, Units(unit));
template<class TYPE>
inline void Database::putScalar( const std::string& key, const TYPE& data, const std::string& unit )
putScalar<TYPE>( key, data, Units( unit ) );
template <class TYPE>
inline void Database::putScalar(const std::string &key, const TYPE &data,
const std::string &unit) {
putScalar<TYPE>(key, data, Units(unit));
template<class TYPE>
inline void putVector(
const std::string& key, const std::vector<TYPE>& data, const std::string& unit )
putVector<TYPE>( key, data, Units( unit ) );
template <class TYPE>
inline void putVector(const std::string &key, const std::vector<TYPE> &data,
const std::string &unit) {
putVector<TYPE>(key, data, Units(unit));

File diff suppressed because it is too large Load Diff

View File

@ -37,7 +37,6 @@
* \brief Parallel Domain data structures and helper functions
* \class Box
@ -63,20 +62,20 @@ class Patch;
* GPU-based data structures should be constructed separately but may utilize information that the Domain class provides.
class Domain{
class Domain {
* \brief Constructor
* @param db input database
* @param Communicator MPI communicator
Domain( std::shared_ptr<Database> db, const Utilities::MPI& Communicator);
Domain(std::shared_ptr<Database> db, const Utilities::MPI &Communicator);
* \brief Obsolete constructor
Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz,
double lx, double ly, double lz, int BC);
Domain(int nx, int ny, int nz, int rnk, int npx, int npy, int npz,
double lx, double ly, double lz, int BC);
* \brief Empty constructor
@ -86,18 +85,18 @@ public:
* \brief Copy constructor
Domain( const Domain& ) = delete;
Domain(const Domain &) = delete;
* \brief Assignment operator
Domain& operator=( const Domain& ) = delete;
Domain &operator=(const Domain &) = delete;
* \brief Destructor
* \brief Get the database
@ -106,37 +105,33 @@ public:
* \brief Get the domain box
inline const Box& getBox() const { return d_box; }
inline const Box &getBox() const { return d_box; }
* \brief Get local patch
inline const Patch& getLocalPatch() const { return *d_localPatch; }
inline const Patch &getLocalPatch() const { return *d_localPatch; }
* \brief Get all patches
inline const std::vector<Patch>& getAllPatch() const { return d_patches; }
inline const std::vector<Patch> &getAllPatch() const { return d_patches; }
* \brief initialize from database
void initialize( std::shared_ptr<Database> db );
void initialize(std::shared_ptr<Database> db);
std::shared_ptr<Database> d_db;
Box d_box;
Patch *d_localPatch;
std::vector<Patch> d_patches;
public: // Public variables (need to create accessors instead)
std::shared_ptr<Database> database;
double Lx,Ly,Lz,Volume,voxel_length;
int Nx,Ny,Nz,N;
double Lx, Ly, Lz, Volume, voxel_length;
int Nx, Ny, Nz, N;
int inlet_layers_x, inlet_layers_y, inlet_layers_z;
int outlet_layers_x, outlet_layers_y, outlet_layers_z;
int inlet_layers_phase; //as usual: 1->n, 2->w
@ -144,7 +139,7 @@ public: // Public variables (need to create accessors instead)
double porosity;
RankInfoStruct rank_info;
Utilities::MPI Comm; // MPI Communicator for this domain
Utilities::MPI Comm; // MPI Communicator for this domain
int BoundaryCondition;
@ -153,7 +148,7 @@ public: // Public variables (need to create accessors instead)
* \brief Compute the porosity based on the current domain id file
inline double Porosity() const { return porosity; }
inline int iproc() const { return rank_info.ix; }
inline int jproc() const { return rank_info.jy; }
@ -186,70 +181,78 @@ public: // Public variables (need to create accessors instead)
// Get the actual D3Q19 communication counts (based on location of solid phase)
// Discrete velocity set symmetry implies the sendcount = recvcount
inline int recvCount( const char* dir ) const { return getRecvList( dir ).size(); }
inline int sendCount( const char* dir ) const { return getSendList( dir ).size(); }
inline const int* recvList( const char* dir ) const { return getRecvList( dir ).data(); }
inline const int* sendList( const char* dir ) const { return getSendList( dir ).data(); }
inline int recvCount(const char *dir) const {
return getRecvList(dir).size();
inline int sendCount(const char *dir) const {
return getSendList(dir).size();
inline const int *recvList(const char *dir) const {
return getRecvList(dir).data();
inline const int *sendList(const char *dir) const {
return getSendList(dir).data();
// Solid indicator function
std::vector<signed char> id;
* \brief Read domain IDs from file
void ReadIDs();
* \brief Compute the porosity
void ComputePorosity();
* \brief Read image and perform domain decomposition
* @param filename - name of file to read IDs
void Decomp( const std::string& filename );
void Decomp(const std::string &filename);
* \brief Perform a halo exchange using MPI
* @param Mesh - array data that holds scalar values
void CommunicateMeshHalo(DoubleArray &Mesh);
* \brief Initialize communication data structures within Domain object.
* This routine needs to be called before the communication functionality will work
void CommInit();
void CommInit();
* \brief Count number of pore nodes (labels > 1)
int PoreCount();
* \brief Read array data from a file and distribute to local arrays for each MPI process
* @param Filename - name of the file to read the data
* @param Datatype - data type to use
* @param UserData - Array to store the values that are read
void ReadFromFile(const std::string& Filename,const std::string& Datatype, double *UserData);
void ReadFromFile(const std::string &Filename, const std::string &Datatype,
double *UserData);
* \brief Aggregate labels from all MPI processes and write to a file
* @param filename - name of the file to write
void AggregateLabels( const std::string& filename );
void AggregateLabels(const std::string &filename);
* \brief Aggregate user provided array from all MPI processes and write to a single file
* @param filename - name of the file to write
* @param UserData - array data to aggregate and write
void AggregateLabels( const std::string& filename, DoubleArray &UserData );
void AggregateLabels(const std::string &filename, DoubleArray &UserData);
* \brief Pack halo data for 8-bit integer
* @param list - list of values in the halo
@ -258,7 +261,7 @@ private:
* @param ID - 8-bit values on mesh [Nx, Ny, Nz]
void PackID(int *list, int count, signed char *sendbuf, signed char *ID);
* \brief Unpack halo data for 8-bit integer
* @param list - list of values in the halo
@ -267,29 +270,32 @@ private:
* @param ID - 8-bit values on mesh [Nx, Ny, Nz]
void UnpackID(int *list, int count, signed char *recvbuf, signed char *ID);
MPI_Request req1[18], req2[18];
std::vector<int> sendList_x, sendList_y, sendList_z, sendList_X, sendList_Y, sendList_Z;
std::vector<int> sendList_xy, sendList_yz, sendList_xz, sendList_Xy, sendList_Yz, sendList_xZ;
std::vector<int> sendList_xY, sendList_yZ, sendList_Xz, sendList_XY, sendList_YZ, sendList_XZ;
std::vector<int> recvList_x, recvList_y, recvList_z, recvList_X, recvList_Y, recvList_Z;
std::vector<int> recvList_xy, recvList_yz, recvList_xz, recvList_Xy, recvList_Yz, recvList_xZ;
std::vector<int> recvList_xY, recvList_yZ, recvList_Xz, recvList_XY, recvList_YZ, recvList_XZ;
const std::vector<int>& getRecvList( const char* dir ) const;
const std::vector<int>& getSendList( const char* dir ) const;
MPI_Request req1[18], req2[18];
std::vector<int> sendList_x, sendList_y, sendList_z, sendList_X, sendList_Y,
std::vector<int> sendList_xy, sendList_yz, sendList_xz, sendList_Xy,
sendList_Yz, sendList_xZ;
std::vector<int> sendList_xY, sendList_yZ, sendList_Xz, sendList_XY,
sendList_YZ, sendList_XZ;
std::vector<int> recvList_x, recvList_y, recvList_z, recvList_X, recvList_Y,
std::vector<int> recvList_xy, recvList_yz, recvList_xz, recvList_Xy,
recvList_Yz, recvList_xZ;
std::vector<int> recvList_xY, recvList_yZ, recvList_Xz, recvList_XY,
recvList_YZ, recvList_XZ;
const std::vector<int> &getRecvList(const char *dir) const;
const std::vector<int> &getSendList(const char *dir) const;
template<class TYPE> class PatchData;
template <class TYPE> class PatchData;
enum class DataLocation { CPU, DEVICE };
* \class Patch
@ -298,44 +304,40 @@ enum class DataLocation { CPU, DEVICE };
class Patch {
//! Empty constructor
Patch() = delete;
//! Copy constructor
Patch( const Patch& ) = delete;
Patch(const Patch &) = delete;
//! Assignment operator
Patch& operator=( const Patch& ) = delete;
Patch &operator=(const Patch &) = delete;
//! Return the box for the patch
inline const Box& getBox() const { return d_box; }
inline const Box &getBox() const { return d_box; }
//! Create patch data
template<class TYPE>
std::shared_ptr<PatchData<TYPE>> createPatchData( DataLocation location ) const;
template <class TYPE>
createPatchData(DataLocation location) const;
Box d_box;
int d_owner;
Domain *d_domain;
// Class to hold data on a patch
template<class TYPE>
class PatchData {
template <class TYPE> class PatchData {
//! Get the raw data pointer
TYPE *data() { return d_data; }
//! Get the raw data pointer
TYPE* data() { return d_data; }
//! Get the raw data pointer
const TYPE* data() const { return d_data; }
const TYPE *data() const { return d_data; }
//! Get the patch
const Patch& getPatch() const { return *d_patch; }
const Patch &getPatch() const { return *d_patch; }
//! Start communication
void beginCommunication();
@ -344,20 +346,20 @@ public:
void endCommunication();
//! Access ghost values
TYPE operator()( int, int, int ) const;
TYPE operator()(int, int, int) const;
//! Copy data from another PatchData
void copy( const PatchData& rhs );
void copy(const PatchData &rhs);
DataLocation d_location;
const Patch *d_patch;
TYPE *d_data;
TYPE *d_gcw;
void WriteCheckpoint(const char *FILENAME, const double *cDen, const double *cfq, size_t Np);
void WriteCheckpoint(const char *FILENAME, const double *cDen,
const double *cfq, size_t Np);
void ReadCheckpoint(char *FILENAME, double *cDen, double *cfq, size_t Np);

View File

@ -1,6 +1,5 @@
#include "FunctionTable.hpp"
* Random number generation *
@ -93,55 +92,36 @@ template<> long double genRand<long double>()
* axpy *
void call_axpy<float>( size_t N, const float alpha, const float *x, float *y )
template <>
void call_axpy<float>(size_t N, const float alpha, const float *x, float *y) {
ERROR("Not finished");
void call_axpy<double>( size_t N, const double alpha, const double *x, double *y )
template <>
void call_axpy<double>(size_t N, const double alpha, const double *x,
double *y) {
ERROR("Not finished");
* Multiply two arrays *
void call_gemv<double>(
size_t M, size_t N, double alpha, double beta, const double *A, const double *x, double *y )
template <>
void call_gemv<double>(size_t M, size_t N, double alpha, double beta,
const double *A, const double *x, double *y) {
ERROR("Not finished");
void call_gemv<float>(
size_t M, size_t N, float alpha, float beta, const float *A, const float *x, float *y )
template <>
void call_gemv<float>(size_t M, size_t N, float alpha, float beta,
const float *A, const float *x, float *y) {
ERROR("Not finished");
void call_gemm<double>( size_t M,
size_t N,
size_t K,
double alpha,
double beta,
const double *A,
const double *B,
double *C )
template <>
void call_gemm<double>(size_t M, size_t N, size_t K, double alpha, double beta,
const double *A, const double *B, double *C) {
ERROR("Not finished");
void call_gemm<float>( size_t M,
size_t N,
size_t K,
float alpha,
float beta,
const float *A,
const float *B,
float *C )
template <>
void call_gemm<float>(size_t M, size_t N, size_t K, float alpha, float beta,
const float *A, const float *B, float *C) {
ERROR("Not finished");

View File

@ -17,28 +17,23 @@
#ifndef included_FunctionTable
#define included_FunctionTable
#include "common/ArraySize.h"
#include <functional>
* Class FunctionTable is a serial function table class that defines
* a series of operations that can be performed on the Array class.
* Users can implement additional versions of the function table that match
* the interface to change the behavior of the array class.
class FunctionTable final
class FunctionTable final {
* Initialize the array with random values
* @param[in] x The array to operate on
template<class TYPE, class FUN>
static void rand( Array<TYPE, FUN> &x );
template <class TYPE, class FUN> static void rand(Array<TYPE, FUN> &x);
* Perform a reduce operator y = f(x)
@ -49,8 +44,9 @@ public:
* ...)
* @return The reduction
template<class TYPE, class FUN, typename LAMBDA>
static inline TYPE reduce( LAMBDA &op, const Array<TYPE, FUN> &A, const TYPE &initialValue );
template <class TYPE, class FUN, typename LAMBDA>
static inline TYPE reduce(LAMBDA &op, const Array<TYPE, FUN> &A,
const TYPE &initialValue);
* Perform a reduce operator z = f(x,y)
@ -62,11 +58,10 @@ public:
* ...)
* @return The reduction
template<class TYPE, class FUN, typename LAMBDA>
static inline TYPE reduce( LAMBDA &op,
const Array<TYPE, FUN> &A,
const Array<TYPE, FUN> &B,
const TYPE &initialValue );
template <class TYPE, class FUN, typename LAMBDA>
static inline TYPE reduce(LAMBDA &op, const Array<TYPE, FUN> &A,
const Array<TYPE, FUN> &B,
const TYPE &initialValue);
* Perform an element-wise operation y = f(x)
@ -75,8 +70,9 @@ public:
* @param[in,out] x The array to operate on
* @param[out] y The output array
template<class TYPE, class FUN, typename LAMBDA>
static inline void transform( LAMBDA &fun, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y );
template <class TYPE, class FUN, typename LAMBDA>
static inline void transform(LAMBDA &fun, const Array<TYPE, FUN> &x,
Array<TYPE, FUN> &y);
* Perform an element-wise operation z = f(x,y)
@ -86,11 +82,10 @@ public:
* @param[in] y The second array
* @param[out] z The output array
template<class TYPE, class FUN, typename LAMBDA>
static inline void transform( LAMBDA &fun,
const Array<TYPE, FUN> &x,
const Array<TYPE, FUN> &y,
Array<TYPE, FUN> &z );
template <class TYPE, class FUN, typename LAMBDA>
static inline void transform(LAMBDA &fun, const Array<TYPE, FUN> &x,
const Array<TYPE, FUN> &y,
Array<TYPE, FUN> &z);
* Multiply two arrays
@ -98,9 +93,9 @@ public:
* @param[in] b The second array
* @param[out] c The output array
template<class TYPE, class FUN>
static void
multiply( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c );
template <class TYPE, class FUN>
static void multiply(const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b,
Array<TYPE, FUN> &c);
* Perform dgemv/dgemm equivalent operation ( C = alpha*A*B + beta*C )
@ -110,12 +105,10 @@ public:
* @param[in] beta The scalar value beta
* @param[in,out] C The output array C
template<class TYPE, class FUN>
static void gemm( const TYPE alpha,
const Array<TYPE, FUN> &A,
const Array<TYPE, FUN> &B,
const TYPE beta,
Array<TYPE, FUN> &C );
template <class TYPE, class FUN>
static void gemm(const TYPE alpha, const Array<TYPE, FUN> &A,
const Array<TYPE, FUN> &B, const TYPE beta,
Array<TYPE, FUN> &C);
* Perform axpy equivalent operation ( y = alpha*x + y )
@ -123,8 +116,9 @@ public:
* @param[in] x The input array x
* @param[in,out] y The output array y
template<class TYPE, class FUN>
static void axpy( const TYPE alpha, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y );
template <class TYPE, class FUN>
static void axpy(const TYPE alpha, const Array<TYPE, FUN> &x,
Array<TYPE, FUN> &y);
* Check if two arrays are approximately equal
@ -132,24 +126,15 @@ public:
* @param[in] B The second array
* @param[in] tol The tolerance
template<class TYPE, class FUN>
static bool equals( const Array<TYPE, FUN> &A, const Array<TYPE, FUN> &B, TYPE tol );
template<class TYPE>
static inline void gemmWrapper( char TRANSA,
char TRANSB,
int M,
int N,
int K,
TYPE alpha,
const TYPE *A,
int LDA,
const TYPE *B,
int LDB,
TYPE beta,
int LDC );
template <class TYPE, class FUN>
static bool equals(const Array<TYPE, FUN> &A, const Array<TYPE, FUN> &B,
TYPE tol);
template <class TYPE>
static inline void gemmWrapper(char TRANSA, char TRANSB, int M, int N,
int K, TYPE alpha, const TYPE *A, int LDA,
const TYPE *B, int LDB, TYPE beta, TYPE *C,
int LDC);
/* Specialized Functions */
@ -158,62 +143,66 @@ public:
* @param[in] A The input array
* @param[out] B The output array
template<class TYPE, class FUN, class ALLOC>
static void transformReLU( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
template <class TYPE, class FUN, class ALLOC>
static void transformReLU(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B);
* Perform an element-wise operation B = |A|
* @param[in] A The array to operate on
* @param[out] B The output array
template<class TYPE, class FUN, class ALLOC>
static void transformAbs( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
template <class TYPE, class FUN, class ALLOC>
static void transformAbs(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B);
* Perform an element-wise operation B = tanh(A)
* @param[in] A The array to operate on
* @param[out] B The output array
template<class TYPE, class FUN, class ALLOC>
static void transformTanh( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
template <class TYPE, class FUN, class ALLOC>
static void transformTanh(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B);
* Perform an element-wise operation B = max(-1 , min(1 , A) )
* @param[in] A The array to operate on
* @param[out] B The output array
template<class TYPE, class FUN, class ALLOC>
static void transformHardTanh( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
template <class TYPE, class FUN, class ALLOC>
static void transformHardTanh(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B);
* Perform an element-wise operation B = 1 / (1 + exp(-A))
* @param[in] A The array to operate on
* @param[out] B The output array
template<class TYPE, class FUN, class ALLOC>
static void transformSigmoid( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
template <class TYPE, class FUN, class ALLOC>
static void transformSigmoid(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B);
* Perform an element-wise operation B = log(exp(A) + 1)
* @param[in] A The array to operate on
* @param[out] B The output array
template<class TYPE, class FUN, class ALLOC>
static void transformSoftPlus( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
template <class TYPE, class FUN, class ALLOC>
static void transformSoftPlus(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B);
* Sum the elements of the Array
* @param[in] A The array to sum
template<class TYPE, class FUN, class ALLOC>
static TYPE sum( const Array<TYPE, FUN, ALLOC> &A );
template <class TYPE, class FUN, class ALLOC>
static TYPE sum(const Array<TYPE, FUN, ALLOC> &A);
template<class T>
static inline void rand( size_t N, T *x );
template <class T> static inline void rand(size_t N, T *x);

View File

@ -41,7 +41,6 @@
#include <limits>
//#include <random>
* Random number initialization *
@ -57,287 +56,252 @@ inline void FunctionTable::rand( Array<TYPE, FUN> &x )
* Reduction *
template<class TYPE, class FUN, typename LAMBDA>
inline TYPE FunctionTable::reduce( LAMBDA &op, const Array<TYPE, FUN> &A, const TYPE &initialValue )
if ( A.length() == 0 )
template <class TYPE, class FUN, typename LAMBDA>
inline TYPE FunctionTable::reduce(LAMBDA &op, const Array<TYPE, FUN> &A,
const TYPE &initialValue) {
if (A.length() == 0)
return TYPE();
const TYPE *x =;
TYPE y = initialValue;
for ( size_t i = 0; i < A.length(); i++ )
y = op( x[i], y );
TYPE y = initialValue;
for (size_t i = 0; i < A.length(); i++)
y = op(x[i], y);
return y;
template<class TYPE, class FUN, typename LAMBDA>
inline TYPE FunctionTable::reduce( LAMBDA &op,
const Array<TYPE, FUN> &A,
const Array<TYPE, FUN> &B,
const TYPE &initialValue )
ARRAY_ASSERT( A.length() == B.length() );
if ( A.length() == 0 )
template <class TYPE, class FUN, typename LAMBDA>
inline TYPE FunctionTable::reduce(LAMBDA &op, const Array<TYPE, FUN> &A,
const Array<TYPE, FUN> &B,
const TYPE &initialValue) {
ARRAY_ASSERT(A.length() == B.length());
if (A.length() == 0)
return TYPE();
const TYPE *x =;
const TYPE *y =;
TYPE z = initialValue;
for ( size_t i = 0; i < A.length(); i++ )
z = op( x[i], y[i], z );
TYPE z = initialValue;
for (size_t i = 0; i < A.length(); i++)
z = op(x[i], y[i], z);
return z;
* Unary transformation *
template<class TYPE, class FUN, typename LAMBDA>
inline void FunctionTable::transform( LAMBDA &fun, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y )
y.resize( x.size() );
template <class TYPE, class FUN, typename LAMBDA>
inline void FunctionTable::transform(LAMBDA &fun, const Array<TYPE, FUN> &x,
Array<TYPE, FUN> &y) {
const size_t N = x.length();
for ( size_t i = 0; i < N; i++ )
y( i ) = fun( x( i ) );
for (size_t i = 0; i < N; i++)
y(i) = fun(x(i));
template<class TYPE, class FUN, typename LAMBDA>
inline void FunctionTable::transform( LAMBDA &fun,
const Array<TYPE, FUN> &x,
const Array<TYPE, FUN> &y,
Array<TYPE, FUN> &z )
if ( x.size() != y.size() )
throw std::logic_error( "Sizes of x and y do not match" );
z.resize( x.size() );
template <class TYPE, class FUN, typename LAMBDA>
inline void FunctionTable::transform(LAMBDA &fun, const Array<TYPE, FUN> &x,
const Array<TYPE, FUN> &y,
Array<TYPE, FUN> &z) {
if (x.size() != y.size())
throw std::logic_error("Sizes of x and y do not match");
const size_t N = x.length();
for ( size_t i = 0; i < N; i++ )
z( i ) = fun( x( i ), y( i ) );
for (size_t i = 0; i < N; i++)
z(i) = fun(x(i), y(i));
* axpy *
template<class TYPE>
void call_axpy( size_t N, const TYPE alpha, const TYPE *x, TYPE *y );
void call_axpy<float>( size_t N, const float alpha, const float *x, float *y );
void call_axpy<double>( size_t N, const double alpha, const double *x, double *y );
template<class TYPE>
void call_axpy( size_t N, const TYPE alpha, const TYPE *x, TYPE *y )
for ( size_t i = 0; i < N; i++ )
template <class TYPE>
void call_axpy(size_t N, const TYPE alpha, const TYPE *x, TYPE *y);
template <>
void call_axpy<float>(size_t N, const float alpha, const float *x, float *y);
template <>
void call_axpy<double>(size_t N, const double alpha, const double *x,
double *y);
template <class TYPE>
void call_axpy(size_t N, const TYPE alpha, const TYPE *x, TYPE *y) {
for (size_t i = 0; i < N; i++)
y[i] += alpha * x[i];
template<class TYPE, class FUN>
void FunctionTable::axpy( const TYPE alpha, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y )
if ( x.size() != y.size() )
throw std::logic_error( "Array sizes do not match" );
call_axpy( x.length(), alpha,, );
template <class TYPE, class FUN>
void FunctionTable::axpy(const TYPE alpha, const Array<TYPE, FUN> &x,
Array<TYPE, FUN> &y) {
if (x.size() != y.size())
throw std::logic_error("Array sizes do not match");
call_axpy(x.length(), alpha,,;
* Multiply two arrays *
template<class TYPE>
void call_gemv( size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE *A, const TYPE *x, TYPE *y );
void call_gemv<double>(
size_t M, size_t N, double alpha, double beta, const double *A, const double *x, double *y );
void call_gemv<float>(
size_t M, size_t N, float alpha, float beta, const float *A, const float *x, float *y );
template<class TYPE>
void call_gemv( size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE *A, const TYPE *x, TYPE *y )
for ( size_t i = 0; i < M; i++ )
template <class TYPE>
void call_gemv(size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE *A,
const TYPE *x, TYPE *y);
template <>
void call_gemv<double>(size_t M, size_t N, double alpha, double beta,
const double *A, const double *x, double *y);
template <>
void call_gemv<float>(size_t M, size_t N, float alpha, float beta,
const float *A, const float *x, float *y);
template <class TYPE>
void call_gemv(size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE *A,
const TYPE *x, TYPE *y) {
for (size_t i = 0; i < M; i++)
y[i] = beta * y[i];
for ( size_t j = 0; j < N; j++ ) {
for ( size_t i = 0; i < M; i++ )
for (size_t j = 0; j < N; j++) {
for (size_t i = 0; i < M; i++)
y[i] += alpha * A[i + j * M] * x[j];
template<class TYPE>
void call_gemm(
size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE *A, const TYPE *B, TYPE *C );
void call_gemm<double>( size_t M,
size_t N,
size_t K,
double alpha,
double beta,
const double *A,
const double *B,
double *C );
void call_gemm<float>( size_t M,
size_t N,
size_t K,
float alpha,
float beta,
const float *A,
const float *B,
float *C );
template<class TYPE>
void call_gemm(
size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE *A, const TYPE *B, TYPE *C )
for ( size_t i = 0; i < K * M; i++ )
template <class TYPE>
void call_gemm(size_t M, size_t N, size_t K, TYPE alpha, TYPE beta,
const TYPE *A, const TYPE *B, TYPE *C);
template <>
void call_gemm<double>(size_t M, size_t N, size_t K, double alpha, double beta,
const double *A, const double *B, double *C);
template <>
void call_gemm<float>(size_t M, size_t N, size_t K, float alpha, float beta,
const float *A, const float *B, float *C);
template <class TYPE>
void call_gemm(size_t M, size_t N, size_t K, TYPE alpha, TYPE beta,
const TYPE *A, const TYPE *B, TYPE *C) {
for (size_t i = 0; i < K * M; i++)
C[i] = beta * C[i];
for ( size_t k = 0; k < K; k++ ) {
for ( size_t j = 0; j < N; j++ ) {
for ( size_t i = 0; i < M; i++ )
for (size_t k = 0; k < K; k++) {
for (size_t j = 0; j < N; j++) {
for (size_t i = 0; i < M; i++)
C[i + k * M] += alpha * A[i + j * M] * B[j + k * N];
template<class TYPE, class FUN>
void FunctionTable::gemm( const TYPE alpha,
const Array<TYPE, FUN> &a,
const Array<TYPE, FUN> &b,
const TYPE beta,
Array<TYPE, FUN> &c )
if ( a.size( 1 ) != b.size( 0 ) )
throw std::logic_error( "Inner dimensions must match" );
if ( a.ndim() == 2 && b.ndim() == 1 ) {
call_gemv<TYPE>( a.size( 0 ), a.size( 1 ), alpha, beta,,, );
} else if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
a.size( 0 ), a.size( 1 ), b.size( 1 ), alpha, beta,,, );
template <class TYPE, class FUN>
void FunctionTable::gemm(const TYPE alpha, const Array<TYPE, FUN> &a,
const Array<TYPE, FUN> &b, const TYPE beta,
Array<TYPE, FUN> &c) {
if (a.size(1) != b.size(0))
throw std::logic_error("Inner dimensions must match");
if (a.ndim() == 2 && b.ndim() == 1) {
call_gemv<TYPE>(a.size(0), a.size(1), alpha, beta,,,;
} else if (a.ndim() <= 2 && b.ndim() <= 2) {
call_gemm<TYPE>(a.size(0), a.size(1), b.size(1), alpha, beta,,,;
} else {
throw std::logic_error( "Not finished yet" );
throw std::logic_error("Not finished yet");
template<class TYPE, class FUN>
void FunctionTable::multiply( const Array<TYPE, FUN> &a,
const Array<TYPE, FUN> &b,
Array<TYPE, FUN> &c )
if ( a.size( 1 ) != b.size( 0 ) )
throw std::logic_error( "Inner dimensions must match" );
if ( a.ndim() == 2 && b.ndim() == 1 ) {
c.resize( a.size( 0 ) );
call_gemv<TYPE>( a.size( 0 ), a.size( 1 ), 1, 0,,, );
} else if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
c.resize( a.size( 0 ), b.size( 1 ) );
a.size( 0 ), a.size( 1 ), b.size( 1 ), 1, 0,,, );
template <class TYPE, class FUN>
void FunctionTable::multiply(const Array<TYPE, FUN> &a,
const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c) {
if (a.size(1) != b.size(0))
throw std::logic_error("Inner dimensions must match");
if (a.ndim() == 2 && b.ndim() == 1) {
call_gemv<TYPE>(a.size(0), a.size(1), 1, 0,,,;
} else if (a.ndim() <= 2 && b.ndim() <= 2) {
c.resize(a.size(0), b.size(1));
call_gemm<TYPE>(a.size(0), a.size(1), b.size(1), 1, 0,,,;
} else {
throw std::logic_error( "Not finished yet" );
throw std::logic_error("Not finished yet");
* Check if two arrays are equal *
template<class TYPE, class FUN>
template <class TYPE, class FUN>
inline typename std::enable_if<std::is_integral<TYPE>::value, bool>::type
FunctionTableCompare( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, TYPE )
FunctionTableCompare(const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b,
bool pass = true;
if ( a.size() != b.size() )
throw std::logic_error( "Sizes of x and y do not match" );
for ( size_t i = 0; i < a.length(); i++ )
pass = pass && a( i ) == b( i );
if (a.size() != b.size())
throw std::logic_error("Sizes of x and y do not match");
for (size_t i = 0; i < a.length(); i++)
pass = pass && a(i) == b(i);
return pass;
template<class TYPE, class FUN>
template <class TYPE, class FUN>
inline typename std::enable_if<std::is_floating_point<TYPE>::value, bool>::type
FunctionTableCompare( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, TYPE tol )
FunctionTableCompare(const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b,
TYPE tol) {
bool pass = true;
if ( a.size() != b.size() )
throw std::logic_error( "Sizes of x and y do not match" );
for ( size_t i = 0; i < a.length(); i++ )
pass = pass && ( std::abs( a( i ) - b( i ) ) < tol );
if (a.size() != b.size())
throw std::logic_error("Sizes of x and y do not match");
for (size_t i = 0; i < a.length(); i++)
pass = pass && (std::abs(a(i) - b(i)) < tol);
return pass;
template<class TYPE, class FUN>
bool FunctionTable::equals( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, TYPE tol )
return FunctionTableCompare( a, b, tol );
template <class TYPE, class FUN>
bool FunctionTable::equals(const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b,
TYPE tol) {
return FunctionTableCompare(a, b, tol);
* Specialized Functions *
template<class TYPE, class FUN, class ALLOC>
void FunctionTable::transformReLU( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B )
const auto &fun = []( const TYPE &a ) { return std::max( a, static_cast<TYPE>( 0 ) ); };
transform( fun, A, B );
template<class TYPE, class FUN, class ALLOC>
void FunctionTable::transformAbs( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B )
B.resize( A.size() );
const auto &fun = []( const TYPE &a ) { return std::abs( a ); };
transform( fun, A, B );
template<class TYPE, class FUN, class ALLOC>
void FunctionTable::transformTanh( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B )
B.resize( A.size() );
const auto &fun = []( const TYPE &a ) { return tanh( a ); };
transform( fun, A, B );
template<class TYPE, class FUN, class ALLOC>
void FunctionTable::transformHardTanh( const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B )
B.resize( A.size() );
const auto &fun = []( const TYPE &a ) {
return std::max( -static_cast<TYPE>( 1.0 ), std::min( static_cast<TYPE>( 1.0 ), a ) );
template <class TYPE, class FUN, class ALLOC>
void FunctionTable::transformReLU(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B) {
const auto &fun = [](const TYPE &a) {
return std::max(a, static_cast<TYPE>(0));
transform( fun, A, B );
transform(fun, A, B);
template<class TYPE, class FUN, class ALLOC>
void FunctionTable::transformSigmoid( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B )
B.resize( A.size() );
const auto &fun = []( const TYPE &a ) { return 1.0 / ( 1.0 + exp( -a ) ); };
transform( fun, A, B );
template <class TYPE, class FUN, class ALLOC>
void FunctionTable::transformAbs(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B) {
const auto &fun = [](const TYPE &a) { return std::abs(a); };
transform(fun, A, B);
template <class TYPE, class FUN, class ALLOC>
void FunctionTable::transformTanh(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B) {
const auto &fun = [](const TYPE &a) { return tanh(a); };
transform(fun, A, B);
template<class TYPE, class FUN, class ALLOC>
void FunctionTable::transformSoftPlus( const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B )
B.resize( A.size() );
const auto &fun = []( const TYPE &a ) { return log1p( exp( a ) ); };
transform( fun, A, B );
template <class TYPE, class FUN, class ALLOC>
void FunctionTable::transformHardTanh(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B) {
const auto &fun = [](const TYPE &a) {
return std::max(-static_cast<TYPE>(1.0),
std::min(static_cast<TYPE>(1.0), a));
transform(fun, A, B);
template<class TYPE, class FUN, class ALLOC>
TYPE FunctionTable::sum( const Array<TYPE, FUN, ALLOC> &A )
const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
return reduce( fun, A, (TYPE) 0 );
template <class TYPE, class FUN, class ALLOC>
void FunctionTable::transformSigmoid(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B) {
const auto &fun = [](const TYPE &a) { return 1.0 / (1.0 + exp(-a)); };
transform(fun, A, B);
template<class TYPE>
inline void FunctionTable::gemmWrapper( char TRANSA,
char TRANSB,
int M,
int N,
int K,
TYPE alpha,
const TYPE *A,
int LDA,
const TYPE *B,
int LDB,
TYPE beta,
int LDC )
template <class TYPE, class FUN, class ALLOC>
void FunctionTable::transformSoftPlus(const Array<TYPE, FUN, ALLOC> &A,
Array<TYPE, FUN, ALLOC> &B) {
const auto &fun = [](const TYPE &a) { return log1p(exp(a)); };
transform(fun, A, B);
template <class TYPE, class FUN, class ALLOC>
TYPE FunctionTable::sum(const Array<TYPE, FUN, ALLOC> &A) {
const auto &fun = [](const TYPE &a, const TYPE &b) { return a + b; };
return reduce(fun, A, (TYPE)0);
template <class TYPE>
inline void FunctionTable::gemmWrapper(char TRANSA, char TRANSB, int M, int N,
int K, TYPE alpha, const TYPE *A,
int LDA, const TYPE *B, int LDB,
TYPE beta, TYPE *C, int LDC) {
ERROR("Not finished");

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,6 @@ redistribution is prohibited.
#ifndef included_LBPM_MPI
#define included_LBPM_MPI
#include <array>
#include <atomic>
#include <complex>
@ -31,7 +30,6 @@ redistribution is prohibited.
#include <string>
#include <vector>
// Include mpi.h (or define MPI objects)
// clang-format off
#ifdef USE_MPI
@ -48,10 +46,8 @@ redistribution is prohibited.
// clang-format on
namespace Utilities {
* \class MPI
@ -69,8 +65,7 @@ namespace Utilities {
* succeed provided that the size of the data type object is a fixed size on
* all processors. sizeof(type) must be the same for all elements and processors.
class MPI final
class MPI final {
enum class ThreadSupport : int { SINGLE, FUNNELED, SERIALIZED, MULTIPLE };
@ -87,11 +82,9 @@ public: // Constructors
//! Empty destructor
* \brief Constructor from existing MPI communicator
* \details This constructor creates a new communicator from an existing MPI communicator.
@ -104,8 +97,7 @@ public: // Constructors
* \param manage Do we want to manage the comm (free the MPI_Comm when this object leaves
* scope)
MPI( MPI_Comm comm, bool manage = false );
MPI(MPI_Comm comm, bool manage = false);
* \brief Constructor from existing communicator
@ -113,30 +105,26 @@ public: // Constructors
* This does not create a new internal MPI_Comm, but uses the existing comm.
* \param comm Existing communicator
MPI( const MPI &comm );
MPI(const MPI &comm);
* Move constructor
* @param rhs Communicator to move from
MPI( MPI &&rhs );
MPI(MPI &&rhs);
* \brief Assignment operator
* \details This operator overloads the assignment to correctly copy a communicator
* \param comm Existing MPI object
MPI &operator=( const MPI &comm );
MPI &operator=(const MPI &comm);
* Move assignment operator
* @param rhs Communicator to move from
MPI &operator=( MPI &&rhs );
MPI &operator=(MPI &&rhs);
* \brief Reset the object
@ -144,7 +132,6 @@ public: // Constructors
void reset();
public: // Member functions
* \brief Get the node name
@ -153,18 +140,14 @@ public: // Member functions
static std::string getNodeName();
//! Function to return the number of processors available
static int getNumberOfProcessors();
//! Function to return the affinity of the current process
static std::vector<int> getProcessAffinity();
//! Function to set the affinity of the current process
static void setProcessAffinity( const std::vector<int> &procs );
static void setProcessAffinity(const std::vector<int> &procs);
* \brief Load balance the processes within a node
@ -189,22 +172,21 @@ public: // Member functions
* processors).
static void balanceProcesses( const MPI &comm = MPI( MPI_COMM_WORLD ), const int method = 1,
const std::vector<int> &procs = std::vector<int>(), const int N_min = 1,
const int N_max = -1 );
static void
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD),
const int method = 1,
const std::vector<int> &procs = std::vector<int>(),
const int N_min = 1, const int N_max = -1);
//! Query the level of thread support
static ThreadSupport queryThreadSupport();
* \brief Generate a random number
* \details This generates a random number that is consistent across the comm
size_t rand() const;
* \brief Split an existing communicator
* \details This creates a new communicator by splitting an existing communicator.
@ -222,8 +204,7 @@ public: // Member functions
* have the relative rank order as they did in their parent group. (See
* MPI_Comm_split)
MPI split( int color, int key = -1 ) const;
MPI split(int color, int key = -1) const;
* \brief Split an existing communicator by node
@ -240,8 +221,7 @@ public: // Member functions
* have the relative rank order as they did in their parent group. (See
* MPI_Comm_split)
MPI splitByNode( int key = -1 ) const;
MPI splitByNode(int key = -1) const;
* \brief Duplicate an existing communicator
@ -253,7 +233,6 @@ public: // Member functions
MPI dup() const;
* \brief Create a communicator from the intersection of two communicators
* \details This creates a new communicator by intersecting two existing communicators.
@ -265,15 +244,13 @@ public: // Member functions
* The communicators partially overlap. This will require communication on the first
* communicator.
static MPI intersect( const MPI &comm1, const MPI &comm2 );
static MPI intersect(const MPI &comm1, const MPI &comm2);
* Check if the current communicator is NULL
bool isNull() const { return d_isNull; }
* \brief Return the global ranks for the comm
* \details This returns a vector which contains the global ranks for each
@ -283,7 +260,6 @@ public: // Member functions
std::vector<int> globalRanks() const;
* Get the current MPI communicator.
* Note: The underlying MPI_Comm object may be free'd by the object when it is no
@ -294,15 +270,13 @@ public: // Member functions
const MPI_Comm &getCommunicator() const { return communicator; }
* \brief Overload operator ==
* \details Overload operator comm1 == comm2. Two MPI objects are == if they share the same
* communicator.
* Note: this is a local operation.
bool operator==( const MPI & ) const;
bool operator==(const MPI &) const;
* \brief Overload operator !=
@ -310,8 +284,7 @@ public: // Member functions
* do not share the same communicator.
* Note: this is a local operation.
bool operator!=( const MPI & ) const;
bool operator!=(const MPI &) const;
* \brief Overload operator <
@ -324,8 +297,7 @@ public: // Member functions
* Additionally, all processors on the first object MUST call this routine and will be
* synchronized through this call (there is an internal allReduce).
bool operator<( const MPI & ) const;
bool operator<(const MPI &) const;
* \brief Overload operator <=
@ -337,8 +309,7 @@ public: // Member functions
* call this routine and will be synchronized through this call (there is an internal
* allReduce).
bool operator<=( const MPI & ) const;
bool operator<=(const MPI &) const;
* \brief Overload operator >
@ -351,8 +322,7 @@ public: // Member functions
* Additionally, all processors on the first object MUST call this routine and will be
* synchronized through this call (there is an internal allReduce).
bool operator>( const MPI & ) const;
bool operator>(const MPI &) const;
* \brief Overload operator >=
@ -365,8 +335,7 @@ public: // Member functions
* Additionally, all processors on the first object MUST call this routine and will be
* synchronized through this call (there is an internal allReduce).
bool operator>=( const MPI & ) const;
bool operator>=(const MPI &) const;
* \brief Compare to another communicator
@ -376,8 +345,7 @@ public: // Member functions
* 4 if different contexts but similar groups, and 0 otherwise.
* Note: this is a local operation.
int compare( const MPI & ) const;
int compare(const MPI &) const;
* Return the processor rank (identifier) from 0 through the number of
@ -385,19 +353,16 @@ public: // Member functions
int getRank() const { return comm_rank; }
* Return the number of processors.
int getSize() const { return comm_size; }
* Return the maximum tag
int maxTag() const { return d_maxTag; }
* \brief Return a new tag
* \details This routine will return an unused tag for communication.
@ -406,7 +371,6 @@ public: // Member functions
int newTag();
* Call MPI_Abort or exit depending on whether running with one or more
* processes and value set by function above, if called. The default is
@ -416,15 +380,13 @@ public: // Member functions
void abort() const;
* Set boolean flag indicating whether exit or abort is called when running
* with one processor. Calling this function influences the behavior of
* calls to abort(). By default, the flag is true meaning that
* abort() will be called. Passing false means exit(-1) will be called.
void setCallAbortInSerialInsteadOfExit( bool flag = true );
void setCallAbortInSerialInsteadOfExit(bool flag = true);
* \brief Boolean all reduce
@ -432,8 +394,7 @@ public: // Member functions
* It returns true iff the value is true on all processors.
* \param value The input value for the all reduce
bool allReduce( const bool value ) const;
bool allReduce(const bool value) const;
* \brief Boolean any reduce
@ -441,8 +402,7 @@ public: // Member functions
* It returns true if any processor is true;
* \param value The input value for the any reduce
bool anyReduce( const bool value ) const;
bool anyReduce(const bool value) const;
* \brief Sum Reduce
@ -450,9 +410,7 @@ public: // Member functions
* It returns the sum across all processors;
* \param value The input value for the all reduce
template<class type>
type sumReduce( const type value ) const;
template <class type> type sumReduce(const type value) const;
* \brief Sum Reduce
@ -462,9 +420,7 @@ public: // Member functions
* \param x The input/output array for the reduce
* \param n The number of values in the array (must match on all nodes)
template<class type>
void sumReduce( type *x, const int n = 1 ) const;
template <class type> void sumReduce(type *x, const int n = 1) const;
* \brief Sum Reduce
@ -475,9 +431,8 @@ public: // Member functions
* \param y The output array for the reduce
* \param n The number of values in the array (must match on all nodes)
template<class type>
void sumReduce( const type *x, type *y, const int n = 1 ) const;
template <class type>
void sumReduce(const type *x, type *y, const int n = 1) const;
* \brief Min Reduce
@ -485,9 +440,7 @@ public: // Member functions
* It returns the minimum value across all processors;
* \param value The input value for the all reduce
template<class type>
type minReduce( const type value ) const;
template <class type> type minReduce(const type value) const;
* \brief Min Reduce
@ -503,9 +456,8 @@ public: // Member functions
* \param rank_of_min Optional array indicating the rank of the processor containing the
* minimum value
template<class type>
void minReduce( type *x, const int n = 1, int *rank_of_min = nullptr ) const;
template <class type>
void minReduce(type *x, const int n = 1, int *rank_of_min = nullptr) const;
* \brief Min Reduce
@ -522,9 +474,9 @@ public: // Member functions
* \param rank_of_min Optional array indicating the rank of the processor containing the
* minimum value
template<class type>
void minReduce( const type *x, type *y, const int n = 1, int *rank_of_min = nullptr ) const;
template <class type>
void minReduce(const type *x, type *y, const int n = 1,
int *rank_of_min = nullptr) const;
* \brief Max Reduce
@ -532,9 +484,7 @@ public: // Member functions
* It returns the maximum value across all processors;
* \param value The input value for the all reduce
template<class type>
type maxReduce( const type value ) const;
template <class type> type maxReduce(const type value) const;
* \brief Max Reduce
@ -550,9 +500,8 @@ public: // Member functions
* \param rank_of_max Optional array indicating the rank of the processor containing the
* maximum value
template<class type>
void maxReduce( type *x, const int n = 1, int *rank_of_max = nullptr ) const;
template <class type>
void maxReduce(type *x, const int n = 1, int *rank_of_max = nullptr) const;
* \brief Max Reduce
@ -569,9 +518,9 @@ public: // Member functions
* \param rank_of_max Optional array indicating the rank of the processor containing the
* maximum value
template<class type>
void maxReduce( const type *x, type *y, const int n = 1, int *rank_of_max = nullptr ) const;
template <class type>
void maxReduce(const type *x, type *y, const int n = 1,
int *rank_of_max = nullptr) const;
* \brief Scan Sum Reduce
@ -581,9 +530,8 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
template<class type>
void sumScan( const type *x, type *y, const int n = 1 ) const;
template <class type>
void sumScan(const type *x, type *y, const int n = 1) const;
* \brief Scan Min Reduce
@ -593,9 +541,8 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
template<class type>
void minScan( const type *x, type *y, const int n = 1 ) const;
template <class type>
void minScan(const type *x, type *y, const int n = 1) const;
* \brief Scan Max Reduce
@ -605,9 +552,8 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
template<class type>
void maxScan( const type *x, type *y, const int n = 1 ) const;
template <class type>
void maxScan(const type *x, type *y, const int n = 1) const;
* \brief Broadcast
@ -615,9 +561,7 @@ public: // Member functions
* \param value The input value for the broadcast.
* \param root The processor performing the broadcast
template<class type>
type bcast( const type &value, const int root ) const;
template <class type> type bcast(const type &value, const int root) const;
* \brief Broadcast
@ -626,16 +570,14 @@ public: // Member functions
* \param n The number of values in the array (must match on all nodes)
* \param root The processor performing the broadcast
template<class type>
void bcast( type *value, const int n, const int root ) const;
template <class type>
void bcast(type *value, const int n, const int root) const;
* Perform a global barrier across all processors.
void barrier() const;
* @brief This function sends an MPI message with an array to another processor.
@ -652,9 +594,9 @@ public: // Member functions
* to be sent with this message. Default tag is 0.
* The matching recv must share this tag.
template<class type>
void send( const type *buf, const int length, const int recv, int tag = 0 ) const;
template <class type>
void send(const type *buf, const int length, const int recv,
int tag = 0) const;
* @brief This function sends an MPI message with an array of bytes
@ -669,8 +611,8 @@ public: // Member functions
* to be sent with this message. Default tag is 0.
* The matching recv must share this tag.
void sendBytes( const void *buf, const int N_bytes, const int recv, int tag = 0 ) const;
void sendBytes(const void *buf, const int N_bytes, const int recv,
int tag = 0) const;
* @brief This function sends an MPI message with an array
@ -684,10 +626,9 @@ public: // Member functions
* @param tag Integer argument specifying an integer tag
* to be sent with this message.
template<class type>
MPI_Request Isend(
const type *buf, const int length, const int recv_proc, const int tag ) const;
template <class type>
MPI_Request Isend(const type *buf, const int length, const int recv_proc,
const int tag) const;
* @brief This function sends an MPI message with an array of bytes
@ -701,9 +642,8 @@ public: // Member functions
* @param tag Integer argument specifying an integer tag
* to be sent with this message.
MPI_Request IsendBytes(
const void *buf, const int N_bytes, const int recv_proc, const int tag ) const;
MPI_Request IsendBytes(const void *buf, const int N_bytes,
const int recv_proc, const int tag) const;
* @brief This function receives an MPI message with a data
@ -721,14 +661,12 @@ public: // Member functions
* @param tag Optional integer argument specifying a tag which must be matched
* by the tag of the incoming message. Default tag is 0.
template<class type>
inline void recv( type *buf, int length, const int send, int tag ) const
template <class type>
inline void recv(type *buf, int length, const int send, int tag) const {
int length2 = length;
recv( buf, length2, send, false, tag );
recv(buf, length2, send, false, tag);
* @brief This function receives an MPI message with a data
* array from another processor.
@ -748,9 +686,9 @@ public: // Member functions
* @param tag Optional integer argument specifying a tag which must be matched
* by the tag of the incoming message. Default tag is 0.
template<class type>
void recv( type *buf, int &length, const int send, const bool get_length, int tag ) const;
template <class type>
void recv(type *buf, int &length, const int send, const bool get_length,
int tag) const;
* @brief This function receives an MPI message with an array of
@ -765,8 +703,7 @@ public: // Member functions
* must be matched by the tag of the incoming message. Default
* tag is 0.
void recvBytes( void *buf, int &N_bytes, const int send, int tag = 0 ) const;
void recvBytes(void *buf, int &N_bytes, const int send, int tag = 0) const;
* @brief This function receives an MPI message with a data
@ -778,9 +715,9 @@ public: // Member functions
* @param tag Optional integer argument specifying a tag which must
* be matched by the tag of the incoming message.
template<class type>
MPI_Request Irecv( type *buf, const int length, const int send_proc, const int tag ) const;
template <class type>
MPI_Request Irecv(type *buf, const int length, const int send_proc,
const int tag) const;
* @brief This function receives an MPI message with an array of
@ -794,35 +731,30 @@ public: // Member functions
* @param tag Integer argument specifying a tag which must
* be matched by the tag of the incoming message.
MPI_Request IrecvBytes(
void *buf, const int N_bytes, const int send_proc, const int tag ) const;
MPI_Request IrecvBytes(void *buf, const int N_bytes, const int send_proc,
const int tag) const;
* @brief This function sends and receives data using a blocking call
template<class type>
void sendrecv( const type *sendbuf, int sendcount, int dest, int sendtag, type *recvbuf,
int recvcount, int source, int recvtag ) const;
template <class type>
void sendrecv(const type *sendbuf, int sendcount, int dest, int sendtag,
type *recvbuf, int recvcount, int source, int recvtag) const;
* Each processor sends every other processor a single value.
* @param[in] x Input value for allGather
* @return Output array for allGather
template<class type>
std::vector<type> allGather( const type &x ) const;
template <class type> std::vector<type> allGather(const type &x) const;
* Each processor sends every other processor an array
* @param[in] x Input array for allGather
* @return Output array for allGather
template<class type>
std::vector<type> allGather( const std::vector<type> &x ) const;
template <class type>
std::vector<type> allGather(const std::vector<type> &x) const;
* Each processor sends every other processor a single value.
@ -832,9 +764,7 @@ public: // Member functions
* @param x_out Output array for allGather (must be preallocated to the size of the
* communicator)
template<class type>
void allGather( const type &x_in, type *x_out ) const;
template <class type> void allGather(const type &x_in, type *x_out) const;
* Each processor sends an array of data to all other processors.
@ -861,27 +791,24 @@ public: // Member functions
* internally
* and the sizes and displacements will be returned (if desired).
template<class type>
int allGather( const type *send_data, const int send_cnt, type *recv_data,
int *recv_cnt = nullptr, int *recv_disp = nullptr, bool known_recv = false ) const;
template <class type>
int allGather(const type *send_data, const int send_cnt, type *recv_data,
int *recv_cnt = nullptr, int *recv_disp = nullptr,
bool known_recv = false) const;
* This function combines sets from different processors to create a single master set
* @param set Input/Output std::set for the gather.
template<class type>
void setGather( std::set<type> &set ) const;
template <class type> void setGather(std::set<type> &set) const;
* This function combines std::maps from different processors to create a single master std::map
* If two or more ranks share the same key, the lowest rank will be used
* @param map Input/Output std::map for the gather.
template<class KEY, class DATA>
void mapGather( std::map<KEY, DATA> &map ) const;
template <class KEY, class DATA>
void mapGather(std::map<KEY, DATA> &map) const;
* Each processor sends an array of n values to each processor.
@ -894,9 +821,8 @@ public: // Member functions
* @param send_data Input array (nxN)
* @param recv_data Output array of received values (nxN)
template<class type>
void allToAll( const int n, const type *send_data, type *recv_data ) const;
template <class type>
void allToAll(const int n, const type *send_data, type *recv_data) const;
* Each processor sends an array of data to the different processors.
@ -926,11 +852,11 @@ public: // Member functions
* internally
* and the sizes and displacements will be returned (if desired).
template<class type>
int allToAll( const type *send_data, const int send_cnt[], const int send_disp[],
type *recv_data, int *recv_cnt = nullptr, int *recv_disp = nullptr,
bool known_recv = false ) const;
template <class type>
int allToAll(const type *send_data, const int send_cnt[],
const int send_disp[], type *recv_data,
int *recv_cnt = nullptr, int *recv_disp = nullptr,
bool known_recv = false) const;
* \brief Send a list of processor ids to communicate
@ -942,8 +868,7 @@ public: // Member functions
* \param ranks List of ranks that the current rank wants to communicate with
* \return List of ranks that want to communicate with the current processor
std::vector<int> commRanks( const std::vector<int> &ranks ) const;
std::vector<int> commRanks(const std::vector<int> &ranks) const;
* \brief Wait for a communication to finish
@ -951,8 +876,7 @@ public: // Member functions
* Note: this does not require a communicator.
* \param request Communication request to wait for (returned for Isend or Irecv)
static void wait( MPI_Request request );
static void wait(MPI_Request request);
* \brief Wait for any communication to finish.
@ -962,8 +886,7 @@ public: // Member functions
* \param count Number of communications to check
* \param request Array of communication requests to wait for (returned for Isend or Irecv)
static int waitAny( int count, MPI_Request *request );
static int waitAny(int count, MPI_Request *request);
* \brief Wait for all communications to finish.
@ -972,8 +895,7 @@ public: // Member functions
* \param count Number of communications to check
* \param request Array of communication requests to wait for (returned for Isend or Irecv)
static void waitAll( int count, MPI_Request *request );
static void waitAll(int count, MPI_Request *request);
* \brief Wait for some communications to finish.
@ -983,8 +905,7 @@ public: // Member functions
* \param count Number of communications to check
* \param request Array of communication requests to wait for (returned for Isend or Irecv)
static std::vector<int> waitSome( int count, MPI_Request *request );
static std::vector<int> waitSome(int count, MPI_Request *request);
* \brief Nonblocking test for a message
@ -995,8 +916,7 @@ public: // Member functions
* \param source source rank (-1: any source)
* \param tag tag (-1: any tag)
int Iprobe( int source = -1, int tag = -1 ) const;
int Iprobe(int source = -1, int tag = -1) const;
* \brief Blocking test for a message
@ -1006,8 +926,7 @@ public: // Member functions
* \param source source rank (-1: any source)
* \param tag tag (-1: any tag)
int probe( int source = -1, int tag = -1 ) const;
int probe(int source = -1, int tag = -1) const;
* \brief Start a serial region
@ -1018,14 +937,12 @@ public: // Member functions
void serializeStart();
* \brief Stop a serial region
* \details Stop a serial region. See serializeStart for more information.
void serializeStop();
* \brief Elapsed time
* \details This function returns the elapsed time on the calling processor
@ -1036,21 +953,18 @@ public: // Member functions
static double time();
* \brief Timer resolution
* \details This function returns the timer resolution used by "time"
static double tick();
* \brief Change the level of the internal timers
* \details This function changes the level of the timers used to profile MPI
* \param level New level of the timers
static void changeProfileLevel( int level ) { profile_level = level; }
static void changeProfileLevel(int level) { profile_level = level; }
//! Return the total number of MPI_Comm objects that have been created
static size_t MPI_Comm_created() { return N_MPI_Comm_created; }
@ -1068,51 +982,51 @@ public: // Member functions
static bool MPI_Active();
//! Start MPI
static void start_MPI( int argc_in, char *argv_in[], int profile_level = 0 );
static void start_MPI(int argc_in, char *argv_in[], int profile_level = 0);
//! Stop MPI
static void stop_MPI();
* \brief Load balance
* \details This function will return a new communicator in which the ranks match
* the performance and the work load.
MPI loadBalance( double localPerformance, std::vector<double> work );
MPI loadBalance(double localPerformance, std::vector<double> work);
private: // Private helper functions for templated MPI operations;
template<class type>
void call_sumReduce( type *x, const int n = 1 ) const;
template<class type>
void call_sumReduce( const type *x, type *y, const int n = 1 ) const;
template<class type>
void call_minReduce( type *x, const int n = 1, int *rank_of_min = nullptr ) const;
template<class type>
void call_minReduce(
const type *x, type *y, const int n = 1, int *rank_of_min = nullptr ) const;
template<class type>
void call_maxReduce( type *x, const int n = 1, int *rank_of_max = nullptr ) const;
template<class type>
void call_maxReduce(
const type *x, type *y, const int n = 1, int *rank_of_max = nullptr ) const;
template<class type>
void call_bcast( type *x, const int n, const int root ) const;
template<class type>
void call_allGather( const type &x_in, type *x_out ) const;
template<class type>
void call_allGather(
const type *x_in, int size_in, type *x_out, int *size_out, int *disp_out ) const;
template<class type>
void call_sumScan( const type *x, type *y, int n = 1 ) const;
template<class type>
void call_minScan( const type *x, type *y, int n = 1 ) const;
template<class type>
void call_maxScan( const type *x, type *y, int n = 1 ) const;
template<class type>
void call_allToAll( const type *send_data, const int send_cnt[], const int send_disp[],
type *recv_data, const int *recv_cnt, const int *recv_disp ) const;
template <class type> void call_sumReduce(type *x, const int n = 1) const;
template <class type>
void call_sumReduce(const type *x, type *y, const int n = 1) const;
template <class type>
void call_minReduce(type *x, const int n = 1,
int *rank_of_min = nullptr) const;
template <class type>
void call_minReduce(const type *x, type *y, const int n = 1,
int *rank_of_min = nullptr) const;
template <class type>
void call_maxReduce(type *x, const int n = 1,
int *rank_of_max = nullptr) const;
template <class type>
void call_maxReduce(const type *x, type *y, const int n = 1,
int *rank_of_max = nullptr) const;
template <class type>
void call_bcast(type *x, const int n, const int root) const;
template <class type>
void call_allGather(const type &x_in, type *x_out) const;
template <class type>
void call_allGather(const type *x_in, int size_in, type *x_out,
int *size_out, int *disp_out) const;
template <class type>
void call_sumScan(const type *x, type *y, int n = 1) const;
template <class type>
void call_minScan(const type *x, type *y, int n = 1) const;
template <class type>
void call_maxScan(const type *x, type *y, int n = 1) const;
template <class type>
void call_allToAll(const type *send_data, const int send_cnt[],
const int send_disp[], type *recv_data,
const int *recv_cnt, const int *recv_disp) const;
private: // data members
// The internal MPI communicator
@ -1157,14 +1071,11 @@ private: // data members
static volatile unsigned int N_MPI_Comm_destroyed;
} // namespace Utilities
// Include the default instantiations
#include "common/MPI.I"
// \endcond

View File

@ -5,112 +5,108 @@
#include <cstring>
// Read a file into memory
std::vector<char> readFile( const std::string& filename )
auto fid = fopen( filename.c_str(), "rb" );
INSIST( fid, "File does not exist: " + filename );
fseek( fid, 0, SEEK_END );
std::vector<char> readFile(const std::string &filename) {
auto fid = fopen(filename.c_str(), "rb");
INSIST(fid, "File does not exist: " + filename);
fseek(fid, 0, SEEK_END);
size_t bytes = ftell(fid);
fseek( fid, 0, SEEK_SET );
std::vector<char> data( bytes );
size_t bytes2 = fread(, 1, bytes, fid );
ASSERT( bytes == bytes2 );
fclose( fid );
fseek(fid, 0, SEEK_SET);
std::vector<char> data(bytes);
size_t bytes2 = fread(, 1, bytes, fid);
ASSERT(bytes == bytes2);
return data;
// Decompress a gzip buffer
std::vector<char> gunzip( const std::vector<char>& in )
std::vector<char> gunzip(const std::vector<char> &in) {
z_stream stream;
std::vector<char> out( 1000000 );
stream.next_in = (Bytef*);
std::vector<char> out(1000000);
stream.next_in = (Bytef *);
stream.avail_in = in.size();
stream.total_in = 0;
stream.zalloc = Z_NULL;
stream.zfree = Z_NULL;
stream.opaque = Z_NULL;
stream.next_out = (Bytef*);
stream.next_out = (Bytef *);
stream.avail_out = out.size();
stream.total_out = 0;
ASSERT( inflateInit2(&stream, 16+MAX_WBITS) == Z_OK );
ASSERT(inflateInit2(&stream, 16 + MAX_WBITS) == Z_OK);
bool finished = inflate(&stream, Z_SYNC_FLUSH) == Z_STREAM_END;
while ( !finished && stream.msg == Z_NULL ) {
out.resize( 2 * out.size() );
stream.next_out = (Bytef*) &out[stream.total_out];
while (!finished && stream.msg == Z_NULL) {
out.resize(2 * out.size());
stream.next_out = (Bytef *)&out[stream.total_out];
stream.avail_out = out.size() - stream.total_out;
finished = inflate(&stream, Z_SYNC_FLUSH) == Z_STREAM_END;
ASSERT( stream.msg == Z_NULL );
out.resize( stream.total_out );
ASSERT(stream.msg == Z_NULL);
return out;
// Read the compressed micro CT data
Array<uint8_t> readMicroCT( const std::string& filename )
auto in = readFile( filename );
auto out = gunzip( in );
ASSERT( out.size() == 1024*1024*1024 );
Array<uint8_t> data( 1024, 1024, 1024 );
memcpy(,, data.length() );
Array<uint8_t> readMicroCT(const std::string &filename) {
auto in = readFile(filename);
auto out = gunzip(in);
ASSERT(out.size() == 1024 * 1024 * 1024);
Array<uint8_t> data(1024, 1024, 1024);
memcpy(,, data.length());
return data;
// Read the compressed micro CT data and distribute
Array<uint8_t> readMicroCT( const Database& domain, const Utilities::MPI& comm )
Array<uint8_t> readMicroCT(const Database &domain, const Utilities::MPI &comm) {
// Get the local problem info
auto n = domain.getVector<int>( "n" );
auto n = domain.getVector<int>("n");
int rank = comm.getRank();
auto nproc = domain.getVector<int>( "nproc" );
RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] );
auto nproc = domain.getVector<int>("nproc");
RankInfoStruct rankInfo(rank, nproc[0], nproc[1], nproc[2]);
// Determine the largest file number to get
int Nfx = ( n[0] * rankInfo.nx + 1023 ) / 1024;
int Nfy = ( n[1] * rankInfo.ny + 1023 ) / 1024;
int Nfz = ( n[2] * + 1023 ) / 1024;
int Nfx = (n[0] * rankInfo.nx + 1023) / 1024;
int Nfy = (n[1] * rankInfo.ny + 1023) / 1024;
int Nfz = (n[2] * + 1023) / 1024;
// Load one of the files if rank < largest file
Array<uint8_t> data;
RankInfoStruct srcRankInfo( rank, Nfx, Nfy, Nfz );
if ( srcRankInfo.ix >= 0 ) {
auto filename = domain.getScalar<std::string>( "Filename" );
char tmp[100];
if ( filename.find( "0x_0y_0z.gbd.gz" ) != std::string::npos ) {
sprintf( tmp, "%ix_%iy_%iz.gbd.gz", srcRankInfo.ix, srcRankInfo.jy, );
filename = filename.replace( filename.find( "0x_0y_0z.gbd.gz" ), 15, std::string( tmp ) );
} else if ( filename.find( "x0_y0_z0.gbd.gz" ) != std::string::npos ) {
sprintf( tmp, "x%i_y%i_z%i.gbd.gz", srcRankInfo.ix, srcRankInfo.jy, );
filename = filename.replace( filename.find( "x0_y0_z0.gbd.gz" ), 15, std::string( tmp ) );
} else {
ERROR( "Invalid name for first file" );
data = readMicroCT( filename );
RankInfoStruct srcRankInfo(rank, Nfx, Nfy, Nfz);
if (srcRankInfo.ix >= 0) {
auto filename = domain.getScalar<std::string>("Filename");
char tmp[100];
if (filename.find("0x_0y_0z.gbd.gz") != std::string::npos) {
sprintf(tmp, "%ix_%iy_%iz.gbd.gz", srcRankInfo.ix, srcRankInfo.jy,;
filename = filename.replace(filename.find("0x_0y_0z.gbd.gz"), 15,
} else if (filename.find("x0_y0_z0.gbd.gz") != std::string::npos) {
sprintf(tmp, "x%i_y%i_z%i.gbd.gz", srcRankInfo.ix, srcRankInfo.jy,;
filename = filename.replace(filename.find("x0_y0_z0.gbd.gz"), 15,
} else {
ERROR("Invalid name for first file");
data = readMicroCT(filename);
// Redistribute the data
data = redistribute( srcRankInfo, data, rankInfo, { n[0], n[1], n[2] }, comm );
data = redistribute(srcRankInfo, data, rankInfo, {n[0], n[1], n[2]}, comm);
// Relabel the data
auto ReadValues = domain.getVector<int>( "ReadValues" );
auto WriteValues = domain.getVector<int>( "WriteValues" );
ASSERT( ReadValues.size() == WriteValues.size() );
// Relabel the data
auto ReadValues = domain.getVector<int>("ReadValues");
auto WriteValues = domain.getVector<int>("WriteValues");
ASSERT(ReadValues.size() == WriteValues.size());
int readMaxValue = 0;
for ( auto v : ReadValues )
readMaxValue = std::max( data.max()+1, v );
std::vector<int> map( readMaxValue + 1, -1 );
for ( size_t i=0; i<ReadValues.size(); i++ )
for (auto v : ReadValues)
readMaxValue = std::max(data.max() + 1, v);
std::vector<int> map(readMaxValue + 1, -1);
for (size_t i = 0; i < ReadValues.size(); i++)
map[ReadValues[i]] = WriteValues[i];
for ( size_t i=0; i<data.length(); i++ ) {
for (size_t i = 0; i < data.length(); i++) {
int t = data(i);
ASSERT( t >= 0 && t <= readMaxValue );
ASSERT(t >= 0 && t <= readMaxValue);
data(i) = map[t];

View File

@ -1,16 +1,13 @@
#include "common/Array.h"
#include "common/Communication.h"
#include "common/Database.h"
#include "common/MPI.h"
Array<uint8_t> readMicroCT(const std::string &filename);
Array<uint8_t> readMicroCT( const std::string& filename );
Array<uint8_t> readMicroCT( const Database& domain, const Utilities::MPI& comm );
Array<uint8_t> readMicroCT(const Database &domain, const Utilities::MPI &comm);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -47,245 +47,277 @@
#include "common/SpherePack.h"
// Inline function to read line without a return argument
static inline void fgetl( char * str, int num, FILE * stream )
char* ptr = fgets( str, num, stream );
if ( 0 ) {char *temp = (char *)&ptr; temp++;}
static inline void fgetl(char *str, int num, FILE *stream) {
char *ptr = fgets(str, num, stream);
if (0) {
char *temp = (char *)&ptr;
void WriteLocalSolidID(char *FILENAME, char *ID, int N)
char value;
ofstream File(FILENAME,ios::binary);
for (int n=0; n<N; n++){
value = ID[n];
File.write((char*) &value, sizeof(value));
void WriteLocalSolidID(char *FILENAME, char *ID, int N) {
char value;
ofstream File(FILENAME, ios::binary);
for (int n = 0; n < N; n++) {
value = ID[n];
File.write((char *)&value, sizeof(value));
void WriteLocalSolidDistance(char *FILENAME, double *Distance, int N)
double value;
ofstream File(FILENAME,ios::binary);
for (int n=0; n<N; n++){
value = Distance[n];
File.write((char*) &value, sizeof(value));
void WriteLocalSolidDistance(char *FILENAME, double *Distance, int N) {
double value;
ofstream File(FILENAME, ios::binary);
for (int n = 0; n < N; n++) {
value = Distance[n];
File.write((char *)&value, sizeof(value));
void ReadSpherePacking(int nspheres, double *List_cx, double *List_cy, double *List_cz, double *List_rad)
// Read in the full sphere pack
//...... READ IN THE SPHERES...................................
cout << "Reading the packing file..." << endl;
FILE *fid = fopen("pack.out","rb");
INSIST(fid!=NULL,"Error opening pack.out");
//.........Trash the header lines..........
char line[100];
fgetl(line, 100, fid);
fgetl(line, 100, fid);
fgetl(line, 100, fid);
fgetl(line, 100, fid);
fgetl(line, 100, fid);
// the spheres..................
// We will read until a blank line or end-of-file is reached
int count = 0;
while ( !feof(fid) && fgets(line,100,fid)!=NULL ) {
char* line2 = line;
List_cx[count] = strtod(line2,&line2);
List_cy[count] = strtod(line2,&line2);
List_cz[count] = strtod(line2,&line2);
List_rad[count] = strtod(line2,&line2);
cout << "Number of spheres extracted is: " << count << endl;
INSIST( count==nspheres, "Specified number of spheres is probably incorrect!" );
// .............................................................
void ReadSpherePacking(int nspheres, double *List_cx, double *List_cy,
double *List_cz, double *List_rad) {
// Read in the full sphere pack
//...... READ IN THE SPHERES...................................
cout << "Reading the packing file..." << endl;
FILE *fid = fopen("pack.out", "rb");
INSIST(fid != NULL, "Error opening pack.out");
//.........Trash the header lines..........
char line[100];
fgetl(line, 100, fid);
fgetl(line, 100, fid);
fgetl(line, 100, fid);
fgetl(line, 100, fid);
fgetl(line, 100, fid);
// the spheres..................
// We will read until a blank line or end-of-file is reached
int count = 0;
while (!feof(fid) && fgets(line, 100, fid) != NULL) {
char *line2 = line;
List_cx[count] = strtod(line2, &line2);
List_cy[count] = strtod(line2, &line2);
List_cz[count] = strtod(line2, &line2);
List_rad[count] = strtod(line2, &line2);
cout << "Number of spheres extracted is: " << count << endl;
INSIST(count == nspheres,
"Specified number of spheres is probably incorrect!");
// .............................................................
void AssignLocalSolidID(char *ID, int nspheres, double *List_cx, double *List_cy, double *List_cz, double *List_rad,
double Lx, double Ly, double Lz, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz)
// Use sphere lists to determine which nodes are in porespace
// Write out binary file for nodes
char value;
int N = Nx*Ny*Nz; // Domain size, including the halo
double hx,hy,hz;
double x,y,z;
double cx,cy,cz,r;
int imin,imax,jmin,jmax,kmin,kmax;
int p,i,j,k,n;
double min_x,min_y,min_z;
// double max_x,max_y,max_z;
// Lattice spacing for the entire domain
// It should generally be true that hx=hy=hz
// Otherwise, you will end up with ellipsoids
hx = Lx/(Nx*nprocx-1);
hy = Ly/(Ny*nprocy-1);
hz = Lz/(Nz*nprocz-1);
// Get maximum and minimum for this domain
// Halo is included !
min_x = double(iproc*Nx-1)*hx;
min_y = double(jproc*Ny-1)*hy;
min_z = double(kproc*Nz-1)*hz;
// max_x = ((iproc+1)*Nx+1)*hx;
// max_y = ((jproc+1)*Ny+1)*hy;
// max_z = ((kproc+1)*Nz+1)*hz;
void AssignLocalSolidID(char *ID, int nspheres, double *List_cx,
double *List_cy, double *List_cz, double *List_rad,
double Lx, double Ly, double Lz, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy,
int nprocz) {
// Use sphere lists to determine which nodes are in porespace
// Write out binary file for nodes
char value;
int N = Nx * Ny * Nz; // Domain size, including the halo
double hx, hy, hz;
double x, y, z;
double cx, cy, cz, r;
int imin, imax, jmin, jmax, kmin, kmax;
int p, i, j, k, n;
double min_x, min_y, min_z;
// double max_x,max_y,max_z;
// Lattice spacing for the entire domain
// It should generally be true that hx=hy=hz
// Otherwise, you will end up with ellipsoids
hx = Lx / (Nx * nprocx - 1);
hy = Ly / (Ny * nprocy - 1);
hz = Lz / (Nz * nprocz - 1);
// Get maximum and minimum for this domain
// Halo is included !
min_x = double(iproc * Nx - 1) * hx;
min_y = double(jproc * Ny - 1) * hy;
min_z = double(kproc * Nz - 1) * hz;
// max_x = ((iproc+1)*Nx+1)*hx;
// max_y = ((jproc+1)*Ny+1)*hy;
// max_z = ((kproc+1)*Nz+1)*hz;
// Pre-initialize local ID
for (n=0;n<N;n++){
// Pre-initialize local ID
for (n = 0; n < N; n++) {
ID[n] = 1;
// .........Loop over the spheres.............
for (p=0;p<nspheres;p++){
// Get the sphere from the list, map to local min
cx = List_cx[p] - min_x;
cy = List_cy[p] - min_y;
cz = List_cz[p] - min_z;
r = List_rad[p];
// Check if
// Range for this sphere in global indexing
imin = int ((cx-r)/hx)-1;
imax = int ((cx+r)/hx)+1;
jmin = int ((cy-r)/hy)-1;
jmax = int ((cy+r)/hy)+1;
kmin = int ((cz-r)/hz)-1;
kmax = int ((cz+r)/hz)+1;
// Obviously we have to do something at the edges
if (imin<0) imin = 0;
if (imin>Nx) imin = Nx;
if (imax<0) imax = 0;
if (imax>Nx) imax = Nx;
if (jmin<0) jmin = 0;
if (jmin>Ny) jmin = Ny;
if (jmax<0) jmax = 0;
if (jmax>Ny) jmax = Ny;
if (kmin<0) kmin = 0;
if (kmin>Nz) kmin = Nz;
if (kmax<0) kmax = 0;
if (kmax>Nz) kmax = Nz;
// Loop over the domain for this sphere (may be null)
for (i=imin;i<imax;i++){
for (j=jmin;j<jmax;j++){
for (k=kmin;k<kmax;k++){
// Initialize ID value to 'fluid (=1)'
x = i*hx;
y = j*hy;
z = k*hz;
value = 1;
// if inside sphere, set to zero
if ( (cx-x)*(cx-x)+(cy-y)*(cy-y)+(cz-z)*(cz-z) < r*r){
// get the position in the list
n = k*Nx*Ny+j*Nx+i;
if ( ID[n] != 0 ){
ID[n] = value;
// .........Loop over the spheres.............
for (p = 0; p < nspheres; p++) {
// Get the sphere from the list, map to local min
cx = List_cx[p] - min_x;
cy = List_cy[p] - min_y;
cz = List_cz[p] - min_z;
r = List_rad[p];
// Check if
// Range for this sphere in global indexing
imin = int((cx - r) / hx) - 1;
imax = int((cx + r) / hx) + 1;
jmin = int((cy - r) / hy) - 1;
jmax = int((cy + r) / hy) + 1;
kmin = int((cz - r) / hz) - 1;
kmax = int((cz + r) / hz) + 1;
// Obviously we have to do something at the edges
if (imin < 0)
imin = 0;
if (imin > Nx)
imin = Nx;
if (imax < 0)
imax = 0;
if (imax > Nx)
imax = Nx;
if (jmin < 0)
jmin = 0;
if (jmin > Ny)
jmin = Ny;
if (jmax < 0)
jmax = 0;
if (jmax > Ny)
jmax = Ny;
if (kmin < 0)
kmin = 0;
if (kmin > Nz)
kmin = Nz;
if (kmax < 0)
kmax = 0;
if (kmax > Nz)
kmax = Nz;
// Loop over the domain for this sphere (may be null)
for (i = imin; i < imax; i++) {
for (j = jmin; j < jmax; j++) {
for (k = kmin; k < kmax; k++) {
// Initialize ID value to 'fluid (=1)'
x = i * hx;
y = j * hy;
z = k * hz;
value = 1;
// if inside sphere, set to zero
if ((cx - x) * (cx - x) + (cy - y) * (cy - y) +
(cz - z) * (cz - z) <
r * r) {
value = 0;
// get the position in the list
n = k * Nx * Ny + j * Nx + i;
if (ID[n] != 0) {
ID[n] = value;
void SignedDistance(double *Distance, int nspheres, double *List_cx, double *List_cy, double *List_cz, double *List_rad,
double Lx, double Ly, double Lz, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz)
// Use sphere lists to determine which nodes are in porespace
// Write out binary file for nodes
int N = Nx*Ny*Nz; // Domain size, including the halo
double hx,hy,hz;
double x,y,z;
double cx,cy,cz,r;
int imin,imax,jmin,jmax,kmin,kmax;
int p,i,j,k,n;
double min_x,min_y,min_z;
double distance;
// Lattice spacing for the entire domain
// It should generally be true that hx=hy=hz
// Otherwise, you will end up with ellipsoids
hx = Lx/((Nx-2)*nprocx-1);
hy = Ly/((Ny-2)*nprocy-1);
hz = Lz/((Nz-2)*nprocz-1);
// Get maximum and minimum for this domain
// Halo is included !
min_x = double(iproc*(Nx-2)-1)*hx;
min_y = double(jproc*(Ny-2)-1)*hy;
min_z = double(kproc*(Nz-2)-1)*hz;
void SignedDistance(double *Distance, int nspheres, double *List_cx,
double *List_cy, double *List_cz, double *List_rad,
double Lx, double Ly, double Lz, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy,
int nprocz) {
// Use sphere lists to determine which nodes are in porespace
// Write out binary file for nodes
int N = Nx * Ny * Nz; // Domain size, including the halo
double hx, hy, hz;
double x, y, z;
double cx, cy, cz, r;
int imin, imax, jmin, jmax, kmin, kmax;
int p, i, j, k, n;
double min_x, min_y, min_z;
double distance;
// Lattice spacing for the entire domain
// It should generally be true that hx=hy=hz
// Otherwise, you will end up with ellipsoids
hx = Lx / ((Nx - 2) * nprocx - 1);
hy = Ly / ((Ny - 2) * nprocy - 1);
hz = Lz / ((Nz - 2) * nprocz - 1);
// Get maximum and minimum for this domain
// Halo is included !
min_x = double(iproc * (Nx - 2) - 1) * hx;
min_y = double(jproc * (Ny - 2) - 1) * hy;
min_z = double(kproc * (Nz - 2) - 1) * hz;
// Pre-initialize Distance
for (n=0;n<N;n++){
// Pre-initialize Distance
for (n = 0; n < N; n++) {
Distance[n] = 100.0;
// .........Loop over the spheres.............
for (p=0;p<nspheres;p++){
// Get the sphere from the list, map to local min
cx = List_cx[p] - min_x;
cy = List_cy[p] - min_y;
cz = List_cz[p] - min_z;
r = List_rad[p];
// Check if
// Range for this sphere in global indexing
imin = int ((cx-2*r)/hx);
imax = int ((cx+2*r)/hx)+2;
jmin = int ((cy-2*r)/hy);
jmax = int ((cy+2*r)/hy)+2;
kmin = int ((cz-2*r)/hz);
kmax = int ((cz+2*r)/hz)+2;
// Obviously we have to do something at the edges
if (imin<0) imin = 0;
if (imin>Nx) imin = Nx;
if (imax<0) imax = 0;
if (imax>Nx) imax = Nx;
if (jmin<0) jmin = 0;
if (jmin>Ny) jmin = Ny;
if (jmax<0) jmax = 0;
if (jmax>Ny) jmax = Ny;
if (kmin<0) kmin = 0;
if (kmin>Nz) kmin = Nz;
if (kmax<0) kmax = 0;
if (kmax>Nz) kmax = Nz;
// Loop over the domain for this sphere (may be null)
for (i=imin;i<imax;i++){
for (j=jmin;j<jmax;j++){
for (k=kmin;k<kmax;k++){
// x,y,z is distance in physical units
x = i*hx;
y = j*hy;
z = k*hz;
// if inside sphere, set to zero
// get the position in the list
n = k*Nx*Ny+j*Nx+i;
// Compute the distance
distance = sqrt((cx-x)*(cx-x)+(cy-y)*(cy-y)+(cz-z)*(cz-z)) - r;
// Assign the minimum distance
if (distance < Distance[n]) Distance[n] = distance;
// .........Loop over the spheres.............
for (p = 0; p < nspheres; p++) {
// Get the sphere from the list, map to local min
cx = List_cx[p] - min_x;
cy = List_cy[p] - min_y;
cz = List_cz[p] - min_z;
r = List_rad[p];
// Check if
// Range for this sphere in global indexing
imin = int((cx - 2 * r) / hx);
imax = int((cx + 2 * r) / hx) + 2;
jmin = int((cy - 2 * r) / hy);
jmax = int((cy + 2 * r) / hy) + 2;
kmin = int((cz - 2 * r) / hz);
kmax = int((cz + 2 * r) / hz) + 2;
// Obviously we have to do something at the edges
if (imin < 0)
imin = 0;
if (imin > Nx)
imin = Nx;
if (imax < 0)
imax = 0;
if (imax > Nx)
imax = Nx;
if (jmin < 0)
jmin = 0;
if (jmin > Ny)
jmin = Ny;
if (jmax < 0)
jmax = 0;
if (jmax > Ny)
jmax = Ny;
if (kmin < 0)
kmin = 0;
if (kmin > Nz)
kmin = Nz;
if (kmax < 0)
kmax = 0;
if (kmax > Nz)
kmax = Nz;
// Loop over the domain for this sphere (may be null)
for (i = imin; i < imax; i++) {
for (j = jmin; j < jmax; j++) {
for (k = kmin; k < kmax; k++) {
// x,y,z is distance in physical units
x = i * hx;
y = j * hy;
z = k * hz;
// if inside sphere, set to zero
// get the position in the list
n = k * Nx * Ny + j * Nx + i;
// Compute the distance
distance = sqrt((cx - x) * (cx - x) + (cy - y) * (cy - y) +
(cz - z) * (cz - z)) -
// Assign the minimum distance
if (distance < Distance[n])
Distance[n] = distance;
// Map the distance to lattice units
for (n=0; n<N; n++) Distance[n] = Distance[n]/hx;
// Map the distance to lattice units
for (n = 0; n < N; n++)
Distance[n] = Distance[n] / hx;

View File

@ -40,14 +40,19 @@ void WriteLocalSolidID(char *FILENAME, char *ID, int N);
void WriteLocalSolidDistance(char *FILENAME, double *Distance, int N);
void ReadSpherePacking(int nspheres, double *List_cx, double *List_cy, double *List_cz, double *List_rad);
void ReadSpherePacking(int nspheres, double *List_cx, double *List_cy,
double *List_cz, double *List_rad);
void AssignLocalSolidID(char *ID, int nspheres, double *List_cx, double *List_cy, double *List_cz, double *List_rad,
double Lx, double Ly, double Lz, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz);
void AssignLocalSolidID(char *ID, int nspheres, double *List_cx,
double *List_cy, double *List_cz, double *List_rad,
double Lx, double Ly, double Lz, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy,
int nprocz);
void SignedDistance(double *Distance, int nspheres, double *List_cx, double *List_cy, double *List_cz, double *List_rad,
double Lx, double Ly, double Lz, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy, int nprocz);
void SignedDistance(double *Distance, int nspheres, double *List_cx,
double *List_cy, double *List_cz, double *List_rad,
double Lx, double Ly, double Lz, int Nx, int Ny, int Nz,
int iproc, int jproc, int kproc, int nprocx, int nprocy,
int nprocz);

View File

@ -22,373 +22,361 @@
#include <string>
#include <vector>
#define pout std::cout
#define printp printf
* Constructor/Destructor *
UnitTest::UnitTest() {
#ifdef USE_MPI
UnitTest::~UnitTest() { reset(); }
void UnitTest::reset()
void UnitTest::reset() {
// Clear the data forcing a reallocation
std::vector<std::string>().swap( pass_messages );
std::vector<std::string>().swap( fail_messages );
std::vector<std::string>().swap( expected_fail_messages );
* Add a pass, fail, expected failure message in a thread-safe way *
void UnitTest::passes( const std::string &in )
void UnitTest::passes(const std::string &in) {
pass_messages.push_back( in );
void UnitTest::failure( const std::string &in )
void UnitTest::failure(const std::string &in) {
fail_messages.push_back( in );
void UnitTest::expected_failure( const std::string &in )
void UnitTest::expected_failure(const std::string &in) {
expected_fail_messages.push_back( in );
* Print a global report *
* Note: only rank 0 will print, all messages will be aggregated *
inline std::vector<int> UnitTest::allGather( int value ) const
inline std::vector<int> UnitTest::allGather(int value) const {
int size = getSize();
std::vector<int> data( size, value );
std::vector<int> data(size, value);
#ifdef USE_MPI
if ( size > 1 )
MPI_Allgather( &value, 1, MPI_INT,, 1, MPI_INT, comm );
if (size > 1)
MPI_Allgather(&value, 1, MPI_INT,, 1, MPI_INT, comm);
return data;
inline void UnitTest::barrier() const
inline void UnitTest::barrier() const {
#ifdef USE_MPI
if ( getSize() > 1 )
MPI_Barrier( comm );
if (getSize() > 1)
static inline void print_messages( const std::vector<std::vector<std::string>> &messages )
if ( messages.size() > 1 ) {
for ( size_t i = 0; i < messages.size(); i++ ) {
if ( !messages[i].empty() ) {
printp( " Proccessor %i:\n", static_cast<int>( i ) );
for ( const auto &j : messages[i] )
static inline void
print_messages(const std::vector<std::vector<std::string>> &messages) {
if (messages.size() > 1) {
for (size_t i = 0; i < messages.size(); i++) {
if (!messages[i].empty()) {
printp(" Proccessor %i:\n", static_cast<int>(i));
for (const auto &j : messages[i])
pout << " " << j << std::endl;
} else {
for ( const auto &j : messages[0] )
for (const auto &j : messages[0])
pout << " " << j << std::endl;
void UnitTest::report( const int level0 ) const
void UnitTest::report(const int level0) const {
int size = getSize();
int rank = getRank();
// Broadcast the print level from rank 0
int level = level0;
#ifdef USE_MPI
if ( getSize() > 1 )
MPI_Bcast( &level, 1, MPI_INT, 0, comm );
if (getSize() > 1)
MPI_Bcast(&level, 1, MPI_INT, 0, comm);
if ( level < 0 || level > 2 )
ERROR( "Invalid print level" );
if (level < 0 || level > 2)
ERROR("Invalid print level");
// Perform a global all gather to get the number of failures per processor
auto N_pass = allGather( pass_messages.size() );
auto N_fail = allGather( fail_messages.size() );
auto N_expected_fail = allGather( expected_fail_messages.size() );
int N_pass_tot = 0;
int N_fail_tot = 0;
auto N_pass = allGather(pass_messages.size());
auto N_fail = allGather(fail_messages.size());
auto N_expected_fail = allGather(expected_fail_messages.size());
int N_pass_tot = 0;
int N_fail_tot = 0;
int N_expected_fail_tot = 0;
for ( int i = 0; i < size; i++ ) {
for (int i = 0; i < size; i++) {
N_pass_tot += N_pass[i];
N_fail_tot += N_fail[i];
N_expected_fail_tot += N_expected_fail[i];
// Send all messages to rank 0 (if needed)
std::vector<std::vector<std::string>> pass_messages_rank( size );
std::vector<std::vector<std::string>> fail_messages_rank( size );
std::vector<std::vector<std::string>> expected_fail_rank( size );
std::vector<std::vector<std::string>> pass_messages_rank(size);
std::vector<std::vector<std::string>> fail_messages_rank(size);
std::vector<std::vector<std::string>> expected_fail_rank(size);
// Get the pass messages
if ( ( level == 1 && N_pass_tot <= 20 ) || level == 2 )
pass_messages_rank = UnitTest::gatherMessages( pass_messages, 1 );
if ((level == 1 && N_pass_tot <= 20) || level == 2)
pass_messages_rank = UnitTest::gatherMessages(pass_messages, 1);
// Get the fail messages
if ( level == 1 || level == 2 )
fail_messages_rank = UnitTest::gatherMessages( fail_messages, 2 );
if (level == 1 || level == 2)
fail_messages_rank = UnitTest::gatherMessages(fail_messages, 2);
// Get the expected_fail messages
if ( ( level == 1 && N_expected_fail_tot <= 50 ) || level == 2 )
expected_fail_rank = UnitTest::gatherMessages( expected_fail_messages, 2 );
if ((level == 1 && N_expected_fail_tot <= 50) || level == 2)
expected_fail_rank =
UnitTest::gatherMessages(expected_fail_messages, 2);
// Print the results of all messages (only rank 0 will print)
if ( rank == 0 ) {
if (rank == 0) {
pout << std::endl;
// Print the passed tests
pout << "Tests passed" << std::endl;
if ( level == 0 || ( level == 1 && N_pass_tot > 20 ) ) {
if (level == 0 || (level == 1 && N_pass_tot > 20)) {
// We want to print a summary
if ( size > 8 ) {
if (size > 8) {
// Print 1 summary for all processors
printp( " %i tests passed (use report level 2 for more detail)\n", N_pass_tot );
printp(" %i tests passed (use report level 2 for more "
} else {
// Print a summary for each processor
for ( int i = 0; i < size; i++ )
printp( " %i tests passed (proc %i) (use report level 2 for more detail)\n",
N_pass[i], i );
for (int i = 0; i < size; i++)
printp(" %i tests passed (proc %i) (use report level 2 "
"for more detail)\n",
N_pass[i], i);
} else {
// We want to print all messages
for ( int i = 0; i < size; i++ )
ASSERT( (int) pass_messages_rank[i].size() == N_pass[i] );
print_messages( pass_messages_rank );
for (int i = 0; i < size; i++)
ASSERT((int)pass_messages_rank[i].size() == N_pass[i]);
pout << std::endl;
// Print the tests that failed
pout << "Tests failed" << std::endl;
if ( level == 0 ) {
if (level == 0) {
// We want to print a summary
if ( size > 8 ) {
if (size > 8) {
// Print 1 summary for all processors
printp( " %i tests failed (use report level 2 for more detail)\n", N_fail_tot );
printp(" %i tests failed (use report level 2 for more "
} else {
// Print a summary for each processor
for ( int i = 0; i < size; i++ )
printp( " %i tests failed (proc %i) (use report level 2 for more detail)\n",
N_fail[i], i );
for (int i = 0; i < size; i++)
printp(" %i tests failed (proc %i) (use report level 2 "
"for more detail)\n",
N_fail[i], i);
} else {
// We want to print all messages
for ( int i = 0; i < size; i++ )
ASSERT( (int) fail_messages_rank[i].size() == N_fail[i] );
print_messages( fail_messages_rank );
for (int i = 0; i < size; i++)
ASSERT((int)fail_messages_rank[i].size() == N_fail[i]);
pout << std::endl;
// Print the tests that expected failed
pout << "Tests expected failed" << std::endl;
if ( level == 0 || ( level == 1 && N_expected_fail_tot > 50 ) ) {
if (level == 0 || (level == 1 && N_expected_fail_tot > 50)) {
// We want to print a summary
if ( size > 8 ) {
if (size > 8) {
// Print 1 summary for all processors
printp( " %i tests expected failed (use report level 2 for more detail)\n",
N_expected_fail_tot );
printp(" %i tests expected failed (use report level 2 for "
"more detail)\n",
} else {
// Print a summary for each processor
for ( int i = 0; i < size; i++ )
printp( " %i tests expected failed (proc %i) (use report level 2 for more "
N_expected_fail[i], i );
for (int i = 0; i < size; i++)
printp(" %i tests expected failed (proc %i) (use "
"report level 2 for more "
N_expected_fail[i], i);
} else {
// We want to print all messages
for ( int i = 0; i < size; i++ )
ASSERT( (int) expected_fail_rank[i].size() == N_expected_fail[i] );
print_messages( expected_fail_rank );
for (int i = 0; i < size; i++)
ASSERT((int)expected_fail_rank[i].size() == N_expected_fail[i]);
pout << std::endl;
// Add a barrier to synchronize all processors (rank 0 is much slower)
Utilities::sleep_ms( 10 ); // Need a brief pause to allow any printing to finish
10); // Need a brief pause to allow any printing to finish
* Gather the messages to rank 0 *
std::vector<std::vector<std::string>> UnitTest::gatherMessages(
const std::vector<std::string> &local_messages, int tag ) const
UnitTest::gatherMessages(const std::vector<std::string> &local_messages,
int tag) const {
const int rank = getRank();
const int size = getSize();
std::vector<std::vector<std::string>> messages( size );
if ( rank == 0 ) {
std::vector<std::vector<std::string>> messages(size);
if (rank == 0) {
// Rank 0 should receive all messages
for ( int i = 0; i < size; i++ ) {
if ( i == 0 )
for (int i = 0; i < size; i++) {
if (i == 0)
messages[i] = local_messages;
messages[i] = unpack_message_stream( i, tag );
messages[i] = unpack_message_stream(i, tag);
} else {
// All other ranks send their message (use non-blocking communication)
pack_message_stream( local_messages, 0, tag );
pack_message_stream(local_messages, 0, tag);
return messages;
* Pack and send the given messages *
void UnitTest::pack_message_stream(
const std::vector<std::string> &messages, const int rank, const int tag ) const
void UnitTest::pack_message_stream(const std::vector<std::string> &messages,
const int rank, const int tag) const {
#ifdef USE_MPI
// Get the size of the messages
auto N_messages = (int) messages.size();
auto *msg_size = new int[N_messages];
auto N_messages = (int)messages.size();
auto *msg_size = new int[N_messages];
int msg_size_tot = 0;
for ( int i = 0; i < N_messages; i++ ) {
msg_size[i] = (int) messages[i].size();
for (int i = 0; i < N_messages; i++) {
msg_size[i] = (int)messages[i].size();
msg_size_tot += msg_size[i];
// Allocate space for the message stream
size_t size_data = ( N_messages + 1 ) * sizeof( int ) + msg_size_tot;
auto *data = new char[size_data];
size_t size_data = (N_messages + 1) * sizeof(int) + msg_size_tot;
auto *data = new char[size_data];
// Pack the message stream
memcpy( data, &N_messages, sizeof( int ) );
memcpy( &data[sizeof( int )], msg_size, N_messages * sizeof( int ) );
size_t k = ( N_messages + 1 ) * sizeof( int );
for ( int i = 0; i < N_messages; i++ ) {
messages[i].copy( &data[k], msg_size[i] );
memcpy(data, &N_messages, sizeof(int));
memcpy(&data[sizeof(int)], msg_size, N_messages * sizeof(int));
size_t k = (N_messages + 1) * sizeof(int);
for (int i = 0; i < N_messages; i++) {
messages[i].copy(&data[k], msg_size[i]);
k += msg_size[i];
// Send the message stream (using a non-blocking send)
MPI_Request request;
MPI_Isend( data, size_data, MPI_CHAR, rank, tag, comm, &request );
MPI_Isend(data, size_data, MPI_CHAR, rank, tag, comm, &request);
// Wait for the communication to send and free the temporary memory
MPI_Status status;
MPI_Wait( &request, &status );
MPI_Wait(&request, &status);
delete[] data;
delete[] msg_size;
NULL_USE( messages );
NULL_USE( rank );
NULL_USE( tag );
* Receive and unpack a message stream *
std::vector<std::string> UnitTest::unpack_message_stream( const int rank, const int tag ) const
std::vector<std::string> UnitTest::unpack_message_stream(const int rank,
const int tag) const {
#ifdef USE_MPI
// Probe the message to get the message size
MPI_Status status;
MPI_Probe( rank, tag, comm, &status );
MPI_Probe(rank, tag, comm, &status);
int size_data = -1;
MPI_Get_count( &status, MPI_BYTE, &size_data );
ASSERT( size_data >= 0 );
MPI_Get_count(&status, MPI_BYTE, &size_data);
ASSERT(size_data >= 0);
// Allocate memory to receive the data
auto *data = new char[size_data];
// receive the data (using a non-blocking receive)
MPI_Request request;
MPI_Irecv( data, size_data, MPI_CHAR, rank, tag, comm, &request );
MPI_Irecv(data, size_data, MPI_CHAR, rank, tag, comm, &request);
// Wait for the communication to be received
MPI_Wait( &request, &status );
MPI_Wait(&request, &status);
// Unpack the message stream
int N_messages = 0;
memcpy( &N_messages, data, sizeof( int ) );
if ( N_messages == 0 ) {
memcpy(&N_messages, data, sizeof(int));
if (N_messages == 0) {
delete[] data;
return std::vector<std::string>();
std::vector<int> msg_size( N_messages );
std::vector<std::string> messages( N_messages );
memcpy(, &data[sizeof( int )], N_messages * sizeof( int ) );
int k = ( N_messages + 1 ) * sizeof( int );
for ( int i = 0; i < N_messages; i++ ) {
messages[i] = std::string( &data[k], msg_size[i] );
std::vector<int> msg_size(N_messages);
std::vector<std::string> messages(N_messages);
memcpy(, &data[sizeof(int)], N_messages * sizeof(int));
int k = (N_messages + 1) * sizeof(int);
for (int i = 0; i < N_messages; i++) {
messages[i] = std::string(&data[k], msg_size[i]);
k += msg_size[i];
delete[] data;
return messages;
NULL_USE( rank );
NULL_USE( tag );
return std::vector<std::string>();
* Other functions *
int UnitTest::getRank() const
int UnitTest::getRank() const {
int rank = 0;
#ifdef USE_MPI
int flag = 0;
MPI_Initialized( &flag );
if ( flag )
MPI_Comm_rank( comm, &rank );
if (flag)
MPI_Comm_rank(comm, &rank);
return rank;
int UnitTest::getSize() const
int UnitTest::getSize() const {
int size = 1;
#ifdef USE_MPI
int flag = 0;
MPI_Initialized( &flag );
if ( flag )
MPI_Comm_size( comm, &size );
if (flag)
MPI_Comm_size(comm, &size);
return size;
size_t UnitTest::NumPassGlobal() const
size_t UnitTest::NumPassGlobal() const {
size_t num = pass_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
if (getSize() > 1) {
auto send = static_cast<int>(num);
int sum = 0;
MPI_Allreduce(&send, &sum, 1, MPI_INT, MPI_SUM, comm);
num = static_cast<size_t>(sum);
return num;
size_t UnitTest::NumFailGlobal() const
size_t UnitTest::NumFailGlobal() const {
size_t num = fail_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
if (getSize() > 1) {
auto send = static_cast<int>(num);
int sum = 0;
MPI_Allreduce(&send, &sum, 1, MPI_INT, MPI_SUM, comm);
num = static_cast<size_t>(sum);
return num;
size_t UnitTest::NumExpectedFailGlobal() const
size_t UnitTest::NumExpectedFailGlobal() const {
size_t num = expected_fail_messages.size();
#ifdef USE_MPI
if ( getSize() > 1 ) {
auto send = static_cast<int>( num );
int sum = 0;
MPI_Allreduce( &send, &sum, 1, MPI_INT, MPI_SUM, comm );
num = static_cast<size_t>( sum );
if (getSize() > 1) {
auto send = static_cast<int>(num);
int sum = 0;
MPI_Allreduce(&send, &sum, 1, MPI_INT, MPI_SUM, comm);
num = static_cast<size_t>(sum);
return num;

View File

@ -25,7 +25,6 @@
#include "mpi.h"
* @brief Class UnitTest is simple utility for running unit tests.
* It provides basic routines for tracing success or failure of tests,
@ -44,8 +43,7 @@
* \endcode
class UnitTest
class UnitTest {
//! Constructor
@ -54,13 +52,13 @@ public:
virtual ~UnitTest();
//! Indicate a passed test (thread-safe)
virtual void passes( const std::string &in );
virtual void passes(const std::string &in);
//! Indicate a failed test (thread-safe)
virtual void failure( const std::string &in );
virtual void failure(const std::string &in);
//! Indicate an expected failed test (thread-safe)
virtual void expected_failure( const std::string &in );
virtual void expected_failure(const std::string &in);
//! Return the number of passed tests locally
virtual size_t NumPassLocal() const { return pass_messages.size(); }
@ -69,7 +67,9 @@ public:
virtual size_t NumFailLocal() const { return fail_messages.size(); }
//! Return the number of expected failed tests locally
virtual size_t NumExpectedFailLocal() const { return expected_fail_messages.size(); }
virtual size_t NumExpectedFailLocal() const {
return expected_fail_messages.size();
//! Return the number of passed tests locally
virtual size_t NumPassGlobal() const;
@ -98,7 +98,7 @@ public:
* failed tests (if <=50) or the number passed otherwise.
* 2: Report all passed, failed, and expected failed tests.
virtual void report( const int level = 1 ) const;
virtual void report(const int level = 1) const;
//! Clear the messages
void reset();
@ -114,23 +114,24 @@ protected:
// Make the copy constructor private
UnitTest( const UnitTest & ) {}
UnitTest(const UnitTest &) {}
// Function to pack the messages into a single data stream and send to the given processor
// Note: This function does not return until the message stream has been sent
void pack_message_stream(
const std::vector<std::string> &messages, const int rank, const int tag ) const;
void pack_message_stream(const std::vector<std::string> &messages,
const int rank, const int tag) const;
// Function to unpack the messages from a single data stream
// Note: This function does not return until the message stream has been received
std::vector<std::string> unpack_message_stream( const int rank, const int tag ) const;
std::vector<std::string> unpack_message_stream(const int rank,
const int tag) const;
// Helper functions
inline void barrier() const;
inline std::vector<int> allGather( int value ) const;
inline std::vector<std::vector<std::string>> gatherMessages(
const std::vector<std::string> &local_messages, int tag ) const;
inline std::vector<int> allGather(int value) const;
inline std::vector<std::vector<std::string>>
gatherMessages(const std::vector<std::string> &local_messages,
int tag) const;

View File

@ -21,134 +21,126 @@
#include <cmath>
#include <string>
constexpr double Units::d_pow10[22];
constexpr char Units::d_prefixSymbol[];
* Constructors *
Units::Units() : d_prefix( UnitPrefix::unknown ), d_unit( UnitValue::unknown ) {}
Units::Units( UnitPrefix p, UnitValue u ) : d_prefix( p ), d_unit( u ) {}
Units::Units( const std::string& unit )
: d_prefix( UnitPrefix::unknown ), d_unit( UnitValue::unknown )
Units::Units() : d_prefix(UnitPrefix::unknown), d_unit(UnitValue::unknown) {}
Units::Units(UnitPrefix p, UnitValue u) : d_prefix(p), d_unit(u) {}
Units::Units(const std::string &unit)
: d_prefix(UnitPrefix::unknown), d_unit(UnitValue::unknown) {
// Parse the string to get it into a more friendly format
auto tmp = unit;
tmp.erase( std::remove( tmp.begin(), tmp.end(), ' ' ), tmp.end() );
tmp.erase(std::remove(tmp.begin(), tmp.end(), ' '), tmp.end());
// Check if the character '-' is present indicating a seperation between the prefix and unit
size_t index = tmp.find( '-' );
if ( index != std::string::npos ) {
d_prefix = getUnitPrefix( tmp.substr( 0, index ) );
d_unit = getUnitValue( tmp.substr( index + 1 ) );
size_t index = tmp.find('-');
if (index != std::string::npos) {
d_prefix = getUnitPrefix(tmp.substr(0, index));
d_unit = getUnitValue(tmp.substr(index + 1));
} else {
if ( tmp.size() <= 1 ) {
if (tmp.size() <= 1) {
d_prefix = UnitPrefix::none;
d_unit = getUnitValue( tmp );
} else if ( tmp.substr( 0, 2 ) == "da" ) {
d_unit = getUnitValue(tmp);
} else if (tmp.substr(0, 2) == "da") {
d_prefix = UnitPrefix::deca;
d_unit = getUnitValue( tmp.substr( 2 ) );
d_unit = getUnitValue(tmp.substr(2));
} else {
d_prefix = getUnitPrefix( tmp.substr( 0, 1 ) );
d_unit = getUnitValue( tmp.substr( 1 ) );
if ( d_prefix == UnitPrefix::unknown || d_unit == UnitValue::unknown ) {
d_prefix = getUnitPrefix(tmp.substr(0, 1));
d_unit = getUnitValue(tmp.substr(1));
if (d_prefix == UnitPrefix::unknown ||
d_unit == UnitValue::unknown) {
d_prefix = UnitPrefix::none;
d_unit = getUnitValue( tmp );
d_unit = getUnitValue(tmp);
* Get prefix *
Units::UnitPrefix Units::getUnitPrefix( const std::string& str ) noexcept
Units::UnitPrefix Units::getUnitPrefix(const std::string &str) noexcept {
Units::UnitPrefix value = UnitPrefix::unknown;
if ( str.empty() ) {
if (str.empty()) {
value = UnitPrefix::none;
} else if ( str == "yotta" || str == "Y" ) {
} else if (str == "yotta" || str == "Y") {
value = UnitPrefix::yotta;
} else if ( str == "zetta" || str == "Z" ) {
} else if (str == "zetta" || str == "Z") {
value = UnitPrefix::zetta;
} else if ( str == "exa" || str == "E" ) {
} else if (str == "exa" || str == "E") {
value = UnitPrefix::exa;
} else if ( str == "peta" || str == "P" ) {
} else if (str == "peta" || str == "P") {
value = UnitPrefix::peta;
} else if ( str == "tera" || str == "T" ) {
} else if (str == "tera" || str == "T") {
value = UnitPrefix::tera;
} else if ( str == "giga" || str == "G" ) {
} else if (str == "giga" || str == "G") {
value = UnitPrefix::giga;
} else if ( str == "mega" || str == "M" ) {
} else if (str == "mega" || str == "M") {
value = UnitPrefix::mega;
} else if ( str == "kilo" || str == "k" ) {
} else if (str == "kilo" || str == "k") {
value = UnitPrefix::kilo;
} else if ( str == "hecto" || str == "h" ) {
} else if (str == "hecto" || str == "h") {
value = UnitPrefix::hecto;
} else if ( str == "deca" || str == "da" ) {
} else if (str == "deca" || str == "da") {
value = UnitPrefix::deca;
} else if ( str == "deci" || str == "d" ) {
} else if (str == "deci" || str == "d") {
value = UnitPrefix::deci;
} else if ( str == "centi" || str == "c" ) {
} else if (str == "centi" || str == "c") {
value = UnitPrefix::centi;
} else if ( str == "milli" || str == "m" ) {
} else if (str == "milli" || str == "m") {
value = UnitPrefix::milli;
} else if ( str == "micro" || str == "u" ) {
} else if (str == "micro" || str == "u") {
value = UnitPrefix::micro;
} else if ( str == "nano" || str == "n" ) {
} else if (str == "nano" || str == "n") {
value = UnitPrefix::nano;
} else if ( str == "pico" || str == "p" ) {
} else if (str == "pico" || str == "p") {
value = UnitPrefix::pico;
} else if ( str == "femto" || str == "f" ) {
} else if (str == "femto" || str == "f") {
value = UnitPrefix::femto;
} else if ( str == "atto" || str == "a" ) {
} else if (str == "atto" || str == "a") {
value = UnitPrefix::atto;
} else if ( str == "zepto" || str == "z" ) {
} else if (str == "zepto" || str == "z") {
value = UnitPrefix::zepto;
} else if ( str == "yocto" || str == "y" ) {
} else if (str == "yocto" || str == "y") {
value = UnitPrefix::yocto;
return value;
* Get unit value *
Units::UnitValue Units::getUnitValue( const std::string& str ) noexcept
Units::UnitValue Units::getUnitValue(const std::string &str) noexcept {
Units::UnitValue value = UnitValue::unknown;
if ( str == "meter" || str == "m" ) {
if (str == "meter" || str == "m") {
value = UnitValue::meter;
} else if ( str == "gram" || str == "g" ) {
} else if (str == "gram" || str == "g") {
value = UnitValue::gram;
} else if ( str == "second" || str == "s" ) {
} else if (str == "second" || str == "s") {
value = UnitValue::second;
} else if ( str == "ampere" || str == "A" ) {
} else if (str == "ampere" || str == "A") {
value = UnitValue::ampere;
} else if ( str == "kelvin" || str == "K" ) {
} else if (str == "kelvin" || str == "K") {
value = UnitValue::kelvin;
} else if ( str == "joule" || str == "J" ) {
} else if (str == "joule" || str == "J") {
value = UnitValue::joule;
} else if ( str == "ergs" || str == "erg" ) {
} else if (str == "ergs" || str == "erg") {
value = UnitValue::erg;
} else if ( str == "degree" || str == "degrees" ) {
} else if (str == "degree" || str == "degrees") {
value = UnitValue::degree;
} else if ( str == "radian" || str == "radians" ) {
} else if (str == "radian" || str == "radians") {
value = UnitValue::radian;
return value;
* Get unit type *
Units::UnitType Units::getUnitType( UnitValue u ) noexcept
switch ( u ) {
Units::UnitType Units::getUnitType(UnitValue u) noexcept {
switch (u) {
case UnitValue::meter:
return UnitType::length;
case UnitValue::gram:
@ -170,72 +162,66 @@ Units::UnitType Units::getUnitType( UnitValue u ) noexcept
* Convert to another unit system *
double Units::convert( const Units& rhs ) const noexcept
if ( this->operator==( rhs ) )
double Units::convert(const Units &rhs) const noexcept {
if (this->operator==(rhs))
return 1;
// Convert the prefix
double cp = convert( d_prefix ) / convert( rhs.d_prefix );
if ( d_unit == rhs.d_unit )
double cp = convert(d_prefix) / convert(rhs.d_prefix);
if (d_unit == rhs.d_unit)
return cp; // Only need to convert prefix
// Convert the unit
if ( getUnitType( d_unit ) != getUnitType( rhs.d_unit ) )
if (getUnitType(d_unit) != getUnitType(rhs.d_unit))
return 0; // Invalid conversion
double cu = 0;
if ( d_unit == UnitValue::joule && rhs.d_unit == UnitValue::erg )
if (d_unit == UnitValue::joule && rhs.d_unit == UnitValue::erg)
cu = 1e7;
else if ( d_unit == UnitValue::erg && rhs.d_unit == UnitValue::joule )
else if (d_unit == UnitValue::erg && rhs.d_unit == UnitValue::joule)
cu = 1e-7;
else if ( d_unit == UnitValue::degree && rhs.d_unit == UnitValue::radian )
else if (d_unit == UnitValue::degree && rhs.d_unit == UnitValue::radian)
cu = 0.017453292519943;
else if ( d_unit == UnitValue::radian && rhs.d_unit == UnitValue::degree )
else if (d_unit == UnitValue::radian && rhs.d_unit == UnitValue::degree)
cu = 57.295779513082323;
// Return the total conversion
return cp * cu;
* Write a string for the units *
std::string Units::str() const
ASSERT( !isNull() );
return std::string( str( d_prefix ).data() ) + str( d_unit );
std::string Units::str() const {
return std::string(str(d_prefix).data()) + str(d_unit);
std::array<char, 3> Units::str( UnitPrefix p ) noexcept
std::array<char, 3> Units::str(UnitPrefix p) noexcept {
std::array<char, 3> str;
str[0] = d_prefixSymbol[static_cast<int8_t>( p )];
str[0] = d_prefixSymbol[static_cast<int8_t>(p)];
str[1] = 0;
str[2] = 0;
if ( p == UnitPrefix::deca )
if (p == UnitPrefix::deca)
str[1] = 'a';
return str;
std::string Units::str( UnitValue u )
if ( u == UnitValue::meter ) {
std::string Units::str(UnitValue u) {
if (u == UnitValue::meter) {
return "m";
} else if ( u == UnitValue::gram ) {
} else if (u == UnitValue::gram) {
return "g";
} else if ( u == UnitValue::second ) {
} else if (u == UnitValue::second) {
return "s";
} else if ( u == UnitValue::ampere ) {
} else if (u == UnitValue::ampere) {
return "A";
} else if ( u == UnitValue::kelvin ) {
} else if (u == UnitValue::kelvin) {
return "K";
} else if ( u == UnitValue::joule ) {
} else if (u == UnitValue::joule) {
return "J";
} else if ( u == UnitValue::erg ) {
} else if (u == UnitValue::erg) {
return "erg";
} else if ( u == UnitValue::degree ) {
} else if (u == UnitValue::degree) {
return "degree";
} else if ( u == UnitValue::radian ) {
} else if (u == UnitValue::radian) {
return "radian";
return "unknown";

View File

@ -24,34 +24,32 @@
#include <string>
#include <vector>
//! Unit system class
class Units final
class Units final {
//! Enum to hold prefix
enum class UnitPrefix : int8_t {
yocto = 0,
zepto = 1,
atto = 2,
femto = 3,
pico = 4,
nano = 5,
micro = 6,
milli = 7,
centi = 8,
deci = 9,
none = 10,
deca = 11,
hecto = 12,
kilo = 13,
mega = 14,
giga = 15,
tera = 16,
peta = 17,
exa = 18,
zetta = 19,
yotta = 20,
yocto = 0,
zepto = 1,
atto = 2,
femto = 3,
pico = 4,
nano = 5,
micro = 6,
milli = 7,
centi = 8,
deci = 9,
none = 10,
deca = 11,
hecto = 12,
kilo = 13,
mega = 14,
giga = 15,
tera = 16,
peta = 17,
exa = 18,
zetta = 19,
yotta = 20,
unknown = 21
@ -81,16 +79,15 @@ public:
//! Constructor
//! Constructor
explicit Units( const std::string& unit );
explicit Units(const std::string &unit);
//! Constructor
explicit Units( UnitPrefix, UnitValue );
explicit Units(UnitPrefix, UnitValue);
//! Get the prefix
inline UnitPrefix getPrefix() const noexcept { return d_prefix; }
@ -99,57 +96,57 @@ public:
inline UnitValue getUnit() const noexcept { return d_unit; }
//! Get the unit
inline UnitType getUnitType() const noexcept { return getUnitType( d_unit ); }
inline UnitType getUnitType() const noexcept { return getUnitType(d_unit); }
//! Get the unit
static UnitType getUnitType( UnitValue ) noexcept;
static UnitType getUnitType(UnitValue) noexcept;
//! Get the prefix from a string
static UnitPrefix getUnitPrefix( const std::string& ) noexcept;
static UnitPrefix getUnitPrefix(const std::string &) noexcept;
//! Get the unit value from a string
static UnitValue getUnitValue( const std::string& ) noexcept;
static UnitValue getUnitValue(const std::string &) noexcept;
//! Convert to the given unit system
double convert( const Units& ) const noexcept;
double convert(const Units &) const noexcept;
//! Convert a prefix to a scalar
static inline double convert( UnitPrefix x ) noexcept
return d_pow10[static_cast<int8_t>( x )];
static inline double convert(UnitPrefix x) noexcept {
return d_pow10[static_cast<int8_t>(x)];
//! Get a string representation of the units
std::string str() const;
//! Get a string representation for the prefix
static std::array<char, 3> str( UnitPrefix ) noexcept;
static std::array<char, 3> str(UnitPrefix) noexcept;
//! Get a string representation for the unit value
static std::string str( UnitValue );
static std::string str(UnitValue);
//! Operator ==
inline bool operator==( const Units& rhs ) const noexcept
inline bool operator==(const Units &rhs) const noexcept {
return d_prefix == rhs.d_prefix && d_unit == rhs.d_unit;
//! Operator !=
inline bool operator!=( const Units& rhs ) const noexcept
inline bool operator!=(const Units &rhs) const noexcept {
return d_prefix != rhs.d_prefix || d_unit != rhs.d_unit;
//! Check if unit is null
bool isNull() const { return d_prefix == UnitPrefix::unknown || d_unit == UnitValue::unknown; }
bool isNull() const {
return d_prefix == UnitPrefix::unknown || d_unit == UnitValue::unknown;
UnitPrefix d_prefix;
UnitValue d_unit;
constexpr static double d_pow10[22] = { 1e-24, 1e-21, 1e-18, 1e-15, 1e-12, 1e-9, 1e-6, 1e-3,
1e-2, 0.1, 1, 10, 100, 1000, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21, 1e24, 0 };
constexpr static double d_pow10[22] = {
1e-24, 1e-21, 1e-18, 1e-15, 1e-12, 1e-9, 1e-6, 1e-3, 1e-2, 0.1, 1,
10, 100, 1000, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21, 1e24, 0};
constexpr static char d_prefixSymbol[] = "yzafpnumcd\0dhkMGTPEZYu";

View File

@ -31,7 +31,6 @@
#include <math.h>
#include <mutex>
// OS specific includes / definitions
// clang-format off
#if defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 )
@ -45,162 +44,150 @@
// clang-format on
// Mutex for Utility functions
static std::mutex Utilities_mutex;
* Function to perform the default startup/shutdown sequences *
void Utilities::startup( int argc, char **argv, bool multiple )
NULL_USE( argc );
NULL_USE( argv );
void Utilities::startup(int argc, char **argv, bool multiple) {
// Disable OpenMP
Utilities::setenv( "OMP_NUM_THREADS", "1" );
Utilities::setenv( "MKL_NUM_THREADS", "1" );
Utilities::setenv("OMP_NUM_THREADS", "1");
Utilities::setenv("MKL_NUM_THREADS", "1");
// Start MPI
#ifdef USE_MPI
if ( multiple ) {
if (multiple) {
int provided;
MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided );
if ( provided < MPI_THREAD_MULTIPLE ) {
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
if (provided < MPI_THREAD_MULTIPLE) {
int rank;
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
if ( rank == 0 )
std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank == 0)
std::cerr << "Warning: Failed to start MPI with necessary "
"thread support, thread support will be disabled"
<< std::endl;
StackTrace::globalCallStackInitialize( MPI_COMM_WORLD );
} else {
MPI_Init( &argc, &argv );
MPI_Init(&argc, &argv);
// Set the error handlers
Utilities::setAbortBehavior( true, 3 );
Utilities::setAbortBehavior(true, 3);
void Utilities::shutdown()
void Utilities::shutdown() {
// Clear the error handlers
int rank = 0;
#ifdef USE_MPI
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#ifdef USE_TIMER
auto memory = MemoryApp::getMemoryStats();
if ( rank == 0 && memory.N_new > memory.N_delete )
MemoryApp::print( std::cout );
if (rank == 0 && memory.N_new > memory.N_delete)
* Function to set an environemental variable *
void Utilities::setenv( const std::string &name, const std::string &value )
void Utilities::setenv(const std::string &name, const std::string &value) {
#if defined( USE_LINUX ) || defined( USE_MAC )
#if defined(USE_LINUX) || defined(USE_MAC)
bool pass = false;
if ( !value.empty() )
pass = ::setenv(,, 1 ) == 0;
if (!value.empty())
pass = ::setenv(,, 1) == 0;
pass = ::unsetenv( ) == 0;
#elif defined( USE_WINDOWS )
bool pass = SetEnvironmentVariable(, ) != 0;
pass = ::unsetenv( == 0;
#elif defined(USE_WINDOWS)
bool pass = SetEnvironmentVariable(, != 0;
#error Unknown OS
if ( !pass ) {
if (!pass) {
char msg[1024];
if ( !value.empty() )
msg, "Error setting enviornmental variable: %s=%s\n",, );
if (!value.empty())
sprintf(msg, "Error setting enviornmental variable: %s=%s\n",,;
sprintf( msg, "Error clearing enviornmental variable: %s\n", );
ERROR( msg );
sprintf(msg, "Error clearing enviornmental variable: %s\n",;
std::string Utilities::getenv( const std::string &name )
std::string Utilities::getenv(const std::string &name) {
std::string var;
auto tmp = std::getenv( );
if ( tmp )
var = std::string( tmp );
auto tmp = std::getenv(;
if (tmp)
var = std::string(tmp);
return var;
* Factor a number into it's prime factors *
std::vector<int> Utilities::factor(size_t number)
if ( number<=3 )
return std::vector<int>(1,(int)number);
std::vector<int> Utilities::factor(size_t number) {
if (number <= 3)
return std::vector<int>(1, (int)number);
size_t i, n, n_max;
bool factor_found;
// Compute the maximum number of factors
int N_primes_max = 1;
n = number;
while (n >>= 1) ++N_primes_max;
// Initialize n, factors
while (n >>= 1)
// Initialize n, factors
n = number;
std::vector<int> factors;
while ( 1 ) {
while (1) {
// Check if n is a trivial prime number
if ( n==2 || n==3 || n==5 ) {
factors.push_back( (int) n );
if (n == 2 || n == 3 || n == 5) {
// Check if n is divisible by 2
if ( n%2 == 0 ) {
factors.push_back( 2 );
if (n % 2 == 0) {
n /= 2;
// Check each odd number until a factor is reached
n_max = (size_t) floor(sqrt((double) n));
n_max = (size_t)floor(sqrt((double)n));
factor_found = false;
for (i=3; i<=n_max; i+=2) {
if ( n%i == 0 ) {
factors.push_back( i );
for (i = 3; i <= n_max; i += 2) {
if (n % i == 0) {
n /= i;
factor_found = true;
if ( factor_found )
if (factor_found)
// No factors were found, the number must be prime
factors.push_back( (int) n );
// Sort the factors
std::sort( factors.begin(), factors.end() );
std::sort(factors.begin(), factors.end());
return factors;
* Dummy function to prevent compiler from optimizing away variable *
void Utilities::nullUse( void* data )
void Utilities::nullUse(void *data) { NULL_USE(data); }

View File

@ -22,10 +22,8 @@
#include "StackTrace/Utilities.h"
namespace Utilities {
// Functions inherited from StackTrace::Utilities
using StackTrace::Utilities::abort;
using StackTrace::Utilities::cause_segfault;
@ -35,11 +33,10 @@ using StackTrace::Utilities::getMemoryUsage;
using StackTrace::Utilities::getSystemMemory;
using StackTrace::Utilities::setAbortBehavior;
using StackTrace::Utilities::setErrorHandlers;
using StackTrace::Utilities::tick;
using StackTrace::Utilities::time;
using StackTrace::Utilities::sleep_ms;
using StackTrace::Utilities::sleep_s;
using StackTrace::Utilities::tick;
using StackTrace::Utilities::time;
* \brief Start MPI, error handlers
@ -48,7 +45,7 @@ using StackTrace::Utilities::sleep_s;
* \param argv argv from main
* \param multiple Initialize MPI with MPI_THREAD_MULTIPLE support?
void startup( int argc, char **argv, bool multiple=true );
void startup(int argc, char **argv, bool multiple = true);
* \brief Stop MPI, error handlers
@ -56,51 +53,41 @@ void startup( int argc, char **argv, bool multiple=true );
void shutdown();
* Get an environmental variable
* @param name The name of the environmental variable
* @return The value of the environmental variable
std::string getenv( const std::string &name );
std::string getenv(const std::string &name);
* Set an environmental variable
* @param name The name of the environmental variable
* @param value The value to set
void setenv( const std::string &name, const std::string &value );
void setenv(const std::string &name, const std::string &value);
//! std::string version of sprintf
inline std::string stringf( const char *format, ... );
inline std::string stringf(const char *format, ...);
//! Factor a number into its prime factors
std::vector<int> factor(size_t number);
//! Null use function
void nullUse( void* );
void nullUse(void *);
} // namespace Utilities
#include "common/UtilityMacros.h"
// stringf
inline std::string Utilities::stringf( const char *format, ... )
inline std::string Utilities::stringf(const char *format, ...) {
va_list ap;
va_start( ap, format );
va_start(ap, format);
char tmp[4096];
vsprintf( tmp, format, ap );
va_end( ap );
return std::string( tmp );
vsprintf(tmp, format, ap);
return std::string(tmp);

View File

@ -1,108 +1,104 @@
#ifndef included_Utilities_hpp
#define included_Utilities_hpp
#include "Utilities.h"
#include <vector>
namespace Utilities {
* templated quicksort routines *
template<class T>
void quicksort( std::vector<T> &x )
if ( x.size() <= 1u )
template <class T> void quicksort(std::vector<T> &x) {
if (x.size() <= 1u)
T *arr = &x[0];
bool test;
long int i, ir, j, jstack, k, l, istack[100];
T a, tmp_a;
jstack = 0;
l = 0;
ir = x.size() - 1;
while ( 1 ) {
if ( ir - l < 7 ) { // Insertion sort when subarray small enough.
for ( j = l + 1; j <= ir; j++ ) {
a = arr[j];
l = 0;
ir = x.size() - 1;
while (1) {
if (ir - l < 7) { // Insertion sort when subarray small enough.
for (j = l + 1; j <= ir; j++) {
a = arr[j];
test = true;
for ( i = j - 1; i >= 0; i-- ) {
if ( arr[i] < a ) {
for (i = j - 1; i >= 0; i--) {
if (arr[i] < a) {
arr[i + 1] = a;
test = false;
test = false;
arr[i + 1] = arr[i];
if ( test ) {
i = l - 1;
if (test) {
i = l - 1;
arr[i + 1] = a;
if ( jstack == 0 )
if (jstack == 0)
ir = istack[jstack]; // Pop stack and begin a new round of partitioning.
l = istack[jstack - 1];
ir = istack
[jstack]; // Pop stack and begin a new round of partitioning.
l = istack[jstack - 1];
jstack -= 2;
} else {
k = ( l + ir ) / 2; // Choose median of left, center and right elements as partitioning
// element a. Also rearrange so that a(l) < a(l+1) < a(ir).
tmp_a = arr[k];
arr[k] = arr[l + 1];
k = (l + ir) /
2; // Choose median of left, center and right elements as partitioning
// element a. Also rearrange so that a(l) < a(l+1) < a(ir).
tmp_a = arr[k];
arr[k] = arr[l + 1];
arr[l + 1] = tmp_a;
if ( arr[l] > arr[ir] ) {
tmp_a = arr[l];
arr[l] = arr[ir];
if (arr[l] > arr[ir]) {
tmp_a = arr[l];
arr[l] = arr[ir];
arr[ir] = tmp_a;
if ( arr[l + 1] > arr[ir] ) {
tmp_a = arr[l + 1];
if (arr[l + 1] > arr[ir]) {
tmp_a = arr[l + 1];
arr[l + 1] = arr[ir];
arr[ir] = tmp_a;
arr[ir] = tmp_a;
if ( arr[l] > arr[l + 1] ) {
tmp_a = arr[l];
arr[l] = arr[l + 1];
if (arr[l] > arr[l + 1]) {
tmp_a = arr[l];
arr[l] = arr[l + 1];
arr[l + 1] = tmp_a;
// Scan up to find element > a
j = ir;
a = arr[l + 1]; // Partitioning element.
for ( i = l + 2; i <= ir; i++ ) {
if ( arr[i] < a )
for (i = l + 2; i <= ir; i++) {
if (arr[i] < a)
while ( arr[j] > a ) // Scan down to find element < a.
while (arr[j] > a) // Scan down to find element < a.
if ( j < i )
break; // Pointers crossed. Exit with partitioning complete.
tmp_a = arr[i]; // Exchange elements of both arrays.
if (j < i)
break; // Pointers crossed. Exit with partitioning complete.
tmp_a = arr[i]; // Exchange elements of both arrays.
arr[i] = arr[j];
arr[j] = tmp_a;
arr[l + 1] = arr[j]; // Insert partitioning element in both arrays.
arr[j] = a;
arr[j] = a;
jstack += 2;
// Push pointers to larger subarray on stack, process smaller subarray immediately.
if ( ir - i + 1 >= j - l ) {
istack[jstack] = ir;
if (ir - i + 1 >= j - l) {
istack[jstack] = ir;
istack[jstack - 1] = i;
ir = j - 1;
ir = j - 1;
} else {
istack[jstack] = j - 1;
istack[jstack] = j - 1;
istack[jstack - 1] = l;
l = i;
l = i;
template<class T1, class T2>
void quicksort( std::vector<T1> &x, std::vector<T2> &y )
if ( x.size() <= 1u )
template <class T1, class T2>
void quicksort(std::vector<T1> &x, std::vector<T2> &y) {
if (x.size() <= 1u)
T1 *arr = &x[0];
T2 *brr = &y[0];
@ -111,124 +107,123 @@ void quicksort( std::vector<T1> &x, std::vector<T2> &y )
T1 a, tmp_a;
T2 b, tmp_b;
jstack = 0;
l = 0;
ir = x.size() - 1;
while ( 1 ) {
if ( ir - l < 7 ) { // Insertion sort when subarray small enough.
for ( j = l + 1; j <= ir; j++ ) {
a = arr[j];
b = brr[j];
l = 0;
ir = x.size() - 1;
while (1) {
if (ir - l < 7) { // Insertion sort when subarray small enough.
for (j = l + 1; j <= ir; j++) {
a = arr[j];
b = brr[j];
test = true;
for ( i = j - 1; i >= 0; i-- ) {
if ( arr[i] < a ) {
for (i = j - 1; i >= 0; i--) {
if (arr[i] < a) {
arr[i + 1] = a;
brr[i + 1] = b;
test = false;
test = false;
arr[i + 1] = arr[i];
brr[i + 1] = brr[i];
if ( test ) {
i = l - 1;
if (test) {
i = l - 1;
arr[i + 1] = a;
brr[i + 1] = b;
if ( jstack == 0 )
if (jstack == 0)
ir = istack[jstack]; // Pop stack and begin a new round of partitioning.
l = istack[jstack - 1];
ir = istack
[jstack]; // Pop stack and begin a new round of partitioning.
l = istack[jstack - 1];
jstack -= 2;
} else {
k = ( l + ir ) / 2; // Choose median of left, center and right elements as partitioning
// element a. Also rearrange so that a(l) <= a(l+1) <= a(ir).
tmp_a = arr[k];
arr[k] = arr[l + 1];
k = (l + ir) /
2; // Choose median of left, center and right elements as partitioning
// element a. Also rearrange so that a(l) <= a(l+1) <= a(ir).
tmp_a = arr[k];
arr[k] = arr[l + 1];
arr[l + 1] = tmp_a;
tmp_b = brr[k];
brr[k] = brr[l + 1];
tmp_b = brr[k];
brr[k] = brr[l + 1];
brr[l + 1] = tmp_b;
if ( arr[l] > arr[ir] ) {
tmp_a = arr[l];
arr[l] = arr[ir];
if (arr[l] > arr[ir]) {
tmp_a = arr[l];
arr[l] = arr[ir];
arr[ir] = tmp_a;
tmp_b = brr[l];
brr[l] = brr[ir];
tmp_b = brr[l];
brr[l] = brr[ir];
brr[ir] = tmp_b;
if ( arr[l + 1] > arr[ir] ) {
tmp_a = arr[l + 1];
if (arr[l + 1] > arr[ir]) {
tmp_a = arr[l + 1];
arr[l + 1] = arr[ir];
arr[ir] = tmp_a;
tmp_b = brr[l + 1];
arr[ir] = tmp_a;
tmp_b = brr[l + 1];
brr[l + 1] = brr[ir];
brr[ir] = tmp_b;
brr[ir] = tmp_b;
if ( arr[l] > arr[l + 1] ) {
tmp_a = arr[l];
arr[l] = arr[l + 1];
if (arr[l] > arr[l + 1]) {
tmp_a = arr[l];
arr[l] = arr[l + 1];
arr[l + 1] = tmp_a;
tmp_b = brr[l];
brr[l] = brr[l + 1];
tmp_b = brr[l];
brr[l] = brr[l + 1];
brr[l + 1] = tmp_b;
// Scan up to find element > a
j = ir;
a = arr[l + 1]; // Partitioning element.
b = brr[l + 1];
for ( i = l + 2; i <= ir; i++ ) {
if ( arr[i] < a )
for (i = l + 2; i <= ir; i++) {
if (arr[i] < a)
while ( arr[j] > a ) // Scan down to find element < a.
while (arr[j] > a) // Scan down to find element < a.
if ( j < i )
break; // Pointers crossed. Exit with partitioning complete.
tmp_a = arr[i]; // Exchange elements of both arrays.
if (j < i)
break; // Pointers crossed. Exit with partitioning complete.
tmp_a = arr[i]; // Exchange elements of both arrays.
arr[i] = arr[j];
arr[j] = tmp_a;
tmp_b = brr[i];
tmp_b = brr[i];
brr[i] = brr[j];
brr[j] = tmp_b;
arr[l + 1] = arr[j]; // Insert partitioning element in both arrays.
arr[j] = a;
arr[j] = a;
brr[l + 1] = brr[j];
brr[j] = b;
brr[j] = b;
jstack += 2;
// Push pointers to larger subarray on stack, process smaller subarray immediately.
if ( ir - i + 1 >= j - l ) {
istack[jstack] = ir;
if (ir - i + 1 >= j - l) {
istack[jstack] = ir;
istack[jstack - 1] = i;
ir = j - 1;
ir = j - 1;
} else {
istack[jstack] = j - 1;
istack[jstack] = j - 1;
istack[jstack - 1] = l;
l = i;
l = i;
template<class T>
void unique( std::vector<T> &x )
if ( x.size() <= 1 )
template <class T> void unique(std::vector<T> &x) {
if (x.size() <= 1)
// First perform a quicksort
quicksort( x );
// Next remove duplicate entries
size_t pos = 1;
for ( size_t i = 1; i < x.size(); i++ ) {
if ( x[i] != x[pos - 1] ) {
for (size_t i = 1; i < x.size(); i++) {
if (x[i] != x[pos - 1]) {
x[pos] = x[i];
if ( pos < x.size() )
x.resize( pos );
if (pos < x.size())
} // namespace Utilities

View File

@ -24,7 +24,6 @@
#include <sstream>
#include <stdexcept>
/*! \defgroup Macros Set of utility macro functions
* \details These functions are a list of C++ macros that are used
* for common operations, including checking for errors.
@ -32,7 +31,6 @@
* @{
* \brief A null statement
* \details A statement that does nothing, for insure++ make it something
@ -40,33 +38,31 @@
#ifdef __INSURE__
do { \
if ( 0 ) \
int nullstatement = 0 \
} while ( 0 )
do { \
if (0) \
int nullstatement = 0 \
} while (0)
/*! \def NULL_USE(variable)
* \brief A null use of a variable
* \details A null use of a variable, used to avoid GNU compiler warnings about unused variables.
* \param variable Variable to pretend to use
#ifndef NULL_USE
#define NULL_USE( variable ) \
do { \
if ( 0 ) { \
auto temp = (char *) &variable; \
temp++; \
} \
} while ( 0 )
#define NULL_USE(variable) \
do { \
if (0) { \
auto temp = (char *)&variable; \
temp++; \
} \
} while (0)
/*! \def ERROR(MSG)
* \brief Throw error
* \details Throw an error exception from within any C++ source code. The
@ -74,25 +70,23 @@
* line number of the abort are also printed.
* \param MSG Error message to print
#define ERROR(MSG) \
do { \
::Utilities::abort( MSG, __FILE__, __LINE__ ); \
} while ( 0 )
#define ERROR(MSG) \
do { \
::Utilities::abort(MSG, __FILE__, __LINE__); \
} while (0)
/*! \def WARNING(MSG)
* \brief Print a warning
* \details Print a warning without exit. Print file and line number of the warning.
* \param MSG Warning message to print
#define WARNING(MSG) \
do { \
std::stringstream tboxos; \
tboxos << MSG << std::ends; \
printf("WARNING: %s\n Warning called in %s on line %i\n", \
tboxos.str().c_str(),__FILE__,__LINE__); \
#define WARNING(MSG) \
do { \
std::stringstream tboxos; \
tboxos << MSG << std::ends; \
printf("WARNING: %s\n Warning called in %s on line %i\n", \
tboxos.str().c_str(), __FILE__, __LINE__); \
} while (0)
/*! \def ASSERT(EXP)
* \brief Assert error
@ -102,15 +96,14 @@
* The file and line number of the abort are printed along with the stack trace (if available).
* \param EXP Expression to evaluate
#define ASSERT(EXP) \
do { \
if ( !(EXP) ) { \
std::stringstream tboxos; \
tboxos << "Failed assertion: " << #EXP << std::ends; \
::Utilities::abort(tboxos.str(), __FILE__, __LINE__); \
} \
#define ASSERT(EXP) \
do { \
if (!(EXP)) { \
std::stringstream tboxos; \
tboxos << "Failed assertion: " << #EXP << std::ends; \
::Utilities::abort(tboxos.str(), __FILE__, __LINE__); \
} \
} while (0)
/*! \def INSIST(EXP,MSG)
* \brief Insist error
@ -121,15 +114,15 @@
* \param EXP Expression to evaluate
* \param MSG Debug message to print
#define INSIST(EXP,MSG) do { \
if ( !(EXP) ) { \
std::stringstream tboxos; \
tboxos << "Failed insist: " << #EXP << std::endl; \
tboxos << "Message: " << MSG << std::ends; \
::Utilities::abort(tboxos.str(), __FILE__, __LINE__); \
} \
#define INSIST(EXP, MSG) \
do { \
if (!(EXP)) { \
std::stringstream tboxos; \
tboxos << "Failed insist: " << #EXP << std::endl; \
tboxos << "Message: " << MSG << std::ends; \
::Utilities::abort(tboxos.str(), __FILE__, __LINE__); \
} \
} while (0)
* Macro for use when assertions are to be included
@ -143,12 +136,11 @@
* \param EXP Expression to evaluate
* \brief Reenable warnings
* \details This will re-enable warnings after a call to DISABLE_WARNINGS
@ -190,9 +182,6 @@
// clang-format on
/*! @} */

View File

@ -3,338 +3,543 @@ This class implements support for halo widths larger than 1
#include "common/WideHalo.h"
ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr <Domain> Dm, int width)
Lock=false; // unlock the communicator
// Create a separate copy of the communicator for the device
std::shared_ptr<Domain> Dm, int width) {
Lock = false; // unlock the communicator
// Create a separate copy of the communicator for the device
MPI_COMM_SCALBL = Dm->Comm.dup();
// Copy the domain size and communication information directly from Dm
Nx = Dm->Nx;
Ny = Dm->Ny;
Nz = Dm->Nz;
N = Nx*Ny*Nz;
Nxh = Nx + 2*(width - 1);
Nyh = Ny + 2*(width - 1);
Nzh = Nz + 2*(width - 1);
Nh = Nxh*Nyh*Nzh;
iproc = Dm->iproc();
jproc = Dm->jproc();
kproc = Dm->kproc();
nprocx = Dm->nprocx();
nprocy = Dm->nprocy();
nprocz = Dm->nprocz();
rank_info = RankInfoStruct(rank,nprocx,nprocy,nprocz);
rank = rank_info.rank[1][1][1];
rank_X = rank_info.rank[2][1][1];
rank_x = rank_info.rank[0][1][1];
rank_Y = rank_info.rank[1][2][1];
rank_y = rank_info.rank[1][0][1];
rank_Z = rank_info.rank[1][1][2];
rank_z = rank_info.rank[1][1][0];
rank_XY = rank_info.rank[2][2][1];
rank_xy = rank_info.rank[0][0][1];
rank_Xy = rank_info.rank[2][0][1];
rank_xY = rank_info.rank[0][2][1];
rank_XZ = rank_info.rank[2][1][2];
rank_xz = rank_info.rank[0][1][0];
rank_Xz = rank_info.rank[2][1][0];
rank_xZ = rank_info.rank[0][1][2];
rank_YZ = rank_info.rank[1][2][2];
rank_yz = rank_info.rank[1][0][0];
rank_Yz = rank_info.rank[1][2][0];
rank_yZ = rank_info.rank[1][0][2];
rank_XYz = rank_info.rank[2][2][0];
rank_xyz = rank_info.rank[0][0][0];
rank_Xyz = rank_info.rank[2][0][0];
rank_xYz = rank_info.rank[0][2][0];
rank_XYZ = rank_info.rank[2][2][2];
rank_xyZ = rank_info.rank[0][0][2];
rank_XyZ = rank_info.rank[2][0][2];
rank_xYZ = rank_info.rank[0][2][2];
/* Fill in communications patterns for the lists */
/* Send lists */
sendCount_x =getHaloBlock(width,2*width,width,Nyh-width,width,Nzh-width,dvcSendList_x);
sendCount_X =getHaloBlock(Nxh-2*width,Nxh-width,width,Nyh-width,width,Nzh-width,dvcSendList_X);
sendCount_y =getHaloBlock(width,Nxh-width,width,2*width,width,Nzh-width,dvcSendList_y);
sendCount_Y =getHaloBlock(width,Nxh-width,Nyh-2*width,Nyh-width,width,Nzh-width,dvcSendList_Y);
sendCount_z =getHaloBlock(width,Nxh-width,width,Nyh-width,width,2*width,dvcSendList_z);
sendCount_Z =getHaloBlock(width,Nxh-width,width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_Z);
// xy
sendCount_xy =getHaloBlock(width,2*width,width,2*width,width,Nzh-width,dvcSendList_xy);
sendCount_xY =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,width,Nzh-width,dvcSendList_xY);
sendCount_Xy =getHaloBlock(Nxh-2*width,Nxh-width,width,2*width,width,Nzh-width,dvcSendList_Xy);
sendCount_XY =getHaloBlock(Nxh-2*width,Nxh-width,Nyh-2*width,Nyh-width,width,Nzh-width,dvcSendList_XY);
// xz
sendCount_xz =getHaloBlock(width,2*width,width,Nyh-width,width,2*width,dvcSendList_xz);
sendCount_xZ =getHaloBlock(width,2*width,width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_xZ);
sendCount_Xz =getHaloBlock(Nxh-2*width,Nxh-width,width,Nyh-width,width,2*width,dvcSendList_Xz);
sendCount_XZ =getHaloBlock(Nxh-2*width,Nxh-width,width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_XZ);
// yz
sendCount_yz =getHaloBlock(width,Nxh-width,width,2*width,width,2*width,dvcSendList_yz);
sendCount_yZ =getHaloBlock(width,Nxh-width,width,2*width,Nzh-2*width,Nzh-width,dvcSendList_yZ);
sendCount_Yz =getHaloBlock(width,Nxh-width,Nyh-2*width,Nyh-width,width,2*width,dvcSendList_Yz);
sendCount_YZ =getHaloBlock(width,Nxh-width,Nyh-2*width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_YZ);
// xyz
sendCount_xyz =getHaloBlock(width,2*width,width,2*width,width,2*width,dvcSendList_xyz);
sendCount_xyZ =getHaloBlock(width,2*width,width,2*width,Nzh-2*width,Nzh-width,dvcSendList_xyZ);
sendCount_xYz =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,width,2*width,dvcSendList_xYz);
sendCount_xYZ =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_xYZ);
sendCount_Xyz =getHaloBlock(Nxh-2*width,Nxh-width,width,2*width,width,2*width,dvcSendList_Xyz);
sendCount_XyZ =getHaloBlock(Nxh-2*width,Nxh-width,width,2*width,Nzh-2*width,Nzh-width,dvcSendList_XyZ);
sendCount_XYz =getHaloBlock(Nxh-2*width,Nxh-width,Nyh-2*width,Nyh-width,width,2*width,dvcSendList_XYz);
sendCount_XYZ =getHaloBlock(Nxh-2*width,Nxh-width,Nyh-2*width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_XYZ);
/* Recv lists */
recvCount_x =getHaloBlock(0,width,width,Nyh-width,width,Nzh-width,dvcRecvList_x);
recvCount_X =getHaloBlock(Nxh-width,Nxh,width,Nyh-width,width,Nzh-width,dvcRecvList_X);
recvCount_y =getHaloBlock(width,Nxh-width,0,width,width,Nzh-width,dvcRecvList_y);
recvCount_Y =getHaloBlock(width,Nxh-width,Nyh-width,Nyh,width,Nzh-width,dvcRecvList_Y);
recvCount_z =getHaloBlock(width,Nxh-width,width,Nyh-width,0,width,dvcRecvList_z);
recvCount_Z =getHaloBlock(width,Nxh-width,width,Nyh-width,Nzh-width,Nzh,dvcRecvList_Z);
recvCount_xy =getHaloBlock(0,width,0,width,width,Nzh-width,dvcRecvList_xy);
recvCount_xY =getHaloBlock(0,width,Nyh-width,Nyh,width,Nzh-width,dvcRecvList_xY);
recvCount_Xy =getHaloBlock(Nxh-width,Nxh,0,width,width,Nzh-width,dvcRecvList_Xy);
recvCount_XY =getHaloBlock(Nxh-width,Nxh,Nyh-width,Nyh,width,Nzh-width,dvcRecvList_XY);
recvCount_xz =getHaloBlock(0,width,width,Nyh-width,0,width,dvcRecvList_xz);
recvCount_xZ =getHaloBlock(0,width,width,Nyh-width,Nzh-width,Nzh,dvcRecvList_xZ);
recvCount_Xz =getHaloBlock(Nxh-width,Nxh,width,Nyh-width,0,width,dvcRecvList_Xz);
recvCount_XZ =getHaloBlock(Nxh-width,Nxh,width,Nyh-width,Nzh-width,Nzh,dvcRecvList_XZ);
recvCount_yz =getHaloBlock(width,Nxh-width,0,width,0,width,dvcRecvList_yz);
recvCount_yZ =getHaloBlock(width,Nxh-width,0,width,Nzh-width,Nzh,dvcRecvList_yZ);
recvCount_Yz =getHaloBlock(width,Nxh-width,Nyh-width,Nyh,0,width,dvcRecvList_Yz);
recvCount_YZ =getHaloBlock(width,Nxh-width,Nyh-width,Nyh,Nzh-width,Nzh,dvcRecvList_YZ);
recvCount_xyz =getHaloBlock(0,width,0,width,0,width,dvcRecvList_xyz);
recvCount_xyZ =getHaloBlock(0,width,0,width,Nzh-width,Nzh,dvcRecvList_xyZ);
recvCount_xYz =getHaloBlock(0,width,Nyh-width,Nyh,0,width,dvcRecvList_xYz);
recvCount_xYZ =getHaloBlock(0,width,Nyh-width,Nyh,Nzh-width,Nzh,dvcRecvList_xYZ);
recvCount_Xyz =getHaloBlock(Nxh-width,Nxh,0,width,0,width,dvcRecvList_Xyz);
recvCount_XyZ =getHaloBlock(Nxh-width,Nxh,0,width,Nzh-width,Nzh,dvcRecvList_XyZ);
recvCount_XYz =getHaloBlock(Nxh-width,Nxh,Nyh-width,Nyh,0,width,dvcRecvList_XYz);
recvCount_XYZ =getHaloBlock(Nxh-width,Nxh,Nyh-width,Nyh,Nzh-width,Nzh,dvcRecvList_XYZ);
// Copy the domain size and communication information directly from Dm
Nx = Dm->Nx;
Ny = Dm->Ny;
Nz = Dm->Nz;
N = Nx * Ny * Nz;
Nxh = Nx + 2 * (width - 1);
Nyh = Ny + 2 * (width - 1);
Nzh = Nz + 2 * (width - 1);
Nh = Nxh * Nyh * Nzh;
ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, sendCount_x*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, sendCount_X*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, sendCount_y*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, sendCount_Y*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, sendCount_z*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, sendCount_Z*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xy, sendCount_xy*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xY, sendCount_xY*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xy, sendCount_Xy*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XY, sendCount_XY*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xz, sendCount_xz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xZ, sendCount_xZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xz, sendCount_Xz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XZ, sendCount_XZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_yz, sendCount_yz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_yZ, sendCount_yZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Yz, sendCount_Yz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_YZ, sendCount_YZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xyz, sendCount_xyz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xYz, sendCount_xYz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xyz, sendCount_Xyz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XYz, sendCount_XYz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xyZ, sendCount_xyZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xYZ, sendCount_xYZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XyZ, sendCount_XyZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_XYZ, sendCount_XYZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, recvCount_x*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, recvCount_X*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, recvCount_y*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, recvCount_Y*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, recvCount_z*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, recvCount_Z*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xy, recvCount_xy*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xY, recvCount_xY*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xy, recvCount_Xy*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XY, recvCount_XY*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xz, recvCount_xz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xZ, recvCount_xZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xz, recvCount_Xz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XZ, recvCount_XZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_yz, recvCount_yz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_yZ, recvCount_yZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Yz, recvCount_Yz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_YZ, recvCount_YZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xyz, recvCount_xyz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xYz, recvCount_xYz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xyz, recvCount_Xyz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XYz, recvCount_XYz*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xyZ, recvCount_xyZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xYZ, recvCount_xYZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XyZ, recvCount_XyZ*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf_XYZ, recvCount_XYZ*sizeof(double)); // Allocate device memory
/* Set up a map to the halo width=1 data structure */
for (k=width; k<Nzh-width; k++){
for (j=width; j<Nyh-width; j++){
for (i=width; i<Nxh-width; i++){
int idx = k*Nxh*Nyh + j*Nxh + i;
Map(i-width+1,j-width+1,k-width+1) = idx;
Map.resize(Nx, Ny, Nz);
rank = Dm->rank();
iproc = Dm->iproc();
jproc = Dm->jproc();
kproc = Dm->kproc();
nprocx = Dm->nprocx();
nprocy = Dm->nprocy();
nprocz = Dm->nprocz();
rank_info = RankInfoStruct(rank, nprocx, nprocy, nprocz);
rank = rank_info.rank[1][1][1];
rank_X = rank_info.rank[2][1][1];
rank_x = rank_info.rank[0][1][1];
rank_Y = rank_info.rank[1][2][1];
rank_y = rank_info.rank[1][0][1];
rank_Z = rank_info.rank[1][1][2];
rank_z = rank_info.rank[1][1][0];
rank_XY = rank_info.rank[2][2][1];
rank_xy = rank_info.rank[0][0][1];
rank_Xy = rank_info.rank[2][0][1];
rank_xY = rank_info.rank[0][2][1];
rank_XZ = rank_info.rank[2][1][2];
rank_xz = rank_info.rank[0][1][0];
rank_Xz = rank_info.rank[2][1][0];
rank_xZ = rank_info.rank[0][1][2];
rank_YZ = rank_info.rank[1][2][2];
rank_yz = rank_info.rank[1][0][0];
rank_Yz = rank_info.rank[1][2][0];
rank_yZ = rank_info.rank[1][0][2];
rank_XYz = rank_info.rank[2][2][0];
rank_xyz = rank_info.rank[0][0][0];
rank_Xyz = rank_info.rank[2][0][0];
rank_xYz = rank_info.rank[0][2][0];
rank_XYZ = rank_info.rank[2][2][2];
rank_xyZ = rank_info.rank[0][0][2];
rank_XyZ = rank_info.rank[2][0][2];
rank_xYZ = rank_info.rank[0][2][2];
/* Fill in communications patterns for the lists */
/* Send lists */
sendCount_x = getHaloBlock(width, 2 * width, width, Nyh - width, width,
Nzh - width, dvcSendList_x);
sendCount_X = getHaloBlock(Nxh - 2 * width, Nxh - width, width, Nyh - width,
width, Nzh - width, dvcSendList_X);
sendCount_y = getHaloBlock(width, Nxh - width, width, 2 * width, width,
Nzh - width, dvcSendList_y);
sendCount_Y = getHaloBlock(width, Nxh - width, Nyh - 2 * width, Nyh - width,
width, Nzh - width, dvcSendList_Y);
sendCount_z = getHaloBlock(width, Nxh - width, width, Nyh - width, width,
2 * width, dvcSendList_z);
sendCount_Z = getHaloBlock(width, Nxh - width, width, Nyh - width,
Nzh - 2 * width, Nzh - width, dvcSendList_Z);
// xy
sendCount_xy = getHaloBlock(width, 2 * width, width, 2 * width, width,
Nzh - width, dvcSendList_xy);
sendCount_xY = getHaloBlock(width, 2 * width, Nyh - 2 * width, Nyh - width,
width, Nzh - width, dvcSendList_xY);
sendCount_Xy = getHaloBlock(Nxh - 2 * width, Nxh - width, width, 2 * width,
width, Nzh - width, dvcSendList_Xy);
sendCount_XY =
getHaloBlock(Nxh - 2 * width, Nxh - width, Nyh - 2 * width, Nyh - width,
width, Nzh - width, dvcSendList_XY);
// xz
sendCount_xz = getHaloBlock(width, 2 * width, width, Nyh - width, width,
2 * width, dvcSendList_xz);
sendCount_xZ = getHaloBlock(width, 2 * width, width, Nyh - width,
Nzh - 2 * width, Nzh - width, dvcSendList_xZ);
sendCount_Xz = getHaloBlock(Nxh - 2 * width, Nxh - width, width,
Nyh - width, width, 2 * width, dvcSendList_Xz);
sendCount_XZ =
getHaloBlock(Nxh - 2 * width, Nxh - width, width, Nyh - width,
Nzh - 2 * width, Nzh - width, dvcSendList_XZ);
// yz
sendCount_yz = getHaloBlock(width, Nxh - width, width, 2 * width, width,
2 * width, dvcSendList_yz);
sendCount_yZ = getHaloBlock(width, Nxh - width, width, 2 * width,
Nzh - 2 * width, Nzh - width, dvcSendList_yZ);
sendCount_Yz = getHaloBlock(width, Nxh - width, Nyh - 2 * width,
Nyh - width, width, 2 * width, dvcSendList_Yz);
sendCount_YZ =
getHaloBlock(width, Nxh - width, Nyh - 2 * width, Nyh - width,
Nzh - 2 * width, Nzh - width, dvcSendList_YZ);
// xyz
sendCount_xyz = getHaloBlock(width, 2 * width, width, 2 * width, width,
2 * width, dvcSendList_xyz);
sendCount_xyZ = getHaloBlock(width, 2 * width, width, 2 * width,
Nzh - 2 * width, Nzh - width, dvcSendList_xyZ);
sendCount_xYz = getHaloBlock(width, 2 * width, Nyh - 2 * width, Nyh - width,
width, 2 * width, dvcSendList_xYz);
sendCount_xYZ = getHaloBlock(width, 2 * width, Nyh - 2 * width, Nyh - width,
Nzh - 2 * width, Nzh - width, dvcSendList_xYZ);
sendCount_Xyz = getHaloBlock(Nxh - 2 * width, Nxh - width, width, 2 * width,
width, 2 * width, dvcSendList_Xyz);
sendCount_XyZ = getHaloBlock(Nxh - 2 * width, Nxh - width, width, 2 * width,
Nzh - 2 * width, Nzh - width, dvcSendList_XyZ);
sendCount_XYz =
getHaloBlock(Nxh - 2 * width, Nxh - width, Nyh - 2 * width, Nyh - width,
width, 2 * width, dvcSendList_XYz);
sendCount_XYZ =
getHaloBlock(Nxh - 2 * width, Nxh - width, Nyh - 2 * width, Nyh - width,
Nzh - 2 * width, Nzh - width, dvcSendList_XYZ);
/* Recv lists */
recvCount_x = getHaloBlock(0, width, width, Nyh - width, width, Nzh - width,
recvCount_X = getHaloBlock(Nxh - width, Nxh, width, Nyh - width, width,
Nzh - width, dvcRecvList_X);
recvCount_y = getHaloBlock(width, Nxh - width, 0, width, width, Nzh - width,
recvCount_Y = getHaloBlock(width, Nxh - width, Nyh - width, Nyh, width,
Nzh - width, dvcRecvList_Y);
recvCount_z = getHaloBlock(width, Nxh - width, width, Nyh - width, 0, width,
recvCount_Z = getHaloBlock(width, Nxh - width, width, Nyh - width,
Nzh - width, Nzh, dvcRecvList_Z);
recvCount_xy =
getHaloBlock(0, width, 0, width, width, Nzh - width, dvcRecvList_xy);
recvCount_xY = getHaloBlock(0, width, Nyh - width, Nyh, width, Nzh - width,
recvCount_Xy = getHaloBlock(Nxh - width, Nxh, 0, width, width, Nzh - width,
recvCount_XY = getHaloBlock(Nxh - width, Nxh, Nyh - width, Nyh, width,
Nzh - width, dvcRecvList_XY);
recvCount_xz =
getHaloBlock(0, width, width, Nyh - width, 0, width, dvcRecvList_xz);
recvCount_xZ = getHaloBlock(0, width, width, Nyh - width, Nzh - width, Nzh,
recvCount_Xz = getHaloBlock(Nxh - width, Nxh, width, Nyh - width, 0, width,
recvCount_XZ = getHaloBlock(Nxh - width, Nxh, width, Nyh - width,
Nzh - width, Nzh, dvcRecvList_XZ);
recvCount_yz =
getHaloBlock(width, Nxh - width, 0, width, 0, width, dvcRecvList_yz);
recvCount_yZ = getHaloBlock(width, Nxh - width, 0, width, Nzh - width, Nzh,
recvCount_Yz = getHaloBlock(width, Nxh - width, Nyh - width, Nyh, 0, width,
recvCount_YZ = getHaloBlock(width, Nxh - width, Nyh - width, Nyh,
Nzh - width, Nzh, dvcRecvList_YZ);
recvCount_xyz = getHaloBlock(0, width, 0, width, 0, width, dvcRecvList_xyz);
recvCount_xyZ =
getHaloBlock(0, width, 0, width, Nzh - width, Nzh, dvcRecvList_xyZ);
recvCount_xYz =
getHaloBlock(0, width, Nyh - width, Nyh, 0, width, dvcRecvList_xYz);
recvCount_xYZ = getHaloBlock(0, width, Nyh - width, Nyh, Nzh - width, Nzh,
recvCount_Xyz =
getHaloBlock(Nxh - width, Nxh, 0, width, 0, width, dvcRecvList_Xyz);
recvCount_XyZ = getHaloBlock(Nxh - width, Nxh, 0, width, Nzh - width, Nzh,
recvCount_XYz = getHaloBlock(Nxh - width, Nxh, Nyh - width, Nyh, 0, width,
recvCount_XYZ = getHaloBlock(Nxh - width, Nxh, Nyh - width, Nyh,
Nzh - width, Nzh, dvcRecvList_XYZ);
ScaLBL_AllocateZeroCopy((void **)&sendbuf_x,
sendCount_x *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_X,
sendCount_X *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_y,
sendCount_y *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_Y,
sendCount_Y *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_z,
sendCount_z *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_Z,
sendCount_Z *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_xy,
sendCount_xy *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_xY,
sendCount_xY *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_Xy,
sendCount_Xy *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_XY,
sendCount_XY *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_xz,
sendCount_xz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_xZ,
sendCount_xZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_Xz,
sendCount_Xz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_XZ,
sendCount_XZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_yz,
sendCount_yz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_yZ,
sendCount_yZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_Yz,
sendCount_Yz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_YZ,
sendCount_YZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_xyz,
sendCount_xyz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_xYz,
sendCount_xYz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_Xyz,
sendCount_Xyz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_XYz,
sendCount_XYz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_xyZ,
sendCount_xyZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_xYZ,
sendCount_xYZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_XyZ,
sendCount_XyZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&sendbuf_XYZ,
sendCount_XYZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_x,
recvCount_x *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_X,
recvCount_X *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_y,
recvCount_y *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_Y,
recvCount_Y *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_z,
recvCount_z *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_Z,
recvCount_Z *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_xy,
recvCount_xy *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_xY,
recvCount_xY *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_Xy,
recvCount_Xy *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_XY,
recvCount_XY *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_xz,
recvCount_xz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_xZ,
recvCount_xZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_Xz,
recvCount_Xz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_XZ,
recvCount_XZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_yz,
recvCount_yz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_yZ,
recvCount_yZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_Yz,
recvCount_Yz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_YZ,
recvCount_YZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_xyz,
recvCount_xyz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_xYz,
recvCount_xYz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_Xyz,
recvCount_Xyz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_XYz,
recvCount_XYz *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_xyZ,
recvCount_xyZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_xYZ,
recvCount_xYZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_XyZ,
recvCount_XyZ *
sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **)&recvbuf_XYZ,
recvCount_XYZ *
sizeof(double)); // Allocate device memory
/* Set up a map to the halo width=1 data structure */
for (k = width; k < Nzh - width; k++) {
for (j = width; j < Nyh - width; j++) {
for (i = width; i < Nxh - width; i++) {
int idx = k * Nxh * Nyh + j * Nxh + i;
Map(i - width + 1, j - width + 1, k - width + 1) = idx;
// Start a wide-halo exchange of the scalar field `data` (pre-clang-format copy of this routine).
// Visible steps: call ERROR if the communicator is already locked, set both message tags to 1,
// call ScaLBL_Scalar_Pack once per neighbor direction (6 faces, 12 edges, 8 corners), then post
// one Isend/Irecv pair per direction, keeping the requests in req1[] (sends) and req2[] (recvs).
// NOTE(review): no closing braces, no else-branch and no `Lock=true` are visible in this copy,
// and a second definition of Send follows it -- lines appear to have been lost; confirm against
// the full file before relying on this text.
void ScaLBLWideHalo_Communicator::Send(double *data){
if (Lock==true){
ERROR("ScaLBL Error (SendHalo): ScaLBLWideHalo_Communicator is locked -- did you forget to match Send/Recv calls?");
// Base tag shared by sends and receives; each direction adds its own offset 0..25 below.
sendtag = recvtag = 1;
// Faces and edges. Presumably each call gathers data[] at the indices in dvcSendList_* into the
// matching send buffer -- TODO confirm against ScaLBL_Scalar_Pack.
ScaLBL_Scalar_Pack(dvcSendList_x, sendCount_x,sendbuf_x, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_y, sendCount_y,sendbuf_y, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_z, sendCount_z,sendbuf_z, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_X, sendCount_X,sendbuf_X, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Y, sendCount_Y,sendbuf_Y, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Z, sendCount_Z,sendbuf_Z, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xy, sendCount_xy,sendbuf_xy, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xY, sendCount_xY,sendbuf_xY, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Xy, sendCount_Xy,sendbuf_Xy, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XY, sendCount_XY,sendbuf_XY, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xz, sendCount_xz,sendbuf_xz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xZ, sendCount_xZ,sendbuf_xZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Xz, sendCount_Xz,sendbuf_Xz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XZ, sendCount_XZ,sendbuf_XZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_yz, sendCount_yz,sendbuf_yz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_yZ, sendCount_yZ,sendbuf_yZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Yz, sendCount_Yz,sendbuf_Yz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_YZ, sendCount_YZ,sendbuf_YZ, data, Nh);
/* corners */
ScaLBL_Scalar_Pack(dvcSendList_xyz, sendCount_xyz,sendbuf_xyz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xyZ, sendCount_xyZ,sendbuf_xyZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xYz, sendCount_xYz,sendbuf_xYz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xYZ, sendCount_xYZ,sendbuf_xYZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Xyz, sendCount_Xyz,sendbuf_Xyz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XyZ, sendCount_XyZ,sendbuf_XyZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XYz, sendCount_XYz,sendbuf_XYz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XYZ, sendCount_XYZ,sendbuf_XYZ, data, Nh);
// Send / Recv all the phase indicator field values
// Pairing rule used below: the send towards a direction and the receive from the opposite
// direction (all letter cases flipped) share the same tag offset.
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x,sendCount_x,rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X,recvCount_X,rank_X,recvtag+0);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X,sendCount_X,rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x,recvCount_x,rank_x,recvtag+1);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y,sendCount_y,rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y,recvCount_Y,rank_Y,recvtag+2);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y,sendCount_Y,rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y,recvCount_y,rank_y,recvtag+3);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z,sendCount_z,rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z,recvCount_Z,rank_Z,recvtag+4);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z,sendCount_Z,rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z,recvCount_z,rank_z,recvtag+5);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy,sendCount_xy,rank_xy,sendtag+6);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY,recvCount_XY,rank_XY,recvtag+6);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY,sendCount_XY,rank_XY,sendtag+7);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy,recvCount_xy,rank_xy,recvtag+7);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy,sendCount_Xy,rank_Xy,sendtag+8);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY,recvCount_xY,rank_xY,recvtag+8);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY,sendCount_xY,rank_xY,sendtag+9);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy,recvCount_Xy,rank_Xy,recvtag+9);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz,sendCount_xz,rank_xz,sendtag+10);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ,recvCount_XZ,rank_XZ,recvtag+10);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ,sendCount_XZ,rank_XZ,sendtag+11);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz,recvCount_xz,rank_xz,recvtag+11);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz,sendCount_Xz,rank_Xz,sendtag+12);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ,recvCount_xZ,rank_xZ,recvtag+12);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ,sendCount_xZ,rank_xZ,sendtag+13);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz,recvCount_Xz,rank_Xz,recvtag+13);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz,sendCount_yz,rank_yz,sendtag+14);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ,recvCount_YZ,rank_YZ,recvtag+14);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ,sendCount_YZ,rank_YZ,sendtag+15);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz,recvCount_yz,rank_yz,recvtag+15);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz,sendCount_Yz,rank_Yz,sendtag+16);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ,recvCount_yZ,rank_yZ,recvtag+16);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ,sendCount_yZ,rank_yZ,sendtag+17);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz,recvCount_Yz,rank_Yz,recvtag+17);
/* Corners */
req1[18] = MPI_COMM_SCALBL.Isend(sendbuf_xyz,sendCount_xyz,rank_xyz,sendtag+18);
req2[18] = MPI_COMM_SCALBL.Irecv(recvbuf_XYZ,recvCount_XYZ,rank_XYZ,recvtag+18);
req1[19] = MPI_COMM_SCALBL.Isend(sendbuf_XYz,sendCount_XYz,rank_XYz,sendtag+19);
req2[19] = MPI_COMM_SCALBL.Irecv(recvbuf_xyZ,recvCount_xyZ,rank_xyZ,recvtag+19);
req1[20] = MPI_COMM_SCALBL.Isend(sendbuf_Xyz,sendCount_Xyz,rank_Xyz,sendtag+20);
req2[20] = MPI_COMM_SCALBL.Irecv(recvbuf_xYZ,recvCount_xYZ,rank_xYZ,recvtag+20);
req1[21] = MPI_COMM_SCALBL.Isend(sendbuf_xYz,sendCount_xYz,rank_xYz,sendtag+21);
req2[21] = MPI_COMM_SCALBL.Irecv(recvbuf_XyZ,recvCount_XyZ,rank_XyZ,recvtag+21);
req1[22] = MPI_COMM_SCALBL.Isend(sendbuf_xyZ,sendCount_xyZ,rank_xyZ,sendtag+22);
req2[22] = MPI_COMM_SCALBL.Irecv(recvbuf_XYz,recvCount_XYz,rank_XYz,recvtag+22);
req1[23] = MPI_COMM_SCALBL.Isend(sendbuf_XYZ,sendCount_XYZ,rank_XYZ,sendtag+23);
req2[23] = MPI_COMM_SCALBL.Irecv(recvbuf_xyz,recvCount_xyz,rank_xyz,recvtag+23);
req1[24] = MPI_COMM_SCALBL.Isend(sendbuf_XyZ,sendCount_XyZ,rank_XyZ,sendtag+24);
req2[24] = MPI_COMM_SCALBL.Irecv(recvbuf_xYz,recvCount_xYz,rank_xYz,recvtag+24);
req1[25] = MPI_COMM_SCALBL.Isend(sendbuf_xYZ,sendCount_xYZ,rank_xYZ,sendtag+25);
req2[25] = MPI_COMM_SCALBL.Irecv(recvbuf_Xyz,recvCount_Xyz,rank_Xyz,recvtag+25);
// Start a wide-halo exchange of the scalar field `data`.
//
// If the communicator is already locked (a previous Send has not been completed by Recv, which
// is where Lock is cleared), ERROR is called. Otherwise the communicator is locked, both tags
// are set to 1, ScaLBL_Scalar_Pack is called once for each of the 26 neighbor directions
// (6 faces, 12 edges, 8 corners), and one Isend/Irecv pair is posted per direction. The
// resulting requests are stored in req1[0..25] (sends) and req2[0..25] (receives); Recv waits
// on both arrays.
//
// @param data  scalar field to exchange; passed through to ScaLBL_Scalar_Pack together with Nh
//              (presumably a device-accessible array of Nh doubles -- TODO confirm).
//
// NOTE(review): the braces closing the else-branch and the function are not visible in this
// excerpt; confirm against the full file.
void ScaLBLWideHalo_Communicator::Send(double *data) {
if (Lock == true) {
ERROR("ScaLBL Error (SendHalo): ScaLBLWideHalo_Communicator is locked "
"-- did you forget to match Send/Recv calls?");
} else {
// Mark the exchange as in flight; Recv resets this flag.
Lock = true;
// Base tag shared by sends and receives; each direction adds its own offset 0..25 below.
sendtag = recvtag = 1;
// Faces and edges. Presumably each call gathers data[] at the indices in dvcSendList_* into the
// matching send buffer -- TODO confirm against ScaLBL_Scalar_Pack.
ScaLBL_Scalar_Pack(dvcSendList_x, sendCount_x, sendbuf_x, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_y, sendCount_y, sendbuf_y, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_z, sendCount_z, sendbuf_z, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_X, sendCount_X, sendbuf_X, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Y, sendCount_Y, sendbuf_Y, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Z, sendCount_Z, sendbuf_Z, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xy, sendCount_xy, sendbuf_xy, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xY, sendCount_xY, sendbuf_xY, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Xy, sendCount_Xy, sendbuf_Xy, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XY, sendCount_XY, sendbuf_XY, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xz, sendCount_xz, sendbuf_xz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xZ, sendCount_xZ, sendbuf_xZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Xz, sendCount_Xz, sendbuf_Xz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XZ, sendCount_XZ, sendbuf_XZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_yz, sendCount_yz, sendbuf_yz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_yZ, sendCount_yZ, sendbuf_yZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Yz, sendCount_Yz, sendbuf_Yz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_YZ, sendCount_YZ, sendbuf_YZ, data, Nh);
/* corners */
ScaLBL_Scalar_Pack(dvcSendList_xyz, sendCount_xyz, sendbuf_xyz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xyZ, sendCount_xyZ, sendbuf_xyZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xYz, sendCount_xYz, sendbuf_xYz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_xYZ, sendCount_xYZ, sendbuf_xYZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_Xyz, sendCount_Xyz, sendbuf_Xyz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XyZ, sendCount_XyZ, sendbuf_XyZ, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XYz, sendCount_XYz, sendbuf_XYz, data, Nh);
ScaLBL_Scalar_Pack(dvcSendList_XYZ, sendCount_XYZ, sendbuf_XYZ, data, Nh);
// Send / Recv all the phase indicator field values
// Pairing rule used below: the send towards a direction and the receive from the opposite
// direction (all letter cases flipped) share the same tag offset.
req1[0] =
MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x, sendtag + 0);
req2[0] =
MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X, recvtag + 0);
req1[1] =
MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X, sendtag + 1);
req2[1] =
MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x, recvtag + 1);
req1[2] =
MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y, sendtag + 2);
req2[2] =
MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y, recvtag + 2);
req1[3] =
MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y, sendtag + 3);
req2[3] =
MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y, recvtag + 3);
req1[4] =
MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z, sendtag + 4);
req2[4] =
MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z, recvtag + 4);
req1[5] =
MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z, sendtag + 5);
req2[5] =
MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z, recvtag + 5);
req1[6] =
MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy, rank_xy, sendtag + 6);
req2[6] =
MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY, rank_XY, recvtag + 6);
req1[7] =
MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY, rank_XY, sendtag + 7);
req2[7] =
MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy, rank_xy, recvtag + 7);
req1[8] =
MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy, rank_Xy, sendtag + 8);
req2[8] =
MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY, rank_xY, recvtag + 8);
req1[9] =
MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY, rank_xY, sendtag + 9);
req2[9] =
MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy, rank_Xy, recvtag + 9);
req1[10] =
MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz, rank_xz, sendtag + 10);
req2[10] =
MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ, rank_XZ, recvtag + 10);
req1[11] =
MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ, rank_XZ, sendtag + 11);
req2[11] =
MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz, rank_xz, recvtag + 11);
req1[12] =
MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz, rank_Xz, sendtag + 12);
req2[12] =
MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ, rank_xZ, recvtag + 12);
req1[13] =
MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ, rank_xZ, sendtag + 13);
req2[13] =
MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz, rank_Xz, recvtag + 13);
req1[14] =
MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz, rank_yz, sendtag + 14);
req2[14] =
MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ, rank_YZ, recvtag + 14);
req1[15] =
MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ, rank_YZ, sendtag + 15);
req2[15] =
MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz, rank_yz, recvtag + 15);
req1[16] =
MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz, rank_Yz, sendtag + 16);
req2[16] =
MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ, rank_yZ, recvtag + 16);
req1[17] =
MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ, rank_yZ, sendtag + 17);
req2[17] =
MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz, rank_Yz, recvtag + 17);
/* Corners */
req1[18] = MPI_COMM_SCALBL.Isend(sendbuf_xyz, sendCount_xyz, rank_xyz,
sendtag + 18);
req2[18] = MPI_COMM_SCALBL.Irecv(recvbuf_XYZ, recvCount_XYZ, rank_XYZ,
recvtag + 18);
req1[19] = MPI_COMM_SCALBL.Isend(sendbuf_XYz, sendCount_XYz, rank_XYz,
sendtag + 19);
req2[19] = MPI_COMM_SCALBL.Irecv(recvbuf_xyZ, recvCount_xyZ, rank_xyZ,
recvtag + 19);
req1[20] = MPI_COMM_SCALBL.Isend(sendbuf_Xyz, sendCount_Xyz, rank_Xyz,
sendtag + 20);
req2[20] = MPI_COMM_SCALBL.Irecv(recvbuf_xYZ, recvCount_xYZ, rank_xYZ,
recvtag + 20);
req1[21] = MPI_COMM_SCALBL.Isend(sendbuf_xYz, sendCount_xYz, rank_xYz,
sendtag + 21);
req2[21] = MPI_COMM_SCALBL.Irecv(recvbuf_XyZ, recvCount_XyZ, rank_XyZ,
recvtag + 21);
req1[22] = MPI_COMM_SCALBL.Isend(sendbuf_xyZ, sendCount_xyZ, rank_xyZ,
sendtag + 22);
req2[22] = MPI_COMM_SCALBL.Irecv(recvbuf_XYz, recvCount_XYz, rank_XYz,
recvtag + 22);
req1[23] = MPI_COMM_SCALBL.Isend(sendbuf_XYZ, sendCount_XYZ, rank_XYZ,
sendtag + 23);
req2[23] = MPI_COMM_SCALBL.Irecv(recvbuf_xyz, recvCount_xyz, rank_xyz,
recvtag + 23);
req1[24] = MPI_COMM_SCALBL.Isend(sendbuf_XyZ, sendCount_XyZ, rank_XyZ,
sendtag + 24);
req2[24] = MPI_COMM_SCALBL.Irecv(recvbuf_xYz, recvCount_xYz, rank_xYz,
recvtag + 24);
req1[25] = MPI_COMM_SCALBL.Isend(sendbuf_xYZ, sendCount_xYZ, rank_xYZ,
sendtag + 25);
req2[25] = MPI_COMM_SCALBL.Irecv(recvbuf_Xyz, recvCount_Xyz, rank_Xyz,
recvtag + 25);
// Destructor: performs no explicit cleanup.
// NOTE(review): the sendbuf_*/recvbuf_* buffers obtained through ScaLBL_AllocateZeroCopy and the
// dvcSendList_*/dvcRecvList_* index lists are not released here -- confirm whether they are
// freed elsewhere or leaked.
ScaLBLWideHalo_Communicator::~ScaLBLWideHalo_Communicator() = default;
// Complete the halo exchange started by Send.
//
// Waits on all 26 send requests (req1) and all 26 receive requests (req2), then calls
// ScaLBL_Scalar_Unpack once per neighbor direction with the matching receive list, count and
// buffer, and finally clears Lock so that Send may be called again.
//
// @param data  scalar field that receives the halo values; passed through to
//              ScaLBL_Scalar_Unpack together with Nh (presumably the same array given to
//              Send -- TODO confirm).
//
// NOTE(review): the closing brace of this function is not visible in this excerpt.
void ScaLBLWideHalo_Communicator::Recv(double *data) {
// Block until every message posted by Send has completed.
Utilities::MPI::waitAll(26, req1);
Utilities::MPI::waitAll(26, req2);
//printf("Ready to unpack %i to x\n",recvCount_x);
//printf(" print first 10 values...\n");
//for (int idx=0; idx<10; idx++) printf(" recvBuf[%i]=%f \n",idx,recvbuf_x[idx]);
// Faces and edges. Presumably each call scatters the receive buffer into data[] at the indices
// in dvcRecvList_* -- TODO confirm against ScaLBL_Scalar_Unpack.
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x, recvbuf_x, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y, recvbuf_y, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X, recvbuf_X, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Y, recvCount_Y, recvbuf_Y, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xy, recvCount_xy, recvbuf_xy, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xY, recvCount_xY, recvbuf_xY, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Xy, recvCount_Xy, recvbuf_Xy, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XY, recvCount_XY, recvbuf_XY, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z, recvbuf_z, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz, recvbuf_xz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz, recvbuf_Xz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz, recvbuf_yz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz, recvbuf_Yz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z, recvbuf_Z, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ, recvbuf_xZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ, recvbuf_XZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ, recvbuf_yZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ, recvbuf_YZ, data, Nh);
/* corners */
ScaLBL_Scalar_Unpack(dvcRecvList_xyz, recvCount_xyz, recvbuf_xyz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xYz, recvCount_xYz, recvbuf_xYz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xyZ, recvCount_xyZ, recvbuf_xyZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xYZ, recvCount_xYZ, recvbuf_xYZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Xyz, recvCount_Xyz, recvbuf_Xyz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XYz, recvCount_XYz, recvbuf_XYz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XyZ, recvCount_XyZ, recvbuf_XyZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XYZ, recvCount_XYZ, recvbuf_XYZ, data, Nh);
Lock = false; // unlock the communicator after communications complete
// Complete the halo exchange (pre-clang-format copy of this routine).
// Visible steps: call ScaLBL_Scalar_Unpack once per neighbor direction (faces and edges first,
// then corners) with the matching receive list, count and buffer, then clear Lock.
// NOTE(review): no wait on req1/req2 and no closing brace are visible in this copy, and another
// definition of Recv precedes it -- lines appear to have been lost; confirm against the full
// file before relying on this text.
void ScaLBLWideHalo_Communicator::Recv(double *data){
//printf("Ready to unpack %i to x\n",recvCount_x);
//printf(" print first 10 values...\n");
//for (int idx=0; idx<10; idx++) printf(" recvBuf[%i]=%f \n",idx,recvbuf_x[idx]);
// Faces and edges. Presumably each call scatters the receive buffer into data[] at the indices
// in dvcRecvList_* -- TODO confirm against ScaLBL_Scalar_Unpack.
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Y, recvCount_Y,recvbuf_Y, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xy, recvCount_xy,recvbuf_xy, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xY, recvCount_xY,recvbuf_xY, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Xy, recvCount_Xy,recvbuf_Xy, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XY, recvCount_XY,recvbuf_XY, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z,recvbuf_z, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz,recvbuf_xz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz,recvbuf_yz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z,recvbuf_Z, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, data, Nh);
/* corners */
ScaLBL_Scalar_Unpack(dvcRecvList_xyz, recvCount_xyz,recvbuf_xyz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xYz, recvCount_xYz,recvbuf_xYz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xyZ, recvCount_xyZ,recvbuf_xyZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_xYZ, recvCount_xYZ,recvbuf_xYZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_Xyz, recvCount_Xyz,recvbuf_Xyz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XYz, recvCount_XYz,recvbuf_XYz, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XyZ, recvCount_XyZ,recvbuf_XyZ, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_XYZ, recvCount_XYZ,recvbuf_XYZ, data, Nh);
Lock=false; // unlock the communicator after communications complete

View File

@ -6,110 +6,130 @@ This class implements support for halo widths larger than 1
#include "common/ScaLBL.h"
#include "common/MPI.h"
class ScaLBLWideHalo_Communicator{
class ScaLBLWideHalo_Communicator {
ScaLBLWideHalo_Communicator(std::shared_ptr <Domain> Dm, int width);
//MPI_Comm MPI_COMM_SCALBL; // MPI Communicator
unsigned long int CommunicationCount,SendCount,RecvCount;
int Nx,Ny,Nz,N; // original domain structure
int Nxh,Nyh,Nzh,Nh; // with wide halo
DoubleArray Map; // map to regular halo
int first_interior,last_interior;
// Set up for D3Q19 distributions -- all 27 neighbors are needed
// Buffers to store data sent and received by this MPI process
double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z;
double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ;
double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, *sendbuf_XZ;
double *sendbuf_xyz, *sendbuf_Xyz, *sendbuf_xYz, *sendbuf_XYz;
double *sendbuf_xyZ, *sendbuf_XyZ, *sendbuf_xYZ, *sendbuf_XYZ;
double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z;
double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, *recvbuf_xZ;
double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ;
double *recvbuf_xyz, *recvbuf_Xyz, *recvbuf_xYz, *recvbuf_XYz;
double *recvbuf_xyZ, *recvbuf_XyZ, *recvbuf_xYZ, *recvbuf_XYZ;
int LastExterior();
int FirstInterior();
int LastInterior();
void Send(double *data);
void Recv(double *data);
ScaLBLWideHalo_Communicator(std::shared_ptr<Domain> Dm, int width);
//MPI_Comm MPI_COMM_SCALBL; // MPI Communicator
unsigned long int CommunicationCount, SendCount, RecvCount;
int Nx, Ny, Nz, N; // original domain structure
int Nxh, Nyh, Nzh, Nh; // with wide halo
DoubleArray Map; // map to regular halo
int first_interior, last_interior;
// Set up for D3Q19 distributions -- all 27 neighbors are needed
// Buffers to store data sent and received by this MPI process
double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y,
double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz,
double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ,
double *sendbuf_xyz, *sendbuf_Xyz, *sendbuf_xYz, *sendbuf_XYz;
double *sendbuf_xyZ, *sendbuf_XyZ, *sendbuf_xYZ, *sendbuf_XYZ;
double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y,
double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz,
double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ,
double *recvbuf_xyz, *recvbuf_Xyz, *recvbuf_xYz, *recvbuf_XYz;
double *recvbuf_xyZ, *recvbuf_XyZ, *recvbuf_xYZ, *recvbuf_XYZ;
int LastExterior();
int FirstInterior();
int LastInterior();
// Debugging and unit testing functions
void PrintDebug();
void Send(double *data);
void Recv(double *data);
// Debugging and unit testing functions
void PrintDebug();
bool Lock; // use Lock to make sure only one call at a time to protect data in transit
// only one set of Send requests can be active at any time (per instance)
int i,j,k,n;
int iproc,jproc,kproc;
int nprocx,nprocy,nprocz;
int sendtag,recvtag;
// Give the object its own MPI communicator
RankInfoStruct rank_info;
MPI_Request req1[26],req2[26];
// MPI ranks for all 26 neighbors (6 faces, 12 edges, 8 corners)
// These variables are all private to prevent external things from modifying them!!
int rank;
int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
int rank_xy,rank_XY,rank_xY,rank_Xy;
int rank_xz,rank_XZ,rank_xZ,rank_Xz;
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
int rank_xyz,rank_Xyz,rank_xYz,rank_XYz;
int rank_xyZ,rank_XyZ,rank_xYZ,rank_XYZ;
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
int sendCount_xyz,sendCount_Xyz,sendCount_xYz,sendCount_XYz;
int sendCount_xyZ,sendCount_XyZ,sendCount_xYZ,sendCount_XYZ;
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
int recvCount_xyz,recvCount_Xyz,recvCount_xYz,recvCount_XYz;
int recvCount_xyZ,recvCount_XyZ,recvCount_xYZ,recvCount_XYZ;
// Send buffers that reside on the compute device
int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z;
int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ;
int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ;
int *dvcSendList_xyz,*dvcSendList_Xyz,*dvcSendList_xYz,*dvcSendList_XYz;
int *dvcSendList_xyZ,*dvcSendList_XyZ,*dvcSendList_xYZ,*dvcSendList_XYZ;
// Receive buffers that reside on the compute device
int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z;
int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ;
int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ;
int *dvcRecvList_xyz,*dvcRecvList_Xyz,*dvcRecvList_xYz,*dvcRecvList_XYz;
int *dvcRecvList_xyZ,*dvcRecvList_XyZ,*dvcRecvList_xYZ,*dvcRecvList_XYZ;
// Build the list of linear indices for the box [imin,imax) x [jmin,jmax) x [kmin,kmax) of the
// wide-halo grid, using the layout idx = k*Nxh*Nyh + j*Nxh + i.
//
// @param imin,imax  index range in x (imax exclusive)
// @param jmin,jmax  index range in y (jmax exclusive)
// @param kmin,kmax  index range in z (kmax exclusive)
// @param dvcList    output pointer; set by ScaLBL_AllocateZeroCopy to a buffer of
//                   count*sizeof(int) bytes
// @return number of indices generated
//
// The loops use the class members i, j, k as counters rather than local variables.
// NOTE(review): in this excerpt the loop closing braces, any copy of List into dvcList and any
// delete[] of List are not visible -- confirm against the full file whether those lines were
// lost or List is leaked and dvcList left unfilled.
inline int getHaloBlock(int imin, int imax, int jmin, int jmax, int kmin, int kmax, int *& dvcList){
int count = 0;
int *List;
// Temporary host-side list sized for every site in the box.
List = new int [(imax-imin)*(jmax-jmin)*(kmax-kmin)];
for (k=kmin; k<kmax; k++){
for (j=jmin; j<jmax; j++){
for (i=imin; i<imax; i++){
List[count++] = k*Nxh*Nyh + j*Nxh + i;
size_t numbytes=count*sizeof(int);
ScaLBL_AllocateZeroCopy((void **) &dvcList, numbytes); // Allocate device memory
return count;
Lock; // use Lock to make sure only one call at a time to protect data in transit
// only one set of Send requests can be active at any time (per instance)
int i, j, k, n;
int iproc, jproc, kproc;
int nprocx, nprocy, nprocz;
int sendtag, recvtag;
// Give the object its own MPI communicator
RankInfoStruct rank_info;
MPI_Request req1[26], req2[26];
// MPI ranks for all 26 neighbors (6 + 12 + 8, declared below)
// These variables are all private to prevent external things from modifying them!!
int rank;
int rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z;
int rank_xy, rank_XY, rank_xY, rank_Xy;
int rank_xz, rank_XZ, rank_xZ, rank_Xz;
int rank_yz, rank_YZ, rank_yZ, rank_Yz;
int rank_xyz, rank_Xyz, rank_xYz, rank_XYz;
int rank_xyZ, rank_XyZ, rank_xYZ, rank_XYZ;
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y,
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz,
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ,
int sendCount_xyz, sendCount_Xyz, sendCount_xYz, sendCount_XYz;
int sendCount_xyZ, sendCount_XyZ, sendCount_xYZ, sendCount_XYZ;
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y,
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz,
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ,
int recvCount_xyz, recvCount_Xyz, recvCount_xYz, recvCount_XYz;
int recvCount_xyZ, recvCount_XyZ, recvCount_xYZ, recvCount_XYZ;
// Send buffers that reside on the compute device
int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X,
*dvcSendList_Y, *dvcSendList_Z;
int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy,
*dvcSendList_Yz, *dvcSendList_xZ;
int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY,
*dvcSendList_YZ, *dvcSendList_XZ;
int *dvcSendList_xyz, *dvcSendList_Xyz, *dvcSendList_xYz, *dvcSendList_XYz;
int *dvcSendList_xyZ, *dvcSendList_XyZ, *dvcSendList_xYZ, *dvcSendList_XYZ;
// Receive buffers that reside on the compute device
int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X,
*dvcRecvList_Y, *dvcRecvList_Z;
int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy,
*dvcRecvList_Yz, *dvcRecvList_xZ;
int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY,
*dvcRecvList_YZ, *dvcRecvList_XZ;
int *dvcRecvList_xyz, *dvcRecvList_Xyz, *dvcRecvList_xYz, *dvcRecvList_XYz;
int *dvcRecvList_xyZ, *dvcRecvList_XyZ, *dvcRecvList_xYZ, *dvcRecvList_XYZ;
// Build the list of linear site indices covered by the half-open index box
// [imin,imax) x [jmin,jmax) x [kmin,kmax) and hand it back through dvcList.
//
// Each site (i,j,k) is stored as k*Nxh*Nyh + j*Nxh + i. The list is staged in
// a temporary host array, a buffer of matching size is obtained through
// ScaLBL_AllocateZeroCopy, and the indices are copied into it.
//
//   dvcList  [out] receives the newly allocated index buffer
//   returns  the number of indices written
//
// NOTE(review): i, j, k, Nxh and Nyh are not declared here; presumably they
// are members of the enclosing class - confirm. They are left as-is so any
// state callers observe after the call is unchanged.
inline int getHaloBlock(int imin, int imax, int jmin, int jmax, int kmin,
                        int kmax, int *&dvcList) {
    int count = 0;
    int *List = new int[(imax - imin) * (jmax - jmin) * (kmax - kmin)];
    for (k = kmin; k < kmax; k++) {
        for (j = jmin; j < jmax; j++) {
            for (i = imin; i < imax; i++) {
                List[count++] = k * Nxh * Nyh + j * Nxh + i;
            }
        }
    }
    size_t numbytes = count * sizeof(int);
    ScaLBL_AllocateZeroCopy((void **)&dvcList,
                            numbytes); // Allocate device memory
    ScaLBL_CopyToZeroCopy(dvcList, List, numbytes);
    delete[] List; // the staging copy is no longer needed once it has been copied out
    return count;
}

View File

@ -14,278 +14,387 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
// D3Q19 BGK collision, "AA even" step, applied in place to the sites
// start <= n < finish.
//
// dist holds 19 slots of Np values each; slot s of site n is dist[s*Np + n].
// For every site the 19 values are read with each opposite pair swapped
// (f1 comes from slot 2, f2 from slot 1, ...), the moments rho, ux, uy, uz
// are accumulated, and each f_q is relaxed with rate rlx and written to its
// own slot q together with a body-force term built from (Fx, Fy, Fz).
//
// NOTE(review): the q=0 update uses (1.0 - uu) where every other direction
// uses (rho + ... - uu); kept exactly as found - confirm this is intended.
extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish,
                                        int Np, double rlx, double Fx,
                                        double Fy, double Fz) {
    // conserved moments
    double rho, ux, uy, uz, uu;
    // distribution values read for the current site
    double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
        f16, f17, f18;

    for (int n = start; n < finish; n++) {
        // Read phase: odd/even partner slots are swapped on read.
        f0 = dist[n];
        f1 = dist[2 * Np + n];
        f2 = dist[1 * Np + n];
        f3 = dist[4 * Np + n];
        f4 = dist[3 * Np + n];
        f5 = dist[6 * Np + n];
        f6 = dist[5 * Np + n];
        f7 = dist[8 * Np + n];
        f8 = dist[7 * Np + n];
        f9 = dist[10 * Np + n];
        f10 = dist[9 * Np + n];
        f11 = dist[12 * Np + n];
        f12 = dist[11 * Np + n];
        f13 = dist[14 * Np + n];
        f14 = dist[13 * Np + n];
        f15 = dist[16 * Np + n];
        f16 = dist[15 * Np + n];
        f17 = dist[18 * Np + n];
        f18 = dist[17 * Np + n];

        // Moments of the distributions.
        rho = f0 + f2 + f1 + f4 + f3 + f6 + f5 + f8 + f7 + f10 + f9 + f12 +
              f11 + f14 + f13 + f16 + f15 + f18 + f17;
        ux = f1 - f2 + f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14;
        uy = f3 - f4 + f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18;
        uz = f5 - f6 + f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18;
        uu = 1.5 * (ux * ux + uy * uy + uz * uz);

        // Write phase: every direction goes to its own slot.
        // q = 0
        dist[n] = f0 * (1.0 - rlx) + rlx * 0.3333333333333333 * (1.0 - uu);

        // q = 1
        dist[1 * Np + n] =
            f1 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho + 3.0 * ux + 4.5 * ux * ux - uu) +
            0.16666666 * Fx;

        // q = 2
        dist[2 * Np + n] =
            f2 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho - 3.0 * ux + 4.5 * ux * ux - uu) -
            0.16666666 * Fx;

        // q = 3
        dist[3 * Np + n] =
            f3 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho + 3.0 * uy + 4.5 * uy * uy - uu) +
            0.16666666 * Fy;

        // q = 4
        dist[4 * Np + n] =
            f4 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho - 3.0 * uy + 4.5 * uy * uy - uu) -
            0.16666666 * Fy;

        // q = 5
        dist[5 * Np + n] =
            f5 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho + 3.0 * uz + 4.5 * uz * uz - uu) +
            0.16666666 * Fz;

        // q = 6
        dist[6 * Np + n] =
            f6 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho - 3.0 * uz + 4.5 * uz * uz - uu) -
            0.16666666 * Fz;

        // q = 7
        dist[7 * Np + n] =
            f7 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (ux + uy) + 4.5 * (ux + uy) * (ux + uy) - uu) +
            0.08333333333 * (Fx + Fy);

        // q = 8
        dist[8 * Np + n] =
            f8 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (ux + uy) + 4.5 * (ux + uy) * (ux + uy) - uu) -
            0.08333333333 * (Fx + Fy);

        // q = 9
        dist[9 * Np + n] =
            f9 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (ux - uy) + 4.5 * (ux - uy) * (ux - uy) - uu) +
            0.08333333333 * (Fx - Fy);

        // q = 10
        dist[10 * Np + n] =
            f10 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (ux - uy) + 4.5 * (ux - uy) * (ux - uy) - uu) -
            0.08333333333 * (Fx - Fy);

        // q = 11
        dist[11 * Np + n] =
            f11 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (ux + uz) + 4.5 * (ux + uz) * (ux + uz) - uu) +
            0.08333333333 * (Fx + Fz);

        // q = 12
        dist[12 * Np + n] =
            f12 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (ux + uz) + 4.5 * (ux + uz) * (ux + uz) - uu) -
            0.08333333333 * (Fx + Fz);

        // q = 13
        dist[13 * Np + n] =
            f13 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (ux - uz) + 4.5 * (ux - uz) * (ux - uz) - uu) +
            0.08333333333 * (Fx - Fz);

        // q = 14
        dist[14 * Np + n] =
            f14 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (ux - uz) + 4.5 * (ux - uz) * (ux - uz) - uu) -
            0.08333333333 * (Fx - Fz);

        // q = 15
        dist[15 * Np + n] =
            f15 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (uy + uz) + 4.5 * (uy + uz) * (uy + uz) - uu) +
            0.08333333333 * (Fy + Fz);

        // q = 16
        dist[16 * Np + n] =
            f16 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (uy + uz) + 4.5 * (uy + uz) * (uy + uz) - uu) -
            0.08333333333 * (Fy + Fz);

        // q = 17
        dist[17 * Np + n] =
            f17 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (uy - uz) + 4.5 * (uy - uz) * (uy - uz) - uu) +
            0.08333333333 * (Fy - Fz);

        // q = 18
        dist[18 * Np + n] =
            f18 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (uy - uz) + 4.5 * (uy - uz) * (uy - uz) - uu) -
            0.08333333333 * (Fy - Fz);
    }
}
// D3Q19 BGK collision, "AA odd" step, applied in place to the sites
// start <= n < finish.
//
// neighborList holds 18 slots of Np entries; neighborList[n + (q-1)*Np] is
// the index into dist from which f_q of site n is read (f0 is dist[n]).
// After relaxation with rate rlx and the body-force term built from
// (Fx, Fy, Fz), each result is written to the address its opposite direction
// was read from: f1 goes to nr2, f2 to nr1, f3 to nr4, and so on.
//
// NOTE(review): the q=0 update uses (1.0 - uu) where every other direction
// uses (rho + ... - uu); kept exactly as found - confirm this is intended.
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist,
                                       int start, int finish, int Np,
                                       double rlx, double Fx, double Fy,
                                       double Fz) {
    // conserved moments
    double rho, ux, uy, uz, uu;
    // distribution values read for the current site
    double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
        f16, f17, f18;
    // dist indices the values were read from
    int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
        nr14, nr15, nr16, nr17, nr18;

    for (int n = start; n < finish; n++) {
        // q = 0
        f0 = dist[n];
        // q = 1
        nr1 = neighborList[n];
        f1 = dist[nr1];
        // q = 2
        nr2 = neighborList[n + Np];
        f2 = dist[nr2];
        // q = 3
        nr3 = neighborList[n + 2 * Np];
        f3 = dist[nr3];
        // q = 4
        nr4 = neighborList[n + 3 * Np];
        f4 = dist[nr4];
        // q = 5
        nr5 = neighborList[n + 4 * Np];
        f5 = dist[nr5];
        // q = 6
        nr6 = neighborList[n + 5 * Np];
        f6 = dist[nr6];
        // q = 7
        nr7 = neighborList[n + 6 * Np];
        f7 = dist[nr7];
        // q = 8
        nr8 = neighborList[n + 7 * Np];
        f8 = dist[nr8];
        // q = 9
        nr9 = neighborList[n + 8 * Np];
        f9 = dist[nr9];
        // q = 10
        nr10 = neighborList[n + 9 * Np];
        f10 = dist[nr10];
        // q = 11
        nr11 = neighborList[n + 10 * Np];
        f11 = dist[nr11];
        // q = 12
        nr12 = neighborList[n + 11 * Np];
        f12 = dist[nr12];
        // q = 13
        nr13 = neighborList[n + 12 * Np];
        f13 = dist[nr13];
        // q = 14
        nr14 = neighborList[n + 13 * Np];
        f14 = dist[nr14];
        // q = 15
        nr15 = neighborList[n + 14 * Np];
        f15 = dist[nr15];
        // q = 16
        nr16 = neighborList[n + 15 * Np];
        f16 = dist[nr16];
        // q = 17
        nr17 = neighborList[n + 16 * Np];
        f17 = dist[nr17];
        // q = 18
        nr18 = neighborList[n + 17 * Np];
        f18 = dist[nr18];

        // Moments of the distributions.
        rho = f0 + f2 + f1 + f4 + f3 + f6 + f5 + f8 + f7 + f10 + f9 + f12 +
              f11 + f14 + f13 + f16 + f15 + f18 + f17;
        ux = f1 - f2 + f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14;
        uy = f3 - f4 + f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18;
        uz = f5 - f6 + f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18;
        uu = 1.5 * (ux * ux + uy * uy + uz * uz);

        // q = 0
        dist[n] = f0 * (1.0 - rlx) + rlx * 0.3333333333333333 * (1.0 - uu);

        // q = 1 (written where f2 was read)
        dist[nr2] =
            f1 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho + 3.0 * ux + 4.5 * ux * ux - uu) +
            0.16666666 * Fx;

        // q = 2
        dist[nr1] =
            f2 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho - 3.0 * ux + 4.5 * ux * ux - uu) -
            0.16666666 * Fx;

        // q = 3
        dist[nr4] =
            f3 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho + 3.0 * uy + 4.5 * uy * uy - uu) +
            0.16666666 * Fy;

        // q = 4
        dist[nr3] =
            f4 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho - 3.0 * uy + 4.5 * uy * uy - uu) -
            0.16666666 * Fy;

        // q = 5
        dist[nr6] =
            f5 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho + 3.0 * uz + 4.5 * uz * uz - uu) +
            0.16666666 * Fz;

        // q = 6
        dist[nr5] =
            f6 * (1.0 - rlx) +
            rlx * 0.05555555555555555 * (rho - 3.0 * uz + 4.5 * uz * uz - uu) -
            0.16666666 * Fz;

        // q = 7
        dist[nr8] =
            f7 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (ux + uy) + 4.5 * (ux + uy) * (ux + uy) - uu) +
            0.08333333333 * (Fx + Fy);

        // q = 8
        dist[nr7] =
            f8 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (ux + uy) + 4.5 * (ux + uy) * (ux + uy) - uu) -
            0.08333333333 * (Fx + Fy);

        // q = 9
        dist[nr10] =
            f9 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (ux - uy) + 4.5 * (ux - uy) * (ux - uy) - uu) +
            0.08333333333 * (Fx - Fy);

        // q = 10
        dist[nr9] =
            f10 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (ux - uy) + 4.5 * (ux - uy) * (ux - uy) - uu) -
            0.08333333333 * (Fx - Fy);

        // q = 11
        dist[nr12] =
            f11 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (ux + uz) + 4.5 * (ux + uz) * (ux + uz) - uu) +
            0.08333333333 * (Fx + Fz);

        // q = 12
        dist[nr11] =
            f12 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (ux + uz) + 4.5 * (ux + uz) * (ux + uz) - uu) -
            0.08333333333 * (Fx + Fz);

        // q = 13
        dist[nr14] =
            f13 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (ux - uz) + 4.5 * (ux - uz) * (ux - uz) - uu) +
            0.08333333333 * (Fx - Fz);

        // q = 14
        dist[nr13] =
            f14 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (ux - uz) + 4.5 * (ux - uz) * (ux - uz) - uu) -
            0.08333333333 * (Fx - Fz);

        // q = 15
        dist[nr16] =
            f15 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (uy + uz) + 4.5 * (uy + uz) * (uy + uz) - uu) +
            0.08333333333 * (Fy + Fz);

        // q = 16
        dist[nr15] =
            f16 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (uy + uz) + 4.5 * (uy + uz) * (uy + uz) - uu) -
            0.08333333333 * (Fy + Fz);

        // q = 17
        dist[nr18] =
            f17 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho + 3.0 * (uy - uz) + 4.5 * (uy - uz) * (uy - uz) - uu) +
            0.08333333333 * (Fy - Fz);

        // q = 18
        dist[nr17] =
            f18 * (1.0 - rlx) +
            rlx * 0.02777777777777778 *
                (rho - 3.0 * (uy - uz) + 4.5 * (uy - uz) * (uy - uz) - uu) -
            0.08333333333 * (Fy - Fz);
    }
}

// CPU Functions for D3Q7 Lattice Boltzmann Methods
// Gather Data at the listed sites into a contiguous send buffer:
// sendbuf[idx] = Data[list[idx]] for idx in [0, count).
// N is part of the interface but is not used by this implementation.
extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf,
                                   double *Data, int N) {
    for (int idx = 0; idx < count; idx++) {
        const int n = list[idx];
        sendbuf[idx] = Data[n];
    }
}
// Scatter a contiguous receive buffer back to the listed sites:
// Data[list[idx]] = recvbuf[idx] for idx in [0, count).
// N is part of the interface but is not used by this implementation.
extern "C" void ScaLBL_Scalar_Unpack(int *list, int count, double *recvbuf,
                                     double *Data, int N) {
    for (int idx = 0; idx < count; idx++) {
        const int n = list[idx];
        Data[n] = recvbuf[idx];
    }
}
// Unpack one distribution component from the receive buffer.
//
// For idx in [0, count): n = list[idx]; when n is non-negative,
// dist[q*N + n] = recvbuf[start + idx]. Entries with a negative index are
// skipped, so the matching buffer value is dropped.
//
// Original author notes: q matches Cqx, Cqy, Cqz; under the swap rule the
// values in recvbuf belong to the direction OPPOSITE to q; n is an index
// supplied by the sending (non-local) process.
extern "C" void ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count,
                                   double *recvbuf, double *dist, int N) {
    for (int idx = 0; idx < count; idx++) {
        const int n = list[idx];
        if (n < 0)
            continue; // no local destination for this entry
        dist[q * N + n] = recvbuf[start + idx];
    }
}
// Pack `number` interleaved components per listed site into the send buffer
// and clear them at the source:
//   sendbuf[idx*number + c] = Data[number*list[idx] + c];
//   Data[number*list[idx] + c] = 0.0;
// for idx in [0, count) and c in [0, number).
// N is part of the interface but is not used by this implementation.
extern "C" void ScaLBL_PackDenD3Q7(int *list, int count, double *sendbuf,
                                   int number, double *Data, int N) {
    for (int idx = 0; idx < count; idx++) {
        const int n = list[idx];
        for (int component = 0; component < number; component++) {
            sendbuf[idx * number + component] = Data[number * n + component];
            // Set the data value to zero once it's in the buffer!
            Data[number * n + component] = 0.0;
        }
    }
}
// Unpack `number` interleaved components per listed site from the receive
// buffer, adding to the values already stored:
//   Data[number*list[idx] + c] += recvbuf[idx*number + c]
// for idx in [0, count) and c in [0, number).
// N is part of the interface but is not used by this implementation.
extern "C" void ScaLBL_UnpackDenD3Q7(int *list, int count, double *recvbuf,
                                     int number, double *Data, int N) {
    for (int idx = 0; idx < count; idx++) {
        const int n = list[idx];
        for (int component = 0; component < number; component++) {
            Data[number * n + component] += recvbuf[idx * number + component];
        }
    }
}
// For each listed site n, replace the value in slot 6 of dist
// (dist[6*Np + n]) by 0.2222... minus its current value.
extern "C" void ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count,
                                            int Np) {
    for (int idx = 0; idx < count; idx++) {
        const int n = list[idx];
        const double f5 = 0.222222222222222222222222 - dist[6 * Np + n];
        dist[6 * Np + n] = f5;
    }
}
// For each listed site n, replace the value in slot 5 of dist
// (dist[5*Np + n]) by 0.2222... minus its current value.
extern "C" void ScaLBL_D3Q7_Reflection_BC_Z(int *list, double *dist, int count,
                                            int Np) {
    for (int idx = 0; idx < count; idx++) {
        const int n = list[idx];
        const double f6 = 0.222222222222222222222222 - dist[5 * Np + n];
        dist[5 * Np + n] = f6;
    }
}
// Initialise the D3Q7 distributions on an Nx*Ny*Nz grid.
//
// Sites with ID[n] > 0 get the density split as 1/3 of Den[n] in f_even[n]
// and 1/9 of Den[n] in each of the six remaining slots (f_odd slots 0..2,
// f_even slots 1..3). All other sites have every slot set to -1.0.
// f_even holds 4 slots and f_odd 3 slots of N = Nx*Ny*Nz values each.
extern "C" void ScaLBL_D3Q7_Init(char *ID, double *f_even, double *f_odd,
                                 double *Den, int Nx, int Ny, int Nz) {
    const int N = Nx * Ny * Nz;

    for (int n = 0; n < N; n++) {
        if (ID[n] > 0) {
            const double value = Den[n];
            f_even[n] = 0.3333333333333333 * value;
            f_odd[n] = 0.1111111111111111 * value;
            f_even[N + n] = 0.1111111111111111 * value;
            f_odd[N + n] = 0.1111111111111111 * value;
            f_even[2 * N + n] = 0.1111111111111111 * value;
            f_odd[2 * N + n] = 0.1111111111111111 * value;
            f_even[3 * N + n] = 0.1111111111111111 * value;
        } else {
            for (int q = 0; q < 3; q++) {
                f_even[q * N + n] = -1.0;
                f_odd[q * N + n] = -1.0;
            }
            f_even[3 * N + n] = -1.0;
        }
    }
}
// Streaming step for the D3Q7 model using pairwise swaps on a periodic
// Nx*Ny*Nz grid (N = Nx*Ny*Nz values per slot).
//
// For every site n with ID[n] > 0 and for each of the +x, +y, +z neighbours
// nn (wrapped periodically), the local value in distodd slot 0/1/2 is
// exchanged with the neighbour's value in disteven slot 1/2/3. An exchange
// is skipped when the neighbour's value is negative.
//
// Fix: the x index is recovered as n - Nx*Ny*k - Nx*j. The previous code
// subtracted Nz*j, which gave a wrong i (and therefore a wrong periodic wrap
// in x) on any grid with Nx != Nz.
extern "C" void ScaLBL_D3Q7_Swap(char *ID, double *disteven, double *distodd,
                                 int Nx, int Ny, int Nz) {
    int i, j, k, n, nn, N;
    // distributions
    double f1, f2, f3, f4, f5, f6;

    N = Nx * Ny * Nz;

    for (n = 0; n < N; n++) {
        //.......Back out the 3-D indices for node n..............
        k = n / (Nx * Ny);
        j = (n - Nx * Ny * k) / Nx;
        i = n - Nx * Ny * k - Nx * j;

        if (ID[n] > 0) {
            // Local values taking part in the exchange (slot 0 of disteven
            // does not participate in streaming).
            f1 = distodd[n];
            f3 = distodd[N + n];
            f5 = distodd[2 * N + n];

            // +x neighbour
            nn = n + 1;
            if (!(i + 1 < Nx))
                nn -= Nx; // periodic BC along the x-boundary
            f2 = disteven[N + nn];
            if (!(f2 < 0.0)) {
                distodd[n] = f2;
                disteven[N + nn] = f1;
            }

            // +y neighbour
            nn = n + Nx;
            if (!(j + 1 < Ny))
                nn -= Nx * Ny; // periodic BC along the y-boundary
            f4 = disteven[2 * N + nn];
            if (!(f4 < 0.0)) {
                distodd[N + n] = f4;
                disteven[2 * N + nn] = f3;
            }

            // +z neighbour
            nn = n + Nx * Ny;
            if (!(k + 1 < Nz))
                nn -= Nx * Ny * Nz; // periodic BC along the z-boundary
            f6 = disteven[3 * N + nn];
            if (!(f6 < 0.0)) {
                distodd[2 * N + n] = f6;
                disteven[3 * N + nn] = f5;
            }
        }
    }
}
// Compute the density at every site with ID[n] > 0 as the sum of its seven
// distributions (four slots of disteven, three slots of distodd, each slot
// holding N = Nx*Ny*Nz values). Den is left untouched at all other sites.
extern "C" void ScaLBL_D3Q7_Density(char *ID, double *disteven, double *distodd,
                                    double *Den, int Nx, int Ny, int Nz) {
    const int N = Nx * Ny * Nz;

    for (int n = 0; n < N; n++) {
        if (!(ID[n] > 0))
            continue;

        // Read the distributions
        const double f0 = disteven[n];
        const double f2 = disteven[N + n];
        const double f4 = disteven[2 * N + n];
        const double f6 = disteven[3 * N + n];
        const double f1 = distodd[n];
        const double f3 = distodd[N + n];
        const double f5 = distodd[2 * N + n];

        // Compute the density
        Den[n] = f0 + f1 + f2 + f3 + f4 + f5 + f6;
    }
}

#include <string.h>
#include <mm_malloc.h>
// CPU build: there is no device to select, so rank is ignored and 0 is
// always returned.
extern "C" int ScaLBL_SetDevice(int rank) { return 0; }
// Allocate `size` bytes aligned to 64 bytes, zero-filled, and store the
// pointer in *address.
//
// On failure a message is printed and *address is left NULL. The NULL check
// now happens before the memset; previously a failed allocation was passed
// straight to memset.
extern "C" void ScaLBL_AllocateZeroCopy(void **address, size_t size) {
    (*address) = _mm_malloc(size, 64);
    if (*address == NULL) {
        printf("Memory allocation failed! \n");
        return;
    }
    memset(*address, 0, size);
}
// Allocate `size` bytes aligned to 64 bytes, zero-filled, and store the
// pointer in *address.
//
// On failure a message is printed and *address is left NULL. The NULL check
// now happens before the memset; previously a failed allocation was passed
// straight to memset.
extern "C" void ScaLBL_AllocateDeviceMemory(void **address, size_t size) {
    (*address) = _mm_malloc(size, 64);
    if (*address == NULL) {
        printf("Memory allocation failed! \n");
        return;
    }
    memset(*address, 0, size);
}
// Release a buffer with _mm_free.
extern "C" void ScaLBL_FreeDeviceMemory(void *pointer) { _mm_free(pointer); }

// Copy `size` bytes from source to dest. In this CPU build the transfer is a
// plain memcpy.
extern "C" void ScaLBL_CopyToDevice(void *dest, const void *source,
                                    size_t size) {
    // cudaMemcpy(dest,source,size,cudaMemcpyHostToDevice);
    memcpy(dest, source, size);
}

// Copy `size` bytes from source to dest. In this CPU build the transfer is a
// plain memcpy.
extern "C" void ScaLBL_CopyToHost(void *dest, const void *source, size_t size) {
    // cudaMemcpy(dest,source,size,cudaMemcpyDeviceToHost);
    memcpy(dest, source, size);
}

// Copy `size` bytes from source to dest. In this CPU build the transfer is a
// plain memcpy.
extern "C" void ScaLBL_CopyToZeroCopy(void *dest, const void *source,
                                      size_t size) {
    // cudaMemcpy(dest,source,size,cudaMemcpyDeviceToHost);
    memcpy(dest, source, size);
}
// Device synchronisation point. Nothing to wait for in this CPU build, so
// the body is empty; the CUDA call it stands in for is kept as a comment.
extern "C" void ScaLBL_DeviceBarrier() {
    // cudaDeviceSynchronize();
}

#include <stdio.h>
// Ion concentration for the "AA odd" step: for every site n in
// [start, finish), Den[n] is the sum of dist[n] and the six values
// dist[neighborList[n + s*Np]] for s = 0..5, accumulated in that order.
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList,
                                                   double *dist, double *Den,
                                                   int start, int finish,
                                                   int Np) {
    for (int n = start; n < finish; n++) {
        // q=0 is stored at the site itself
        double Ci = dist[n];
        // q=1..6 are fetched through the neighbor list, slot q-1
        for (int slot = 0; slot < 6; slot++) {
            const int nread = neighborList[n + slot * Np];
            Ci += dist[nread];
        }
        Den[n] = Ci;
    }
}
/**
 * Sum the seven D3Q7 distributions into the concentration for nodes [start, finish).
 *
 * All components are read in place. Direction q is read from the slot of its
 * opposite direction (q=1 from 2*Np+n, q=2 from 1*Np+n, and so on).
 *
 * @param dist          distribution array, 7 components with stride Np
 * @param Den           output; Den[n] receives the sum
 * @param start,finish  half-open node range to process
 * @param Np            stride between components of dist
 */
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start,
    int finish, int Np) {
    // Component slot read for q=1..6, in the original accumulation order.
    static const int slot[6] = {2, 1, 4, 3, 6, 5};
    for (int n = start; n < finish; n++) {
        // q=0
        double Ci = dist[n];
        for (int q = 0; q < 6; q++) {
            Ci += dist[slot[q] * Np + n];
        }
        Den[n] = Ci;
    }
}
/**
 * D3Q7 ion update for nodes [start, finish) using neighbor-list addressing.
 *
 * For each node it reads the concentration, velocity and electric field, reads
 * f1..f6 through neighborList, stores the diffusive / advective / electrical
 * fluxes, and writes the relaxed distributions back. Each direction is written
 * to the location its opposite was read from.
 *
 * @param neighborList    6*Np read indices into dist
 * @param dist            distribution array, updated in place
 * @param Den             concentration per node (input)
 * @param FluxDiffusive   output, 3 components with stride Np
 * @param FluxAdvective   output, 3 components with stride Np
 * @param FluxElectrical  output, 3 components with stride Np
 * @param Velocity        input, 3 components with stride Np
 * @param ElectricField   input, 3 components with stride Np
 * @param Di, zi, Vt      combined as zi*Di/Vt to scale the electric field
 * @param rlx             relaxation rate
 * @param start,finish    half-open node range to process
 * @param Np              stride between components
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den,
    double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity,
    double *ElectricField, double Di, int zi, double rlx, double Vt, int start, int finish,
    int Np) {
    for (int n = start; n < finish; n++) {
        // Load data
        const double Ci = Den[n];
        // electrical field
        const double Ex = ElectricField[n + 0 * Np];
        const double Ey = ElectricField[n + 1 * Np];
        const double Ez = ElectricField[n + 2 * Np];
        const double ux = Velocity[n + 0 * Np];
        const double uy = Velocity[n + 1 * Np];
        const double uz = Velocity[n + 2 * Np];
        // electrochemical induced velocity
        const double uEPx = zi * Di / Vt * Ex;
        const double uEPy = zi * Di / Vt * Ey;
        const double uEPz = zi * Di / Vt * Ez;

        // q=0
        const double f0 = dist[n];
        // q=1..6: read locations come from the neighbor list
        const int nr1 = neighborList[n];
        const int nr2 = neighborList[n + Np];
        const int nr3 = neighborList[n + 2 * Np];
        const int nr4 = neighborList[n + 3 * Np];
        const int nr5 = neighborList[n + 4 * Np];
        const int nr6 = neighborList[n + 5 * Np];
        const double f1 = dist[nr1];
        const double f2 = dist[nr2];
        const double f3 = dist[nr3];
        const double f4 = dist[nr4];
        const double f5 = dist[nr5];
        const double f6 = dist[nr6];

        // compute diffusive flux
        const double flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
        const double flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
        const double flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
        FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
        FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
        FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
        FluxAdvective[n + 0 * Np] = ux * Ci;
        FluxAdvective[n + 1 * Np] = uy * Ci;
        FluxAdvective[n + 2 * Np] = uz * Ci;
        FluxElectrical[n + 0 * Np] = uEPx * Ci;
        FluxElectrical[n + 1 * Np] = uEPy * Ci;
        FluxElectrical[n + 2 * Np] = uEPz * Ci;

        // q=0
        dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
        // q=1 (written where q=2 was read)
        dist[nr2] = f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
        // q=2
        dist[nr1] = f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
        // q=3
        dist[nr4] = f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
        // q=4
        dist[nr3] = f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
        // q=5
        dist[nr6] = f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
        // q=6
        dist[nr5] = f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
    }
}
/**
 * D3Q7 ion update for nodes [start, finish) using in-place addressing.
 *
 * For each node it reads the concentration, velocity and electric field, reads
 * f1..f6 from the slot of the opposite direction, stores the diffusive /
 * advective / electrical fluxes, and writes the relaxed distribution for
 * direction q to slot q*Np+n.
 *
 * @param dist            distribution array (7 components, stride Np), updated in place
 * @param Den             concentration per node (input)
 * @param FluxDiffusive   output, 3 components with stride Np
 * @param FluxAdvective   output, 3 components with stride Np
 * @param FluxElectrical  output, 3 components with stride Np
 * @param Velocity        input, 3 components with stride Np
 * @param ElectricField   input, 3 components with stride Np
 * @param Di, zi, Vt      combined as zi*Di/Vt to scale the electric field
 * @param rlx             relaxation rate
 * @param start,finish    half-open node range to process
 * @param Np              stride between components
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive,
    double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
    double Di, int zi, double rlx, double Vt, int start, int finish, int Np) {
    for (int n = start; n < finish; n++) {
        // Load data
        const double Ci = Den[n];
        // electrical field
        const double Ex = ElectricField[n + 0 * Np];
        const double Ey = ElectricField[n + 1 * Np];
        const double Ez = ElectricField[n + 2 * Np];
        const double ux = Velocity[n + 0 * Np];
        const double uy = Velocity[n + 1 * Np];
        const double uz = Velocity[n + 2 * Np];
        // electrochemical induced velocity
        const double uEPx = zi * Di / Vt * Ex;
        const double uEPy = zi * Di / Vt * Ey;
        const double uEPz = zi * Di / Vt * Ez;

        // Each direction is read from the slot of its opposite.
        const double f0 = dist[n];
        const double f1 = dist[2 * Np + n];
        const double f2 = dist[1 * Np + n];
        const double f3 = dist[4 * Np + n];
        const double f4 = dist[3 * Np + n];
        const double f5 = dist[6 * Np + n];
        const double f6 = dist[5 * Np + n];

        // compute diffusive flux
        const double flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
        const double flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
        const double flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
        FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
        FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
        FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
        FluxAdvective[n + 0 * Np] = ux * Ci;
        FluxAdvective[n + 1 * Np] = uy * Ci;
        FluxAdvective[n + 2 * Np] = uz * Ci;
        FluxElectrical[n + 0 * Np] = uEPx * Ci;
        FluxElectrical[n + 1 * Np] = uEPy * Ci;
        FluxElectrical[n + 2 * Np] = uEPz * Ci;

        // q=0
        dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
        // q=1
        dist[1 * Np + n] = f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
        // q=2
        dist[2 * Np + n] = f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
        // q=3
        dist[3 * Np + n] = f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
        // q=4
        dist[4 * Np + n] = f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
        // q=5
        dist[5 * Np + n] = f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
        // q=6
        dist[6 * Np + n] = f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
    }
}
/**
 * Initialise the D3Q7 distributions and concentration to a uniform value.
 *
 * Component 0 gets 0.25*DenInit and components 1..6 get 0.125*DenInit each
 * (these sum to DenInit); Den[n] is set to DenInit.
 *
 * @param dist     output distributions, 7 components with stride Np
 * @param Den      output concentration, Np entries
 * @param DenInit  uniform initial concentration
 * @param Np       number of nodes
 */
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np) {
    for (int n = 0; n < Np; n++) {
        dist[0 * Np + n] = 0.25 * DenInit;
        for (int q = 1; q < 7; q++) {
            dist[q * Np + n] = 0.125 * DenInit;
        }
        Den[n] = DenInit;
    }
}
/**
 * Initialise the D3Q7 distributions from an existing per-node concentration.
 *
 * Component 0 gets 0.25*Den[n] and components 1..6 get 0.125*Den[n] each.
 * Den itself is only read.
 *
 * @param dist  output distributions, 7 components with stride Np
 * @param Den   input concentration, Np entries
 * @param Np    number of nodes
 */
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np) {
    for (int n = 0; n < Np; n++) {
        const double DenInit = Den[n];
        dist[0 * Np + n] = 0.25 * DenInit;
        for (int q = 1; q < 7; q++) {
            dist[q * Np + n] = 0.125 * DenInit;
        }
    }
}

/**
 * Accumulate the charge density contributed by one ion species.
 *
 * For ion_component == 0 the previous content of ChargeDensity is discarded;
 * for later components the contribution F*IonValence*Ci is added to it.
 *
 * @param Den            concentrations, species i stored at offset i*Np
 * @param ChargeDensity  in/out accumulator, one entry per node
 * @param IonValence     valence of this species
 * @param ion_component  index of this species
 * @param start,finish   half-open node range to process
 * @param Np             stride between species in Den
 */
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence,
    int ion_component, int start, int finish, int Np) {
    // Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
    const double F = 96485.0;
    for (int n = start; n < finish; n++) {
        const double Ci = Den[n + ion_component * Np]; // ion concentration of species i
        const double CD_tmp = F * IonValence * Ci;
        // Branch instead of multiplying by (ion_component > 0): a stale NaN/Inf in
        // the accumulator would survive the multiplication by zero.
        const double CD = (ion_component > 0) ? ChargeDensity[n] : 0.0;
        ChargeDensity[n] = CD + CD_tmp;
    }
}

@ -14,279 +14,316 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
/**
 * Fill the D3Q19 distribution arrays with their initial values.
 *
 * Nodes with ID[n] > 0 receive 1/3 for the rest component, 1/18 for the first
 * six directions and 1/36 for the remaining twelve. All other nodes are filled
 * with the sentinel value -1.0.
 *
 * @param ID      per-node label, Nx*Ny*Nz entries
 * @param f_even  10 components with stride N = Nx*Ny*Nz
 * @param f_odd   9 components with stride N
 * @param Nx,Ny,Nz  grid dimensions
 */
extern "C" void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz) {
    const int N = Nx * Ny * Nz;
    for (int n = 0; n < N; n++) {
        if (ID[n] > 0) {
            f_even[n] = 0.3333333333333333;
            // directions 1..6
            for (int q = 0; q < 3; q++) {
                f_odd[q * N + n] = 0.055555555555555555;
                f_even[(q + 1) * N + n] = 0.055555555555555555;
            }
            // directions 7..18
            for (int q = 3; q < 9; q++) {
                f_odd[q * N + n] = 0.0277777777777778;
                f_even[(q + 1) * N + n] = 0.0277777777777778;
            }
        } else {
            for (int q = 0; q < 9; q++) {
                f_even[q * N + n] = -1.0;
                f_odd[q * N + n] = -1.0;
            }
            f_even[9 * N + n] = -1.0;
        }
    }
}
/**
 * Compute the three signed sums of the D3Q19 distributions that the code calls
 * the velocity, for every node with ID[n] > 0.
 *
 * The sums are not divided by density. Nodes with ID[n] <= 0 are left untouched.
 *
 * @param ID        per-node label, N = Nx*Ny*Nz entries
 * @param disteven  even-numbered distributions f2,f4,...,f18 at offsets 1*N..9*N
 * @param distodd   odd-numbered distributions f1,f3,...,f17 at offsets 0..8*N
 * @param vel       output, components x,y,z at offsets 0, N, 2*N
 * @param Nx,Ny,Nz  grid dimensions
 */
extern "C" void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx,
    int Ny, int Nz) {
    const int N = Nx * Ny * Nz;
    for (int n = 0; n < N; n++) {
        if (!(ID[n] > 0)) continue;

        // Registers to store the distributions
        const double f2 = disteven[N + n];
        const double f4 = disteven[2 * N + n];
        const double f6 = disteven[3 * N + n];
        const double f8 = disteven[4 * N + n];
        const double f10 = disteven[5 * N + n];
        const double f12 = disteven[6 * N + n];
        const double f14 = disteven[7 * N + n];
        const double f16 = disteven[8 * N + n];
        const double f18 = disteven[9 * N + n];
        const double f1 = distodd[n];
        const double f3 = distodd[1 * N + n];
        const double f5 = distodd[2 * N + n];
        const double f7 = distodd[3 * N + n];
        const double f9 = distodd[4 * N + n];
        const double f11 = distodd[5 * N + n];
        const double f13 = distodd[6 * N + n];
        const double f15 = distodd[7 * N + n];
        const double f17 = distodd[8 * N + n];

        // Compute the velocity
        const double vx = f1 - f2 + f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14;
        const double vy = f3 - f4 + f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18;
        const double vz = f5 - f6 + f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18;

        // Write the velocity
        vel[n] = vx;
        vel[N + n] = vy;
        vel[2 * N + n] = vz;
    }
}
/**
 * D3Q19 multi-relaxation-time collision for every node with ID[n] > 0.
 *
 * Distributions are read using the "swap" convention (even-numbered directions
 * from distodd, odd-numbered from disteven) and written back un-swapped.
 * Nodes with ID[n] <= 0 are left untouched.
 *
 * @param ID        per-node label, N = Nx*Ny*Nz entries
 * @param disteven  10 components with stride N, updated in place
 * @param distodd   9 components with stride N, updated in place
 * @param Nx,Ny,Nz  grid dimensions
 * @param rlx_setA  relaxation rate applied to m1, m2, m9..m15
 * @param rlx_setB  relaxation rate applied to m4, m6, m8, m16..m18
 * @param Fx,Fy,Fz  external force, added to the distributions after collision
 *
 * NOTE(review): rho is used as a divisor without a zero check, and the force
 * weights 0.16666666 / 0.08333333333 are truncated forms of 1/6 and 1/12; both
 * are kept as in the original so results do not change.
 */
extern "C" void ScaLBL_D3Q19_MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny,
    int Nz, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) {
    // distributions
    double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9;
    double f10, f11, f12, f13, f14, f15, f16, f17, f18;
    // conserved moments
    double rho, jx, jy, jz;
    // non-conserved moments
    double m1, m2, m4, m6, m8, m9, m10, m11, m12, m13, m14, m15, m16, m17, m18;

    const int N = Nx * Ny * Nz;

    for (int n = 0; n < N; n++) {
        if (!(ID[n] > 0)) continue;

        // Registers to store the distributions - read based on swap convention
        f2 = distodd[n];
        f4 = distodd[N + n];
        f6 = distodd[2 * N + n];
        f8 = distodd[3 * N + n];
        f10 = distodd[4 * N + n];
        f12 = distodd[5 * N + n];
        f14 = distodd[6 * N + n];
        f16 = distodd[7 * N + n];
        f18 = distodd[8 * N + n];
        f0 = disteven[n];
        f1 = disteven[N + n];
        f3 = disteven[2 * N + n];
        f5 = disteven[3 * N + n];
        f7 = disteven[4 * N + n];
        f9 = disteven[5 * N + n];
        f11 = disteven[6 * N + n];
        f13 = disteven[7 * N + n];
        f15 = disteven[8 * N + n];
        f17 = disteven[9 * N + n];

        //....................compute the moments...............................
        rho = f0 + f2 + f1 + f4 + f3 + f6 + f5 + f8 + f7 + f10 + f9 + f12 + f11 + f14 + f13 +
              f16 + f15 + f18 + f17;
        m1 = -30 * f0 - 11 * (f2 + f1 + f4 + f3 + f6 + f5) +
             8 * (f8 + f7 + f10 + f9 + f12 + f11 + f14 + f13 + f16 + f15 + f18 + f17);
        m2 = 12 * f0 - 4 * (f2 + f1 + f4 + f3 + f6 + f5) + f8 + f7 + f10 + f9 + f12 + f11 + f14 +
             f13 + f16 + f15 + f18 + f17;
        jx = f1 - f2 + f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14;
        m4 = 4 * (-f1 + f2) + f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14;
        jy = f3 - f4 + f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18;
        m6 = -4 * (f3 - f4) + f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18;
        jz = f5 - f6 + f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18;
        m8 = -4 * (f5 - f6) + f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18;
        m9 = 2 * (f1 + f2) - f3 - f4 - f5 - f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14 -
             2 * (f15 + f16 + f17 + f18);
        m10 = -4 * (f1 + f2) + 2 * (f4 + f3 + f6 + f5) + f8 + f7 + f10 + f9 + f12 + f11 + f14 +
              f13 - 2 * (f16 + f15 + f18 + f17);
        m11 = f4 + f3 - f6 - f5 + f8 + f7 + f10 + f9 - f12 - f11 - f14 - f13;
        m12 = -2 * (f4 + f3 - f6 - f5) + f8 + f7 + f10 + f9 - f12 - f11 - f14 - f13;
        m13 = f8 + f7 - f10 - f9;
        m14 = f16 + f15 - f18 - f17;
        m15 = f12 + f11 - f14 - f13;
        m16 = f7 - f8 + f9 - f10 - f11 + f12 - f13 + f14;
        m17 = -f7 + f8 + f9 - f10 + f15 - f16 + f17 - f18;
        m18 = f11 - f12 - f13 + f14 - f15 + f16 + f17 - f18;
        //..............incorporate external force..............................
        //jx += 0.5*Fx;
        //jy += 0.5*Fy;
        //jz += 0.5*Fz;
        //..............carry out relaxation process............................
        m1 = m1 + rlx_setA * ((19 * (jx * jx + jy * jy + jz * jz) / rho - 11 * rho) - m1);
        m2 = m2 + rlx_setA * ((3 * rho - 5.5 * (jx * jx + jy * jy + jz * jz) / rho) - m2);
        m4 = m4 + rlx_setB * ((-0.6666666666666666 * jx) - m4);
        m6 = m6 + rlx_setB * ((-0.6666666666666666 * jy) - m6);
        m8 = m8 + rlx_setB * ((-0.6666666666666666 * jz) - m8);
        m9 = m9 + rlx_setA * (((2 * jx * jx - jy * jy - jz * jz) / rho) - m9);
        m10 = m10 + rlx_setA * (-0.5 * ((2 * jx * jx - jy * jy - jz * jz) / rho) - m10);
        m11 = m11 + rlx_setA * (((jy * jy - jz * jz) / rho) - m11);
        m12 = m12 + rlx_setA * (-0.5 * ((jy * jy - jz * jz) / rho) - m12);
        m13 = m13 + rlx_setA * ((jx * jy / rho) - m13);
        m14 = m14 + rlx_setA * ((jy * jz / rho) - m14);
        m15 = m15 + rlx_setA * ((jx * jz / rho) - m15);
        m16 = m16 + rlx_setB * (-m16);
        m17 = m17 + rlx_setB * (-m17);
        m18 = m18 + rlx_setB * (-m18);
        //.................inverse transformation...............................
        f0 = 0.05263157894736842 * rho - 0.012531328320802 * m1 + 0.04761904761904762 * m2;
        f1 = 0.05263157894736842 * rho - 0.004594820384294068 * m1 -
             0.01587301587301587 * m2 + 0.1 * (jx - m4) + 0.05555555555555555 * (m9 - m10);
        f2 = 0.05263157894736842 * rho - 0.004594820384294068 * m1 -
             0.01587301587301587 * m2 + 0.1 * (m4 - jx) + 0.05555555555555555 * (m9 - m10);
        f3 = 0.05263157894736842 * rho - 0.004594820384294068 * m1 -
             0.01587301587301587 * m2 + 0.1 * (jy - m6) + 0.02777777777777778 * (m10 - m9) +
             0.08333333333333333 * (m11 - m12);
        f4 = 0.05263157894736842 * rho - 0.004594820384294068 * m1 -
             0.01587301587301587 * m2 + 0.1 * (m6 - jy) + 0.02777777777777778 * (m10 - m9) +
             0.08333333333333333 * (m11 - m12);
        f5 = 0.05263157894736842 * rho - 0.004594820384294068 * m1 -
             0.01587301587301587 * m2 + 0.1 * (jz - m8) + 0.02777777777777778 * (m10 - m9) +
             0.08333333333333333 * (m12 - m11);
        f6 = 0.05263157894736842 * rho - 0.004594820384294068 * m1 -
             0.01587301587301587 * m2 + 0.1 * (m8 - jz) + 0.02777777777777778 * (m10 - m9) +
             0.08333333333333333 * (m12 - m11);
        f7 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
             0.003968253968253968 * m2 + 0.1 * (jx + jy) + 0.025 * (m4 + m6) +
             0.02777777777777778 * m9 + 0.01388888888888889 * m10 +
             0.08333333333333333 * m11 + 0.04166666666666666 * m12 + 0.25 * m13 +
             0.125 * (m16 - m17);
        f8 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
             0.003968253968253968 * m2 - 0.1 * (jx + jy) - 0.025 * (m4 + m6) +
             0.02777777777777778 * m9 + 0.01388888888888889 * m10 +
             0.08333333333333333 * m11 + 0.04166666666666666 * m12 + 0.25 * m13 +
             0.125 * (m17 - m16);
        f9 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
             0.003968253968253968 * m2 + 0.1 * (jx - jy) + 0.025 * (m4 - m6) +
             0.02777777777777778 * m9 + 0.01388888888888889 * m10 +
             0.08333333333333333 * m11 + 0.04166666666666666 * m12 - 0.25 * m13 +
             0.125 * (m16 + m17);
        f10 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 + 0.1 * (jy - jx) + 0.025 * (m6 - m4) +
              0.02777777777777778 * m9 + 0.01388888888888889 * m10 +
              0.08333333333333333 * m11 + 0.04166666666666666 * m12 - 0.25 * m13 -
              0.125 * (m16 + m17);
        f11 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 + 0.1 * (jx + jz) + 0.025 * (m4 + m8) +
              0.02777777777777778 * m9 + 0.01388888888888889 * m10 -
              0.08333333333333333 * m11 - 0.04166666666666666 * m12 + 0.25 * m15 +
              0.125 * (m18 - m16);
        f12 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 - 0.1 * (jx + jz) - 0.025 * (m4 + m8) +
              0.02777777777777778 * m9 + 0.01388888888888889 * m10 -
              0.08333333333333333 * m11 - 0.04166666666666666 * m12 + 0.25 * m15 +
              0.125 * (m16 - m18);
        f13 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 + 0.1 * (jx - jz) + 0.025 * (m4 - m8) +
              0.02777777777777778 * m9 + 0.01388888888888889 * m10 -
              0.08333333333333333 * m11 - 0.04166666666666666 * m12 - 0.25 * m15 -
              0.125 * (m16 + m18);
        f14 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 + 0.1 * (jz - jx) + 0.025 * (m8 - m4) +
              0.02777777777777778 * m9 + 0.01388888888888889 * m10 -
              0.08333333333333333 * m11 - 0.04166666666666666 * m12 - 0.25 * m15 +
              0.125 * (m16 + m18);
        f15 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 + 0.1 * (jy + jz) + 0.025 * (m6 + m8) -
              0.05555555555555555 * m9 - 0.02777777777777778 * m10 + 0.25 * m14 +
              0.125 * (m17 - m18);
        f16 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 - 0.1 * (jy + jz) - 0.025 * (m6 + m8) -
              0.05555555555555555 * m9 - 0.02777777777777778 * m10 + 0.25 * m14 +
              0.125 * (m18 - m17);
        f17 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 + 0.1 * (jy - jz) + 0.025 * (m6 - m8) -
              0.05555555555555555 * m9 - 0.02777777777777778 * m10 - 0.25 * m14 +
              0.125 * (m17 + m18);
        f18 = 0.05263157894736842 * rho + 0.003341687552213868 * m1 +
              0.003968253968253968 * m2 + 0.1 * (jz - jy) + 0.025 * (m8 - m6) -
              0.05555555555555555 * m9 - 0.02777777777777778 * m10 - 0.25 * m14 -
              0.125 * (m17 + m18);

        // incorporate external force
        f1 += 0.16666666 * Fx;
        f2 -= 0.16666666 * Fx;
        f3 += 0.16666666 * Fy;
        f4 -= 0.16666666 * Fy;
        f5 += 0.16666666 * Fz;
        f6 -= 0.16666666 * Fz;
        f7 += 0.08333333333 * (Fx + Fy);
        f8 -= 0.08333333333 * (Fx + Fy);
        f9 += 0.08333333333 * (Fx - Fy);
        f10 -= 0.08333333333 * (Fx - Fy);
        f11 += 0.08333333333 * (Fx + Fz);
        f12 -= 0.08333333333 * (Fx + Fz);
        f13 += 0.08333333333 * (Fx - Fz);
        f14 -= 0.08333333333 * (Fx - Fz);
        f15 += 0.08333333333 * (Fy + Fz);
        f16 -= 0.08333333333 * (Fy + Fz);
        f17 += 0.08333333333 * (Fy - Fz);
        f18 -= 0.08333333333 * (Fy - Fz);

        // Write data based on un-swapped convention
        disteven[n] = f0;
        disteven[N + n] = f2;
        disteven[2 * N + n] = f4;
        disteven[3 * N + n] = f6;
        disteven[4 * N + n] = f8;
        disteven[5 * N + n] = f10;
        disteven[6 * N + n] = f12;
        disteven[7 * N + n] = f14;
        disteven[8 * N + n] = f16;
        disteven[9 * N + n] = f18;
        distodd[n] = f1;
        distodd[N + n] = f3;
        distodd[2 * N + n] = f5;
        distodd[3 * N + n] = f7;
        distodd[4 * N + n] = f9;
        distodd[5 * N + n] = f11;
        distodd[6 * N + n] = f13;
        distodd[7 * N + n] = f15;
        distodd[8 * N + n] = f17;
    }
}

@ -1,48 +1,51 @@
/**
 * \brief Implement Mixed Gradient (Lee et al. JCP 2016)
 *
 * For every idx in [start, finish) the node n = Map[idx] is decoded into
 * (i, j, k) using row length Nx and slab size Nx*Ny. For each of the 18
 * non-rest D3Q19 directions e the stencil
 *     5*Phi(x+e) - Phi(x+2e) - 3*Phi(x) - Phi(x-e)
 * is evaluated and accumulated into Gradient[n]; the six axis-aligned
 * directions carry weight 0.25 and the twelve diagonal directions 0.125.
 *
 * No bounds checking is performed: Phi is read up to two lattice sites away
 * from node n along every direction.
 *
 * @param Map       regular-layout index n for each idx
 * @param Phi       scalar field in regular layout
 * @param Gradient  output, written at the regular-layout index n
 * @param start     first idx to process
 * @param finish    one past the last idx to process
 * @param Np        not used by this routine
 * @param Nx        grid extent in x (row length)
 * @param Ny        grid extent in y
 * @param Nz        not used by this routine
 */
extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi,
                                           double *Gradient, int start,
                                           int finish, int Np, int Nx, int Ny,
                                           int Nz) {
    static const int D3Q19[18][3] = {
        {1, 0, 0},  {-1, 0, 0},  {0, 1, 0},  {0, -1, 0}, {0, 0, 1},  {0, 0, -1},
        {1, 1, 0},  {-1, -1, 0}, {1, -1, 0}, {-1, 1, 0}, {1, 0, 1},  {-1, 0, -1},
        {1, 0, -1}, {-1, 0, 1},  {0, 1, 1},  {0, -1, -1}, {0, 1, -1}, {0, -1, 1}};

    const int slab = Nx * Ny;

    for (int idx = start; idx < finish; idx++) {
        const int n = Map[idx]; // layout in regular array
        //.......Back out the 3-D indices for node n..............
        const int k = n / slab;
        const int j = (n - slab * k) / Nx;
        const int i = n - slab * k - Nx * j;

        const double v = Phi[n];
        double grad = 0.0;

        for (int q = 0; q < 18; q++) {
            const int iqx = D3Q19[q][0];
            const int iqy = D3Q19[q][1];
            const int iqz = D3Q19[q][2];
            // first six directions are axis-aligned, the remainder diagonal
            const double weight = (q < 6) ? 0.25 : 0.125;

            const int np = (k + iqz) * slab + (j + iqy) * Nx + i + iqx;
            const int np2 =
                (k + 2 * iqz) * slab + (j + 2 * iqy) * Nx + i + 2 * iqx;
            const int nm = (k - iqz) * slab + (j - iqy) * Nx + i - iqx;

            const double vp = Phi[np];
            const double vp2 = Phi[np2];
            const double vm = Phi[nm];
            grad += weight * (5.0 * vp - vp2 - 3.0 * v - vm);
        }
        Gradient[n] = grad;
    }
}

@ -1,320 +1,342 @@
/**
 * \brief Sum the D3Q7 distributions into the electric potential (AA-odd access)
 *
 * For each n in [start, finish) the q=0 value dist[n] and the six values
 * dist[neighborList[n + q*Np]], q = 0..5, are added in that order and the
 * result is stored in Psi[Map[n]].
 *
 * @param neighborList read locations in dist, six consecutive blocks of length Np
 * @param Map          index into Psi for each node n
 * @param dist         D3Q7 distributions
 * @param Psi          electric potential (output)
 * @param start        first node to process
 * @param finish       one past the last node to process
 * @param Np           stride between neighborList blocks
 */
extern "C" void
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList, int *Map,
                                            double *dist, double *Psi,
                                            int start, int finish, int Np) {
    for (int n = start; n < finish; n++) {
        // q = 0 lives at the node itself
        double psi = dist[n];

        // q = 1..6 are gathered through the neighbor list
        for (int q = 0; q < 6; q++) {
            const int nread = neighborList[n + q * Np];
            psi += dist[nread];
        }

        Psi[Map[n]] = psi;
    }
}
/**
 * \brief Sum the D3Q7 distributions into the electric potential (AA-even access)
 *
 * For each n in [start, finish) the value dist[n] is combined with the six
 * values stored at block*Np + n, visiting the blocks in the order
 * 2, 1, 4, 3, 6, 5 (q = 1..6 respectively). The sum is stored in Psi[Map[n]].
 *
 * @param Map     index into Psi for each node n
 * @param dist    D3Q7 distributions, seven blocks of length Np
 * @param Psi     electric potential (output)
 * @param start   first node to process
 * @param finish  one past the last node to process
 * @param Np      stride between distribution blocks
 */
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(
    int *Map, double *dist, double *Psi, int start, int finish, int Np) {
    // block of dist that holds q = 1..6; each pair of blocks is swapped
    static const int block[6] = {2, 1, 4, 3, 6, 5};

    for (int n = start; n < finish; n++) {
        // q = 0
        double psi = dist[n];

        // q = 1..6
        for (int q = 0; q < 6; q++) {
            psi += dist[block[q] * Np + n];
        }

        Psi[Map[n]] = psi;
    }
}
/**
 * \brief D3Q7 Poisson update with indirect (AA-odd) access
 *
 * For each n in [start, finish):
 *  - the six moving distributions f1..f6 are read from
 *    dist[neighborList[n + (q-1)*Np]] and f0 from dist[n];
 *  - the electric field (f1-f2, f3-f4, f5-f6) * (1/tau) * 4.0 is written to
 *    ElectricField[n], ElectricField[n+Np], ElectricField[n+2*Np];
 *  - every distribution is relaxed as f*(1 - 1/tau) + w*(psi/tau + rho_e),
 *    with w = 0.25 for q=0 and 0.125 otherwise, where psi = Psi[Map[n]] and
 *    rho_e = Den_charge[n] / epsilon_LB. The relaxed f1 is stored where f2
 *    was read and vice versa (likewise for the 3/4 and 5/6 pairs).
 *
 * @param neighborList  read/write locations in dist, six blocks of length Np
 * @param Map           index into Psi for each node n
 * @param dist          D3Q7 distributions (updated in place)
 * @param Den_charge    charge density per node
 * @param Psi           electric potential (input)
 * @param ElectricField output, three blocks of length Np
 * @param tau           relaxation time
 * @param epsilon_LB    divisor applied to the charge density
 * @param start         first node to process
 * @param finish        one past the last node to process
 * @param Np            stride between blocks
 */
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map,
                                          double *dist, double *Den_charge,
                                          double *Psi, double *ElectricField,
                                          double tau, double epsilon_LB,
                                          int start, int finish, int Np) {
    const double rlx = 1.0 / tau;
    const double keep = 1.0 - rlx;

    for (int n = start; n < finish; n++) {
        //Load data
        const double rho_e = Den_charge[n] / epsilon_LB; //local charge density
        const double psi = Psi[Map[n]];                  //electric potential

        // locations of q = 1..6
        const int nr1 = neighborList[n];
        const int nr2 = neighborList[n + Np];
        const int nr3 = neighborList[n + 2 * Np];
        const int nr4 = neighborList[n + 3 * Np];
        const int nr5 = neighborList[n + 4 * Np];
        const int nr6 = neighborList[n + 5 * Np];

        // all reads happen before any write to dist
        const double f0 = dist[n];
        const double f1 = dist[nr1];
        const double f2 = dist[nr2];
        const double f3 = dist[nr3];
        const double f4 = dist[nr4];
        const double f5 = dist[nr5];
        const double f6 = dist[nr6];

        //NOTE the unit of electric field here is V/lu
        //factor 4.0 is D3Q7 lattice squared speed of sound
        ElectricField[n + 0 * Np] = (f1 - f2) * rlx * 4.0;
        ElectricField[n + 1 * Np] = (f3 - f4) * rlx * 4.0;
        ElectricField[n + 2 * Np] = (f5 - f6) * rlx * 4.0;

        const double source = rlx * psi + rho_e;

        // q = 0
        dist[n] = f0 * keep + 0.25 * source;
        // q = 1
        dist[nr2] = f1 * keep + 0.125 * source;
        // q = 2
        dist[nr1] = f2 * keep + 0.125 * source;
        // q = 3
        dist[nr4] = f3 * keep + 0.125 * source;
        // q = 4
        dist[nr3] = f4 * keep + 0.125 * source;
        // q = 5
        dist[nr6] = f5 * keep + 0.125 * source;
        // q = 6
        dist[nr5] = f6 * keep + 0.125 * source;
    }
}
/**
 * \brief D3Q7 Poisson update with direct (AA-even) access
 *
 * For each n in [start, finish):
 *  - f0 is read from dist[n] and f1..f6 from blocks 2, 1, 4, 3, 6, 5 of dist
 *    (block b is located at b*Np + n);
 *  - the electric field (f1-f2, f3-f4, f5-f6) * (1/tau) * 4.0 is written to
 *    ElectricField[n], ElectricField[n+Np], ElectricField[n+2*Np];
 *  - every distribution is relaxed as f*(1 - 1/tau) + w*(psi/tau + rho_e),
 *    with w = 0.25 for q=0 and 0.125 otherwise, where psi = Psi[Map[n]] and
 *    rho_e = Den_charge[n] / epsilon_LB, and the relaxed f_q is stored in
 *    block q.
 *
 * @param Map           index into Psi for each node n
 * @param dist          D3Q7 distributions, seven blocks of length Np (updated in place)
 * @param Den_charge    charge density per node
 * @param Psi           electric potential (input)
 * @param ElectricField output, three blocks of length Np
 * @param tau           relaxation time
 * @param epsilon_LB    divisor applied to the charge density
 * @param start         first node to process
 * @param finish        one past the last node to process
 * @param Np            stride between blocks
 */
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist,
                                           double *Den_charge, double *Psi,
                                           double *ElectricField, double tau,
                                           double epsilon_LB, int start,
                                           int finish, int Np) {
    const double rlx = 1.0 / tau;
    const double keep = 1.0 - rlx;

    for (int n = start; n < finish; n++) {
        //Load data
        const double rho_e = Den_charge[n] / epsilon_LB; //local charge density
        const double psi = Psi[Map[n]];                  //electric potential

        // reads use the swapped block order; writes below use the natural one
        const double f0 = dist[n];
        const double f1 = dist[2 * Np + n];
        const double f2 = dist[1 * Np + n];
        const double f3 = dist[4 * Np + n];
        const double f4 = dist[3 * Np + n];
        const double f5 = dist[6 * Np + n];
        const double f6 = dist[5 * Np + n];

        //NOTE the unit of electric field here is V/lu
        //factor 4.0 is D3Q7 lattice squared speed of sound
        ElectricField[n + 0 * Np] = (f1 - f2) * rlx * 4.0;
        ElectricField[n + 1 * Np] = (f3 - f4) * rlx * 4.0;
        ElectricField[n + 2 * Np] = (f5 - f6) * rlx * 4.0;

        const double source = rlx * psi + rho_e;

        // q = 0
        dist[n] = f0 * keep + 0.25 * source;
        // q = 1
        dist[1 * Np + n] = f1 * keep + 0.125 * source;
        // q = 2
        dist[2 * Np + n] = f2 * keep + 0.125 * source;
        // q = 3
        dist[3 * Np + n] = f3 * keep + 0.125 * source;
        // q = 4
        dist[4 * Np + n] = f4 * keep + 0.125 * source;
        // q = 5
        dist[5 * Np + n] = f5 * keep + 0.125 * source;
        // q = 6
        dist[6 * Np + n] = f6 * keep + 0.125 * source;
    }
}
/**
 * \brief Initialize the D3Q7 Poisson distributions from the electric potential
 *
 * For each n in [start, finish) the potential psi = Psi[Map[n]] is split
 * across the seven distribution blocks: 0.25*psi in block 0 and 0.125*psi in
 * each of blocks 1..6 (block q is located at q*Np + n).
 *
 * @param Map     index into Psi for each node n
 * @param dist    D3Q7 distributions, seven blocks of length Np (output)
 * @param Psi     electric potential (input)
 * @param start   first node to process
 * @param finish  one past the last node to process
 * @param Np      stride between distribution blocks
 */
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi,
                                         int start, int finish, int Np) {
    for (int n = start; n < finish; n++) {
        const double psi = Psi[Map[n]];

        dist[0 * Np + n] = 0.25 * psi;
        for (int q = 1; q < 7; q++) {
            dist[q * Np + n] = 0.125 * psi;
        }
    }
}
/**
 * \brief Residual of the Poisson equation evaluated with an 18-neighbor stencil
 *
 * For each n in [start, finish), with ijk = Map[n] and psi = Psi[ijk]:
 *   Laplacian = 2*3/18 * ( sum(6 axis neighbors) - 6*psi
 *                          + 0.5 * ( sum(12 diagonal neighbors) - 12*psi ) )
 *   ResidualError[n] = Laplacian + Den_charge[n] / epsilon_LB
 * Neighbors are addressed relative to ijk using unit stride in x, strideY in
 * y and strideZ in z. No bounds checking is performed.
 *
 * @param neighborList  not used by this routine
 * @param Map           index into Psi for each node n
 * @param ResidualError output, one value per node n
 * @param Psi           electric potential in regular layout
 * @param Den_charge    charge density per node
 * @param epsilon_LB    divisor applied to the charge density
 * @param strideY       index offset between adjacent y positions in Psi
 * @param strideZ       index offset between adjacent z positions in Psi
 * @param start         first node to process
 * @param finish        one past the last node to process
 */
extern "C" void ScaLBL_D3Q7_PoissonResidualError(
    int *neighborList, int *Map, double *ResidualError, double *Psi,
    double *Den_charge, double epsilon_LB, int strideY, int strideZ, int start,
    int finish) {
    // offsets of the six axis-aligned neighbors (m1..m6)
    const int axis[6] = {-1, 1, -strideY, strideY, -strideZ, strideZ};
    // offsets of the twelve diagonal neighbors (m7..m18)
    const int diag[12] = {-strideY - 1,       strideY + 1,
                          strideY - 1,        -strideY + 1,
                          -strideZ - 1,       strideZ + 1,
                          strideZ - 1,        -strideZ + 1,
                          -strideZ - strideY, strideZ + strideY,
                          strideZ - strideY,  -strideZ + strideY};

    for (int n = start; n < finish; n++) {
        //Load data
        const double rho_e = Den_charge[n]; //local charge density
        const int ijk = Map[n];
        const double psi = Psi[ijk]; //electric potential

        // accumulate in the same left-to-right order as m1 + m2 + ... + m6
        double axisSum = Psi[ijk + axis[0]];
        for (int q = 1; q < 6; q++) {
            axisSum += Psi[ijk + axis[q]];
        }

        // accumulate in the same left-to-right order as m7 + m8 + ... + m18
        double diagSum = Psi[ijk + diag[0]];
        for (int q = 1; q < 12; q++) {
            diagSum += Psi[ijk + diag[q]];
        }

        //Laplacian of electric potential
        const double psi_Laplacian =
            2.0 * 3.0 / 18.0 *
            (axisSum - 6 * psi + 0.5 * (diagSum - 12 * psi));

        ResidualError[n] = psi_Laplacian + rho_e / epsilon_LB;
    }
}
//extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC,
// int strideY, int strideZ,int start, int finish, int Np){
@ -413,7 +435,7 @@ extern "C" void ScaLBL_D3Q7_PoissonResidualError(int *neighborList, int *Map, do
// nx = 1.f/6.f*(m1-m2);//but looks like it needs to multiply another factor of 3
// ny = 1.f/6.f*(m3-m4);
// nz = 1.f/6.f*(m5-m6);
// ElectricField[n] = nx;
// ElectricField[Np+n] = ny;
// ElectricField[2*Np+n] = nz;

@ -30,6 +30,3 @@
// cpu implementation for thermal lattice boltzmann methods
// copyright James McClure, 2014

@ -33,7 +33,6 @@ Implementation of color lattice boltzmann model
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
#ifndef ScaLBL_ColorModel_INC
#define ScaLBL_ColorModel_INC
@ -54,81 +53,81 @@ public:
* @param NP number of processors
* @param COMM MPI communicator
* Mass transport equations are described by D3Q7 scheme
class ScaLBL_ColorModel{
class ScaLBL_ColorModel {
* \brief Constructor
@ -54,81 +53,81 @@ public:
* @param NP number of processors
* @param COMM MPI communicator
ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM);
ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI &COMM);
* \brief Read simulation parameters
* @param filename input database file that includes "Color" section
void ReadParams(string filename);
void ReadParams(string filename);
* \brief Read simulation parameters
* @param db0 input database that includes "Color" section
void ReadParams(std::shared_ptr<Database> db0);
void ReadParams(std::shared_ptr<Database> db0);
* \brief Create domain data structures
void SetDomain();
void SetDomain();
* \brief Read image data
void ReadInput();
void ReadInput();
* \brief Create color model data structures
void Create();
void Create();
* \brief Initialize the simulation
void Initialize();
void Initialize();
* \brief Run the simulation
void Run();
void Run();
* \brief Run the simulation
* @param returntime - timestep at which the routine will return
double Run(int returntime);
double Run(int returntime);
* \brief Debugging function to dump simulation state to disk
void WriteDebug();
void WriteDebug();
* \brief Copy the phase field for use by external methods
* @param f - DoubleArray to hold the phase field
void getPhaseField(DoubleArray &f);
bool Restart,pBC;
int timestep,timestepMax;
int BoundaryCondition;
double tauA,tauB,rhoA,rhoB,alpha,beta;
double Fx,Fy,Fz,flux;
double din,dout,inletA,inletB,outletA,outletB;
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
void getPhaseField(DoubleArray &f);
std::shared_ptr<Domain> Dm; // this domain is for analysis
@ -137,33 +136,32 @@ public:
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
bool Restart, pBC;
int timestep, timestepMax;
int BoundaryCondition;
double tauA, tauB, rhoA, rhoB, alpha, beta;
double Fx, Fy, Fz, flux;
double din, dout, inletA, inletB, outletA, outletB;
int Nx, Ny, Nz, N, Np;
int rank, nprocx, nprocy, nprocz, nprocs;
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
std::shared_ptr<SubPhase> Averages;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
@ -137,33 +136,32 @@ public:
std::shared_ptr<Database> vis_db;
IntArray Map;
signed char *id;
int *NeighborList;
int *dvcMap;
double *fq, *Aq, *Bq;
double *Den, *Phi;
double *ColorGrad;
double *Velocity;
double *Pressure;
signed char *id;
int *NeighborList;
int *dvcMap;
double *fq, *Aq, *Bq;
double *Den, *Phi;
double *ColorGrad;
double *Velocity;
double *Pressure;
* \brief Assign wetting affinity values
void AssignComponentLabels(double *phase);
Utilities::MPI comm;
void AssignComponentLabels(double *phase);
int dist_mem_size;
int neighborSize;
// filenames
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);

View File

@ -16,38 +16,38 @@ Implementation of color lattice boltzmann model
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
class ScaLBL_DFHModel{
class ScaLBL_DFHModel {
ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM);
// functions in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void AssignSolidPotential();
void Run();
void WriteDebug();
bool Restart,pBC;
int timestep,timestepMax;
int BoundaryCondition;
double tauA,tauB,rhoA,rhoB,alpha,beta;
double Fx,Fy,Fz,flux;
double din,dout,inletA,inletB,outletA,outletB;
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI &COMM);
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// functions in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void AssignSolidPotential();
void Run();
void WriteDebug();
bool Restart, pBC;
int timestep, timestepMax;
int BoundaryCondition;
double tauA, tauB, rhoA, rhoB, alpha, beta;
double Fx, Fy, Fz, flux;
double din, dout, inletA, inletB, outletA, outletB;
int Nx, Ny, Nz, N, Np;
int rank, nprocx, nprocy, nprocz, nprocs;
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<TwoPhase> Averages;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
@ -64,20 +64,18 @@ public:
double *Velocity;
double *Gradient;
double *Pressure;
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
void AssignComponentLabels(double *phase);

View File

@ -19,50 +19,50 @@ Implementation of Lee et al JCP 2016 lattice boltzmann model
#ifndef ScaLBL_FreeLeeModel_INC
#define ScaLBL_FreeLeeModel_INC
class ScaLBL_FreeLeeModel{
class ScaLBL_FreeLeeModel {
ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM);
// functions in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create_TwoFluid();
void Initialize_TwoFluid();
double Run_TwoFluid(int returntime);
ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI &COMM);
void WriteDebug_TwoFluid();
void Create_SingleFluid();
void Initialize_SingleFluid();
void Run_SingleFluid();
void WriteDebug_SingleFluid();
// functions in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create_TwoFluid();
void Initialize_TwoFluid();
double Run_TwoFluid(int returntime);
void WriteDebug_TwoFluid();
void Create_SingleFluid();
void Initialize_SingleFluid();
void Run_SingleFluid();
void WriteDebug_SingleFluid();
// test utilities
void Create_DummyPhase_MGTest();
void MGTest();
bool Restart,pBC;
int timestep,timestepMax;
int BoundaryCondition;
double tauA,tauB,rhoA,rhoB;
double tau, rho0;//only for single-fluid Lee model
double tauM;//relaxation time for phase field (or mass)
double W,gamma,kappa,beta;
double Fx,Fy,Fz,flux;
double din,dout,inletA,inletB,outletA,outletB;
int Nx,Ny,Nz,N,Np;
int Nxh,Nyh,Nzh,Nh; // extra halo width
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
std::shared_ptr<ScaLBLWideHalo_Communicator> ScaLBL_Comm_WideHalo;
bool Restart, pBC;
int timestep, timestepMax;
int BoundaryCondition;
double tauA, tauB, rhoA, rhoB;
double tau, rho0; //only for single-fluid Lee model
double tauM; //relaxation time for phase field (or mass)
double W, gamma, kappa, beta;
double Fx, Fy, Fz, flux;
double din, dout, inletA, inletB, outletA, outletB;
int Nx, Ny, Nz, N, Np;
int Nxh, Nyh, Nzh, Nh; // extra halo width
int rank, nprocx, nprocy, nprocz, nprocs;
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
std::shared_ptr<ScaLBLWideHalo_Communicator> ScaLBL_Comm_WideHalo;
// input database
@ -72,35 +72,34 @@ public:
@ -72,35 +72,34 @@ public:
std::shared_ptr<Database> vis_db;
IntArray Map;
signed char *id;
int *NeighborList;
int *dvcMap;
double *gqbar, *hq;
double *mu_phi, *Den, *Phi;
double *ColorGrad;
double *Velocity;
double *Pressure;
void getPhase(DoubleArray &PhaseValues);
void getPotential(DoubleArray &PressureValues, DoubleArray &MuValues);
void getVelocity(DoubleArray &Vx, DoubleArray &Vy, DoubleArray &Vz);
signed char *id;
int *NeighborList;
int *dvcMap;
double *gqbar, *hq;
double *mu_phi, *Den, *Phi;
double *ColorGrad;
double *Velocity;
double *Pressure;
void getPhase(DoubleArray &PhaseValues);
void getPotential(DoubleArray &PressureValues, DoubleArray &MuValues);
void getVelocity(DoubleArray &Vx, DoubleArray &Vy, DoubleArray &Vz);
void getData_RegularLayout(const double *data, DoubleArray &regdata);
DoubleArray SignDist;
DoubleArray SignDist;
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
void AssignComponentLabels_ChemPotential_ColorGrad();
void AssignComponentLabels_ChemPotential_ColorGrad();

View File

@ -25,8 +25,7 @@ Implementation of two-fluid greyscale color lattice boltzmann model
* Mass transport equations are described by D3Q7 scheme
class ScaLBL_GreyscaleColorModel{
class ScaLBL_GreyscaleColorModel {
* \brief Constructor
@ -34,75 +33,76 @@ public:
* @param NP number of processors
* @param COMM MPI communicator
ScaLBL_GreyscaleColorModel(int RANK, int NP, const Utilities::MPI& COMM);
// functions in the order they should be run
ScaLBL_GreyscaleColorModel(int RANK, int NP, const Utilities::MPI &COMM);
// functions in the order they should be run
* \brief Read simulation parameters
* @param filename input database file that includes "Color" section
void ReadParams(string filename);
void ReadParams(string filename);
* \brief Read simulation parameters
* @param db0 input database that includes "Color" section
void ReadParams(std::shared_ptr<Database> db0);
void ReadParams(std::shared_ptr<Database> db0);
* \brief Create domain data structures
void SetDomain();
void SetDomain();
* \brief Read image data
void ReadInput();
void ReadInput();
* \brief Create color model data structures
void Create();
void Create();
* \brief Initialize the simulation
void Initialize();
void Initialize();
* \brief Run the simulation
void Run();
void Run();
* \brief Debugging function to dump simulation state to disk
void WriteDebug();
void WriteDebug();
void WriteVisFiles();
bool Restart,pBC;
int timestep,timestepMax;
int BoundaryCondition;
double tauA,tauB,rhoA,rhoB,alpha,beta;
double tauA_eff,tauB_eff;
double Fx,Fy,Fz,flux;
double din,dout,inletA,inletB,outletA,outletB;
double GreyPorosity;
bool RecoloringOff;//recoloring can be turn off for grey nodes if this is true
//double W;//wetting strength parameter for capillary pressure penalty for grey nodes
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
bool Restart, pBC;
int timestep, timestepMax;
int BoundaryCondition;
double tauA, tauB, rhoA, rhoB, alpha, beta;
double tauA_eff, tauB_eff;
double Fx, Fy, Fz, flux;
double din, dout, inletA, inletB, outletA, outletB;
double GreyPorosity;
bool RecoloringOff; //recoloring can be turned off for grey nodes if this is true
//double W;//wetting strength parameter for capillary pressure penalty for grey nodes
int Nx, Ny, Nz, N, Np;
int rank, nprocx, nprocy, nprocz, nprocs;
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular;
std::shared_ptr<GreyPhaseAnalysis> Averages;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
@ -111,11 +111,11 @@ public:
std::shared_ptr<Database> vis_db;
IntArray Map;
signed char *id;
int *NeighborList;
int *dvcMap;
double *fq, *Aq, *Bq;
double *Den, *Phi;
signed char *id;
int *NeighborList;
int *dvcMap;
double *fq, *Aq, *Bq;
double *Den, *Phi;
//double *GreySolidPhi; //Model 2 & 3
//double *GreySolidGrad;//Model 1 & 4
double *GreySolidW;
@ -123,31 +123,31 @@ public:
double *GreySw;
double *GreyKn;
double *GreyKw;
double *MobilityRatio;
double *Velocity;
double *Pressure;
double *MobilityRatio;
double *Velocity;
double *Pressure;
double *Porosity_dvc;
double *Permeability_dvc;
//double *Psi;
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
@ -161,4 +161,3 @@ private:
void AssignComponentLabels();
* \brief Assign wetting affinity values in greyscale regions
@ -161,4 +161,3 @@ private:
double SeedPhaseField(const double seed_water_in_oil);

View File

@ -31,43 +31,43 @@
#include "ProfilerApp.h"
#include "threadpool/thread_pool.h"
class ScaLBL_GreyscaleModel{
class ScaLBL_GreyscaleModel {
ScaLBL_GreyscaleModel(int RANK, int NP, const Utilities::MPI& COMM);
// functions in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run();
void WriteDebug();
void VelocityField();
bool Restart,pBC;
int timestep,timestepMax;
int BoundaryCondition;
int CollisionType;
double tau;
double tau_eff;
double Den;//constant density
double tolerance;
double Fx,Fy,Fz,flux;
double din,dout;
double dp;//solid particle diameter, unit in voxel
double GreyPorosity;
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
ScaLBL_GreyscaleModel(int RANK, int NP, const Utilities::MPI &COMM);
// functions in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run();
void WriteDebug();
void VelocityField();
bool Restart, pBC;
int timestep, timestepMax;
int BoundaryCondition;
int CollisionType;
double tau;
double tau_eff;
double Den; //constant density
double tolerance;
double Fx, Fy, Fz, flux;
double din, dout;
double dp; //solid particle diameter, unit in voxel
double GreyPorosity;
int Nx, Ny, Nz, N, Np;
int rank, nprocx, nprocy, nprocz, nprocs;
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
@ -75,13 +75,13 @@ public:
std::shared_ptr<Database> analysis_db;
std::shared_ptr<Database> vis_db;
signed char *id;
int *NeighborList;
double *fq;
double *Permeability;//grey voxel permeability
double *Porosity;
double *Velocity;
double *Pressure_dvc;
signed char *id;
int *NeighborList;
double *fq;
double *Permeability; //grey voxel permeability
double *Porosity;
double *Velocity;
double *Pressure_dvc;
IntArray Map;
DoubleArray SignDist;
DoubleArray Velocity_x;
@ -89,18 +89,19 @@ public:
DoubleArray Velocity_z;
DoubleArray PorosityMap;
DoubleArray Pressure;
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
Utilities::MPI comm;
int dist_mem_size;
int neighborSize;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
void AssignComponentLabels(double *Porosity, double *Permeablity);
void AssignComponentLabels(double *Porosity,double *Permeability,const vector<std::string> &File_poro,const vector<std::string> &File_perm);
void AssignComponentLabels(double *Porosity, double *Permeablity);
void AssignComponentLabels(double *Porosity, double *Permeability,
const vector<std::string> &File_poro,
const vector<std::string> &File_perm);

View File

@ -20,25 +20,28 @@
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
class ScaLBL_IonModel{
class ScaLBL_IonModel {
ScaLBL_IonModel(int RANK, int NP, const Utilities::MPI& COMM);
// functions in the order they should be run
void ReadParams(string filename,vector<int> &num_iter);
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run(double *Velocity, double *ElectricField);
ScaLBL_IonModel(int RANK, int NP, const Utilities::MPI &COMM);
// functions in the order they should be run
void ReadParams(string filename, vector<int> &num_iter);
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run(double *Velocity, double *ElectricField);
void getIonConcentration(DoubleArray &IonConcentration, const size_t ic);
void getIonConcentration_debug(int timestep);
void getIonFluxDiffusive(DoubleArray &IonFlux_x,DoubleArray &IonFlux_y,DoubleArray &IonFlux_z,const size_t ic);
void getIonFluxAdvective(DoubleArray &IonFlux_x,DoubleArray &IonFlux_y,DoubleArray &IonFlux_z,const size_t ic);
void getIonFluxElectrical(DoubleArray &IonFlux_x,DoubleArray &IonFlux_y,DoubleArray &IonFlux_z,const size_t ic);
void getIonFluxDiffusive(DoubleArray &IonFlux_x, DoubleArray &IonFlux_y,
DoubleArray &IonFlux_z, const size_t ic);
void getIonFluxAdvective(DoubleArray &IonFlux_x, DoubleArray &IonFlux_y,
DoubleArray &IonFlux_z, const size_t ic);
void getIonFluxElectrical(DoubleArray &IonFlux_x, DoubleArray &IonFlux_y,
DoubleArray &IonFlux_z, const size_t ic);
void getIonFluxDiffusive_debug(int timestep);
void getIonFluxAdvective_debug(int timestep);
void getIonFluxElectrical_debug(int timestep);
@ -46,35 +49,37 @@ public:
void DummyElectricField();
double CalIonDenConvergence(vector<double> &ci_avg_previous);
//bool Restart,pBC;
int timestep;
//bool Restart,pBC;
int timestep;
vector<int> timestepMax;
int BoundaryConditionSolid;
double h;//domain resolution, unit [um/lu]
double kb,electron_charge,T,Vt;
int BoundaryConditionSolid;
double h; //domain resolution, unit [um/lu]
double kb, electron_charge, T, Vt;
double k2_inv;
double tolerance;
double fluidVelx_dummy,fluidVely_dummy,fluidVelz_dummy;
double Ex_dummy,Ey_dummy,Ez_dummy;
size_t number_ion_species;
vector<int> BoundaryConditionInlet;
vector<int> BoundaryConditionOutlet;
vector<double> IonDiffusivity;//User input unit [m^2/sec]
vector<int> IonValence;
vector<double> IonConcentration;//unit [mol/m^3]
vector<double> Cin;//inlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> Cout;//outlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> tau;
vector<double> time_conv;
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
double fluidVelx_dummy, fluidVely_dummy, fluidVelz_dummy;
double Ex_dummy, Ey_dummy, Ez_dummy;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
size_t number_ion_species;
vector<int> BoundaryConditionInlet;
vector<int> BoundaryConditionOutlet;
vector<double> IonDiffusivity; //User input unit [m^2/sec]
vector<int> IonValence;
vector<double> IonConcentration; //unit [mol/m^3]
// Per-species inlet/outlet boundary values. The type specifier is restored to
// match the other declarations of these same members (vector<double>).
vector<double> Cin; //inlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> Cout; //outlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> tau;
vector<double> time_conv;
int Nx, Ny, Nz, N, Np;
int rank, nprocx, nprocy, nprocz, nprocs;
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
@ -84,8 +89,8 @@ public:
DoubleArray Distance;
int *NeighborList;
double *fq;
double *Ci;
double *ChargeDensity;
double *Ci;
double *ChargeDensity;
double *IonSolid;
double *FluidVelocityDummy;
double *ElectricFieldDummy;
@ -94,18 +99,20 @@ public:
double *FluxElectrical;
Utilities::MPI comm;
// filenames
Utilities::MPI comm;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
char OutputFilename[200];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
void LoadParams(std::shared_ptr<Database> db0);
void AssignSolidBoundary(double *ion_solid);
void AssignIonConcentration_FromFile(double *Ci,const vector<std::string> &File_ion,int ic);
void AssignIonConcentration_FromFile(double *Ci,
const vector<std::string> &File_ion,
int ic);
void IonConcentration_LB_to_Phys(DoubleArray &Den_reg);
void IonFlux_LB_to_Phys(DoubleArray &Den_reg, const size_t ic);

View File

@ -20,375 +20,408 @@
#include "models/MRTModel.h"
#include "analysis/distance.h"
#include "common/ReadMicroCT.h"
ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM):
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),
ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI &COMM)
: rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0), tau(0),
Fx(0), Fy(0), Fz(0), flux(0), din(0), dout(0), mu(0), Nx(0), Ny(0), Nz(0),
N(0), Np(0), nprocx(0), nprocy(0), nprocz(0), BoundaryCondition(0), Lx(0),
Ly(0), Lz(0), comm(COMM) {}
ScaLBL_MRTModel::~ScaLBL_MRTModel() {}
void ScaLBL_MRTModel::ReadParams(string filename) {
// read the input database
db = std::make_shared<Database>(filename);
domain_db = db->getDatabase("Domain");
mrt_db = db->getDatabase("MRT");
vis_db = db->getDatabase("Visualization");
tau = 1.0;
timestepMax = 100000;
tolerance = 1.0e-8;
Fx = Fy = 0.0;
Fz = 1.0e-5;
dout = 1.0;
din = 1.0;
void ScaLBL_MRTModel::ReadParams(string filename){
// read the input database
db = std::make_shared<Database>( filename );
domain_db = db->getDatabase( "Domain" );
mrt_db = db->getDatabase( "MRT" );
vis_db = db->getDatabase( "Visualization" );
tau = 1.0;
timestepMax = 100000;
tolerance = 1.0e-8;
Fx = Fy = 0.0;
Fz = 1.0e-5;
dout = 1.0;
din = 1.0;
// MRT model parameters
if (mrt_db->keyExists( "timestepMax" )){
timestepMax = mrt_db->getScalar<int>( "timestepMax" );
if (mrt_db->keyExists( "tolerance" )){
tolerance = mrt_db->getScalar<double>( "tolerance" );
if (mrt_db->keyExists( "tau" )){
tau = mrt_db->getScalar<double>( "tau" );
if (mrt_db->keyExists( "F" )){
Fx = mrt_db->getVector<double>( "F" )[0];
Fy = mrt_db->getVector<double>( "F" )[1];
Fz = mrt_db->getVector<double>( "F" )[2];
if (mrt_db->keyExists( "Restart" )){
Restart = mrt_db->getScalar<bool>( "Restart" );
if (mrt_db->keyExists( "din" )){
din = mrt_db->getScalar<double>( "din" );
if (mrt_db->keyExists( "dout" )){
dout = mrt_db->getScalar<double>( "dout" );
if (mrt_db->keyExists( "flux" )){
flux = mrt_db->getScalar<double>( "flux" );
// Read the boundary condition. An entry in the MRT database takes precedence
// and is read under the same key that is tested ("BoundaryCondition"); reading
// it as "BC" would request a key whose existence in mrt_db was never checked.
// Otherwise fall back to the "BC" entry of the Domain database.
if (mrt_db->keyExists( "BoundaryCondition" )){
BoundaryCondition = mrt_db->getScalar<int>( "BoundaryCondition" );
else if (domain_db->keyExists( "BC" )){
BoundaryCondition = domain_db->getScalar<int>( "BC" );
void ScaLBL_MRTModel::SetDomain(){
Dm = std::shared_ptr<Domain>(new Domain(domain_db,comm)); // full domain for analysis
Mask = std::shared_ptr<Domain>(new Domain(domain_db,comm)); // mask domain removes immobile phases
// domain parameters
Nx = Dm->Nx;
Ny = Dm->Ny;
Nz = Dm->Nz;
Lx = Dm->Lx;
Ly = Dm->Ly;
Lz = Dm->Lz;
N = Nx*Ny*Nz;
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
rank = Dm->rank();
nprocx = Dm->nprocx();
nprocy = Dm->nprocy();
nprocz = Dm->nprocz();
void ScaLBL_MRTModel::ReadInput(){
if (domain_db->keyExists( "Filename" )){
auto Filename = domain_db->getScalar<std::string>( "Filename" );
// MRT model parameters
if (mrt_db->keyExists("timestepMax")) {
timestepMax = mrt_db->getScalar<int>("timestepMax");
else if (domain_db->keyExists( "GridFile" )){
// Read the local domain data
auto input_id = readMicroCT( *domain_db, comm );
// Fill the halo (assuming GCW of 1)
array<int,3> size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) };
ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz };
ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 );
fillHalo<signed char> fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 );
Array<signed char> id_view;
id_view.viewRaw( size1, Mask-> );
fill.copy( input_id, id_view );
fill.fill( id_view );
if (mrt_db->keyExists("tolerance")) {
tolerance = mrt_db->getScalar<double>("tolerance");
if (mrt_db->keyExists("tau")) {
tau = mrt_db->getScalar<double>("tau");
if (mrt_db->keyExists("F")) {
Fx = mrt_db->getVector<double>("F")[0];
Fy = mrt_db->getVector<double>("F")[1];
Fz = mrt_db->getVector<double>("F")[2];
if (mrt_db->keyExists("Restart")) {
Restart = mrt_db->getScalar<bool>("Restart");
if (mrt_db->keyExists("din")) {
din = mrt_db->getScalar<double>("din");
if (mrt_db->keyExists("dout")) {
dout = mrt_db->getScalar<double>("dout");
if (mrt_db->keyExists("flux")) {
flux = mrt_db->getScalar<double>("flux");
// Read the boundary condition. An entry in the MRT database takes precedence
// and is read under the same key that is tested ("BoundaryCondition"); reading
// it as "BC" would request a key whose existence in mrt_db was never checked.
// Otherwise fall back to the "BC" entry of the Domain database.
if (mrt_db->keyExists("BoundaryCondition")) {
BoundaryCondition = mrt_db->getScalar<int>("BoundaryCondition");
} else if (domain_db->keyExists("BC")) {
BoundaryCondition = domain_db->getScalar<int>("BC");
mu = (tau - 0.5) / 3.0;
void ScaLBL_MRTModel::SetDomain() {
Dm = std::shared_ptr<Domain>(
new Domain(domain_db, comm)); // full domain for analysis
Mask = std::shared_ptr<Domain>(
new Domain(domain_db, comm)); // mask domain removes immobile phases
// domain parameters
Nx = Dm->Nx;
Ny = Dm->Ny;
Nz = Dm->Nz;
Lx = Dm->Lx;
Ly = Dm->Ly;
Lz = Dm->Lz;
N = Nx * Ny * Nz;
Distance.resize(Nx, Ny, Nz);
Velocity_x.resize(Nx, Ny, Nz);
Velocity_y.resize(Nx, Ny, Nz);
Velocity_z.resize(Nx, Ny, Nz);
for (int i = 0; i < Nx * Ny * Nz; i++)
Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
rank = Dm->rank();
nprocx = Dm->nprocx();
nprocy = Dm->nprocy();
nprocz = Dm->nprocz();
void ScaLBL_MRTModel::ReadInput() {
sprintf(LocalRankString, "%05d", Dm->rank());
sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString);
sprintf(LocalRestartFile, "%s%s", "Restart.", LocalRankString);
if (domain_db->keyExists("Filename")) {
auto Filename = domain_db->getScalar<std::string>("Filename");
} else if (domain_db->keyExists("GridFile")) {
// Read the local domain data
auto input_id = readMicroCT(*domain_db, comm);
// Fill the halo (assuming GCW of 1)
array<int, 3> size0 = {(int)input_id.size(0), (int)input_id.size(1),
ArraySize size1 = {(size_t)Mask->Nx, (size_t)Mask->Ny,
ASSERT((int)size1[0] == size0[0] + 2 && (int)size1[1] == size0[1] + 2 &&
(int)size1[2] == size0[2] + 2);
fillHalo<signed char> fill(comm, Mask->rank_info, size0, {1, 1, 1}, 0,
Array<signed char> id_view;
id_view.viewRaw(size1, Mask->;
fill.copy(input_id, id_view);
} else {
// Generate the signed distance map
// Initialize the domain and communication
Array<char> id_solid(Nx,Ny,Nz);
// Solve for the position of the solid phase
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
int n = k*Nx*Ny+j*Nx+i;
// Initialize the solid phase
if (Mask->id[n] > 0) id_solid(i,j,k) = 1;
else id_solid(i,j,k) = 0;
// Initialize the signed distance function
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
// Initialize distance to +/- 1
Distance(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0;
// MeanFilter(Averages->SDs);
if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n");
if (rank == 0) cout << "Domain set." << endl;
// Initialize the domain and communication
Array<char> id_solid(Nx, Ny, Nz);
// Solve for the position of the solid phase
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
int n = k * Nx * Ny + j * Nx + i;
// Initialize the solid phase
if (Mask->id[n] > 0)
id_solid(i, j, k) = 1;
id_solid(i, j, k) = 0;
// Initialize the signed distance function
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
// Initialize distance to +/- 1
Distance(i, j, k) = 2.0 * double(id_solid(i, j, k)) - 1.0;
// MeanFilter(Averages->SDs);
if (rank == 0)
printf("Initialized solid phase -- Converting to Signed Distance "
"function \n");
CalcDist(Distance, id_solid, *Dm);
if (rank == 0)
cout << "Domain set." << endl;
void ScaLBL_MRTModel::Create(){
void ScaLBL_MRTModel::Create() {
* This function creates the variables needed to run a LBM
int rank=Mask->rank();
// Initialize communication structures in averaging domain
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
if (rank==0) printf ("Create ScaLBL_Communicator \n");
// Create a communicator for the device (will use optimized layout)
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
ScaLBL_Comm = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
int rank = Mask->rank();
// Initialize communication structures in averaging domain
for (int i = 0; i < Nx * Ny * Nz; i++)
Dm->id[i] = Mask->id[i];
Np = Mask->PoreCount();
if (rank == 0)
printf("Create ScaLBL_Communicator \n");
// Create a communicator for the device (will use optimized layout)
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
ScaLBL_Comm =
std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
int Npad=(Np/16 + 2)*16;
if (rank==0) printf ("Set up memory efficient layout \n");
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->,Np,1);
int Npad = (Np / 16 + 2) * 16;
if (rank == 0)
printf("Set up memory efficient layout \n");
Map.resize(Nx, Ny, Nz);
auto neighborList = new int[18 * Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map, neighborList,
Mask->, Np, 1);
// LBM variables
if (rank==0) printf ("Allocating distributions \n");
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
int neighborSize=18*(Np*sizeof(int));
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np);
ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np);
// Update GPU data structures
if (rank==0) printf ("Setting up device map and neighbor list \n");
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
double MLUPS = ScaLBL_Comm->GetPerformance(NeighborList,fq,Np);
printf(" MLPUS=%f from rank %i\n",MLUPS,rank);
// LBM variables
if (rank == 0)
printf("Allocating distributions \n");
//......................device distributions.................................
int dist_mem_size = Np * sizeof(double);
int neighborSize = 18 * (Np * sizeof(int));
ScaLBL_AllocateDeviceMemory((void **)&NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **)&fq, 19 * dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **)&Pressure, sizeof(double) * Np);
ScaLBL_AllocateDeviceMemory((void **)&Velocity, 3 * sizeof(double) * Np);
// Update GPU data structures
if (rank == 0)
printf("Setting up device map and neighbor list \n");
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
double MLUPS = ScaLBL_Comm->GetPerformance(NeighborList, fq, Np);
printf(" MLPUS=%f from rank %i\n", MLUPS, rank);
void ScaLBL_MRTModel::Initialize(){
void ScaLBL_MRTModel::Initialize() {
* This function initializes model
if (rank==0) printf ("Initializing distributions \n");
if (rank == 0)
printf("Initializing distributions \n");
ScaLBL_D3Q19_Init(fq, Np);
void ScaLBL_MRTModel::Run(){
double rlx_setA=1.0/tau;
double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
Minkowski Morphology(Mask);
void ScaLBL_MRTModel::Run() {
double rlx_setA = 1.0 / tau;
double rlx_setB = 8.f * (2.f - rlx_setA) / (8.f - rlx_setA);
if (rank==0){
bool WriteHeader=false;
FILE *log_file = fopen("Permeability.csv","r");
if (log_file != NULL)
Minkowski Morphology(Mask);
if (WriteHeader){
log_file = fopen("Permeability.csv","a+");
fprintf(log_file,"time Fx Fy Fz mu Vs As Js Xs vx vy vz k\n");
if (rank == 0) {
bool WriteHeader = false;
FILE *log_file = fopen("Permeability.csv", "r");
if (log_file != NULL)
WriteHeader = true;
//.......create and start timer............
ScaLBL_DeviceBarrier(); comm.barrier();
if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
if (rank==0) printf("********************************************************\n");
double error = 1.0;
double flow_rate_previous = 0.0;
if (WriteHeader) {
log_file = fopen("Permeability.csv", "a+");
fprintf(log_file, "time Fx Fy Fz mu Vs As Js Xs vx vy vz k\n");
//.......create and start timer............
if (rank == 0)
printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
if (rank == 0)
timestep = 0;
double error = 1.0;
double flow_rate_previous = 0.0;
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax && error > tolerance) {
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
else if (BoundaryCondition == 5){
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
// Even half of the AA update pair: call SendD3Q19AA on fq, then run the
// even-step MRT kernel with FirstInterior()/LastInterior() as its range
// arguments, the relaxation rates rlx_setA/rlx_setB and the body force (Fx,Fy,Fz).
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
else if (BoundaryCondition == 5){
ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); comm.barrier();
if (timestep%1000==0){
ScaLBL_D3Q19_Momentum(fq,Velocity, Np);
ScaLBL_DeviceBarrier(); comm.barrier();
double count_loc=0;
double count;
double vax,vay,vaz;
double vax_loc,vay_loc,vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
vax_loc += Velocity_x(i,j,k);
vay_loc += Velocity_y(i,j,k);
vaz_loc += Velocity_z(i,j,k);
vax=Dm->Comm.sumReduce( vax_loc);
vay=Dm->Comm.sumReduce( vay_loc);
vaz=Dm->Comm.sumReduce( vaz_loc);
count=Dm->Comm.sumReduce( count_loc);
vax /= count;
vay /= count;
vaz /= count;
double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz);
double dir_x = Fx/force_mag;
double dir_y = Fy/force_mag;
double dir_z = Fz/force_mag;
if (force_mag == 0.0){
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z);
error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate);
flow_rate_previous = flow_rate;
//if (rank==0) printf("Computing Minkowski functionals \n");
double mu = (tau-0.5)/3.f;
double Vs = Morphology.V();
double As = Morphology.A();
double Hs = Morphology.H();
double Xs = Morphology.X();
Vs=Dm->Comm.sumReduce( Vs);
As=Dm->Comm.sumReduce( As);
Hs=Dm->Comm.sumReduce( Hs);
Xs=Dm->Comm.sumReduce( Xs);
while (timestep < timestepMax && error > tolerance) {
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np, rlx_setA,
rlx_setB, Fx, Fy, Fz);
// Set boundary conditions
if (BoundaryCondition == 3) {
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 4) {
din =
ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 5) {
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(),
Np, rlx_setA, rlx_setB, Fx, Fy, Fz);
// Even half of the AA update pair: call SendD3Q19AA on fq, then run the
// even-step MRT kernel with FirstInterior()/LastInterior() as its range
// arguments, the relaxation rates rlx_setA/rlx_setB and the body force (Fx,Fy,Fz).
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np, rlx_setA,
rlx_setB, Fx, Fy, Fz);
// Set boundary conditions
if (BoundaryCondition == 3) {
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 4) {
din =
ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 5) {
ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np,
rlx_setA, rlx_setB, Fx, Fy, Fz);
double h = Dm->voxel_length;
double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag;
if (rank==0) {
printf(" %f\n",absperm);
FILE * log_file = fopen("Permeability.csv","a");
fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu,
h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz, absperm);
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
if (timestep % 1000 == 0) {
ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
ScaLBL_Comm->RegularLayout(Map, &Velocity[0], Velocity_x);
ScaLBL_Comm->RegularLayout(Map, &Velocity[Np], Velocity_y);
ScaLBL_Comm->RegularLayout(Map, &Velocity[2 * Np], Velocity_z);
double count_loc = 0;
double count;
double vax, vay, vaz;
double vax_loc, vay_loc, vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
if (Distance(i, j, k) > 0) {
vax_loc += Velocity_x(i, j, k);
vay_loc += Velocity_y(i, j, k);
vaz_loc += Velocity_z(i, j, k);
count_loc += 1.0;
vax = Dm->Comm.sumReduce(vax_loc);
vay = Dm->Comm.sumReduce(vay_loc);
vaz = Dm->Comm.sumReduce(vaz_loc);
count = Dm->Comm.sumReduce(count_loc);
vax /= count;
vay /= count;
vaz /= count;
double force_mag = sqrt(Fx * Fx + Fy * Fy + Fz * Fz);
double dir_x = Fx / force_mag;
double dir_y = Fy / force_mag;
double dir_z = Fz / force_mag;
if (force_mag == 0.0) {
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
double flow_rate = (vax * dir_x + vay * dir_y + vaz * dir_z);
error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate);
flow_rate_previous = flow_rate;
//if (rank==0) printf("Computing Minkowski functionals \n");
Morphology.ComputeScalar(Distance, 0.f);
double mu = (tau - 0.5) / 3.f;
double Vs = Morphology.V();
double As = Morphology.A();
double Hs = Morphology.H();
double Xs = Morphology.X();
Vs = Dm->Comm.sumReduce(Vs);
As = Dm->Comm.sumReduce(As);
Hs = Dm->Comm.sumReduce(Hs);
Xs = Dm->Comm.sumReduce(Xs);
double h = Dm->voxel_length;
double absperm =
h * h * mu * Mask->Porosity() * flow_rate / force_mag;
if (rank == 0) {
printf(" %f\n", absperm);
FILE *log_file = fopen("Permeability.csv", "a");
"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g "
"%.8g %.8g\n",
timestep, Fx, Fy, Fz, mu, h * h * h * Vs, h * h * As,
h * Hs, Xs, vax, vay, vaz, absperm);
if (rank == 0)
// Compute the walltime per timestep
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank==0) printf("********************************************************\n");
double cputime = std::chrono::duration<double>(t2 - t1).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np) / cputime / 1000000;
if (rank == 0)
if (rank == 0)
printf("CPU time = %f \n", cputime);
if (rank == 0)
printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank == 0)
printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank == 0)
void ScaLBL_MRTModel::VelocityField(){
void ScaLBL_MRTModel::VelocityField() {
auto format = vis_db->getWithDefault<string>( "format", "silo" );
auto format = vis_db->getWithDefault<string>("format", "silo");
/* memcpy(,, Nx*Ny*Nz*sizeof(double));
@ -428,59 +461,63 @@ void ScaLBL_MRTModel::VelocityField(){
if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu,
vis_db = db->getDatabase( "Visualization" );
if (vis_db->getWithDefault<bool>( "write_silo", false )){
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1);
vis_db = db->getDatabase("Visualization");
if (vis_db->getWithDefault<bool>("write_silo", false)) {
auto VxVar = std::make_shared<IO::Variable>();
auto VyVar = std::make_shared<IO::Variable>();
auto VzVar = std::make_shared<IO::Variable>();
auto SignDistVar = std::make_shared<IO::Variable>();
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm, Dm->rank_info,
{Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2},
{1, 1, 1}, 0, 1);
// Create the MeshDataStruct
visData[0].meshName = "domain";
visData[0].mesh = std::make_shared<IO::DomainMesh>( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz );
SignDistVar->name = "SignDist";
SignDistVar->type = IO::VariableType::VolumeVariable;
SignDistVar->dim = 1;
VxVar->name = "Velocity_x";
VxVar->type = IO::VariableType::VolumeVariable;
VxVar->dim = 1;
VyVar->name = "Velocity_y";
VyVar->type = IO::VariableType::VolumeVariable;
VyVar->dim = 1;
VzVar->name = "Velocity_z";
VzVar->type = IO::VariableType::VolumeVariable;
VzVar->dim = 1;
Array<double>& SignData = visData[0].vars[0]->data;
Array<double>& VelxData = visData[0].vars[1]->data;
Array<double>& VelyData = visData[0].vars[2]->data;
Array<double>& VelzData = visData[0].vars[3]->data;
IO::writeData( timestep, visData, Dm->Comm );
auto VxVar = std::make_shared<IO::Variable>();
auto VyVar = std::make_shared<IO::Variable>();
auto VzVar = std::make_shared<IO::Variable>();
auto SignDistVar = std::make_shared<IO::Variable>();
IO::initialize("", format, "false");
// Create the MeshDataStruct
visData[0].meshName = "domain";
visData[0].mesh = std::make_shared<IO::DomainMesh>(
Dm->rank_info, Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2, Dm->Lx, Dm->Ly,
SignDistVar->name = "SignDist";
SignDistVar->type = IO::VariableType::VolumeVariable;
SignDistVar->dim = 1;
SignDistVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
VxVar->name = "Velocity_x";
VxVar->type = IO::VariableType::VolumeVariable;
VxVar->dim = 1;
VxVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
VyVar->name = "Velocity_y";
VyVar->type = IO::VariableType::VolumeVariable;
VyVar->dim = 1;
VyVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
VzVar->name = "Velocity_z";
VzVar->type = IO::VariableType::VolumeVariable;
VzVar->dim = 1;
VzVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
Array<double> &SignData = visData[0].vars[0]->data;
Array<double> &VelxData = visData[0].vars[1]->data;
Array<double> &VelyData = visData[0].vars[2]->data;
Array<double> &VelzData = visData[0].vars[3]->data;
ASSERT(visData[0].vars[0]->name == "SignDist");
ASSERT(visData[0].vars[1]->name == "Velocity_x");
ASSERT(visData[0].vars[2]->name == "Velocity_y");
ASSERT(visData[0].vars[3]->name == "Velocity_z");
fillData.copy(Distance, SignData);
fillData.copy(Velocity_x, VelxData);
fillData.copy(Velocity_y, VelyData);
fillData.copy(Velocity_z, VelzData);
IO::writeData(timestep, visData, Dm->Comm);

@ -31,36 +31,36 @@
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
class ScaLBL_MRTModel{
class ScaLBL_MRTModel {
ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM);
// functions in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run();
void VelocityField();
bool Restart,pBC;
int timestep,timestepMax;
int BoundaryCondition;
double tau,mu;
double Fx,Fy,Fz,flux;
double din,dout;
double tolerance;
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI &COMM);
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// functions in the order they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run();
void VelocityField();
bool Restart, pBC;
int timestep, timestepMax;
int BoundaryCondition;
double tau, mu;
double Fx, Fy, Fz, flux;
double din, dout;
double tolerance;
int Nx, Ny, Nz, N, Np;
int rank, nprocx, nprocy, nprocz, nprocs;
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
@ -73,20 +73,21 @@ public:
double *fq;
double *Velocity;
double *Pressure;
//Minkowski Morphology;
DoubleArray Velocity_x;
DoubleArray Velocity_y;
DoubleArray Velocity_z;
Utilities::MPI comm;
// filenames
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
void LoadParams(std::shared_ptr<Database> db0);

@ -1,51 +1,48 @@
#include "models/MultiPhysController.h"
ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, const Utilities::MPI& COMM):
int RANK, int NP, const Utilities::MPI &COMM)
: rank(RANK), nprocs(NP), Restart(0), timestepMax(0), num_iter_Stokes(0),
num_iter_Ion(0), analysis_interval(0), visualization_interval(0),
tolerance(0), time_conv_max(0), comm(COMM) {}
ScaLBL_Multiphys_Controller::~ScaLBL_Multiphys_Controller() {}
void ScaLBL_Multiphys_Controller::ReadParams(string filename) {
void ScaLBL_Multiphys_Controller::ReadParams(string filename){
// read the input database
db = std::make_shared<Database>( filename );
study_db = db->getDatabase( "MultiphysController" );
// read the input database
db = std::make_shared<Database>(filename);
study_db = db->getDatabase("MultiphysController");
// Default parameters
timestepMax = 10000;
Restart = false;
num_iter_Stokes = 1;
analysis_interval = 500;
visualization_interval = 10000;
tolerance = 1.0e-6;
time_conv_max = 0.0;
// load input parameters
if (study_db->keyExists( "timestepMax" )){
timestepMax = study_db->getScalar<int>( "timestepMax" );
if (study_db->keyExists( "analysis_interval" )){
analysis_interval = study_db->getScalar<int>( "analysis_interval" );
if (study_db->keyExists( "visualization_interval" )){
visualization_interval = study_db->getScalar<int>( "visualization_interval" );
if (study_db->keyExists( "tolerance" )){
tolerance = study_db->getScalar<double>( "tolerance" );
//if (study_db->keyExists( "time_conv" )){
// time_conv = study_db->getScalar<double>( "time_conv" );
//if (study_db->keyExists( "Schmidt_Number" )){
// SchmidtNum = study_db->getScalar<double>( "Schmidt_Number" );
if (study_db->keyExists("timestepMax")) {
timestepMax = study_db->getScalar<int>("timestepMax");
if (study_db->keyExists("analysis_interval")) {
analysis_interval = study_db->getScalar<int>("analysis_interval");
if (study_db->keyExists("visualization_interval")) {
visualization_interval =
if (study_db->keyExists("tolerance")) {
tolerance = study_db->getScalar<double>("tolerance");
//if (study_db->keyExists( "time_conv" )){
// time_conv = study_db->getScalar<double>( "time_conv" );
//if (study_db->keyExists( "Schmidt_Number" )){
// SchmidtNum = study_db->getScalar<double>( "Schmidt_Number" );
// recalculate relevant parameters
//if (SchmidtNum>1){
@ -61,87 +58,104 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){
// num_iter_Ion = 1;
// ERROR("Error: SchmidtNum (Schmidt number) must be a positive number! \n");
// ERROR("Error: SchmidtNum (Schmidt number) must be a positive number! \n");
// load input parameters
// in case the user wants absolute control over the internal iterations
if (study_db->keyExists( "num_iter_Ion_List" )){
if (study_db->keyExists("num_iter_Ion_List")) {
num_iter_Ion = study_db->getVector<int>( "num_iter_Ion_List" );
num_iter_Ion = study_db->getVector<int>("num_iter_Ion_List");
if (study_db->keyExists( "num_iter_Stokes" )){
num_iter_Stokes = study_db->getScalar<int>( "num_iter_Stokes" );
if (study_db->keyExists("num_iter_Stokes")) {
num_iter_Stokes = study_db->getScalar<int>("num_iter_Stokes");
int ScaLBL_Multiphys_Controller::getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv){
int ScaLBL_Multiphys_Controller::getStokesNumIter_PNP_coupling(
double StokesTimeConv, const vector<double> &IonTimeConv) {
//Return number of internal iterations for the Stokes solver
int num_iter_stokes;
vector<double> TimeConv;
vector<double>::iterator it_max = max_element(TimeConv.begin(),TimeConv.end());
int idx_max = distance(TimeConv.begin(),it_max);
if (idx_max==0){
TimeConv.assign(IonTimeConv.begin(), IonTimeConv.end());
TimeConv.insert(TimeConv.begin(), StokesTimeConv);
vector<double>::iterator it_max =
max_element(TimeConv.begin(), TimeConv.end());
int idx_max = distance(TimeConv.begin(), it_max);
if (idx_max == 0) {
num_iter_stokes = 2;
double temp = 2*TimeConv[idx_max]/StokesTimeConv;//the factor 2 is the number of iterations for the element has max time_conv
num_iter_stokes = int(round(temp/2)*2);
} else {
double temp =
2 * TimeConv[idx_max] /
StokesTimeConv; //the factor 2 is the number of iterations for the element has max time_conv
num_iter_stokes = int(round(temp / 2) * 2);
return num_iter_stokes;
vector<int> ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv){
vector<int> ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(
double StokesTimeConv, const vector<double> &IonTimeConv) {
//Return number of internal iterations for the Ion transport solver
vector<int> num_iter_ion;
vector<double> TimeConv;
vector<double>::iterator it_max = max_element(TimeConv.begin(),TimeConv.end());
unsigned int idx_max = distance(TimeConv.begin(),it_max);
if (idx_max==0){
for (unsigned int idx=1;idx<TimeConv.size();idx++){
double temp = 2*StokesTimeConv/TimeConv[idx];//the factor 2 is the number of iterations for the element has max time_conv
TimeConv.assign(IonTimeConv.begin(), IonTimeConv.end());
TimeConv.insert(TimeConv.begin(), StokesTimeConv);
vector<double>::iterator it_max =
max_element(TimeConv.begin(), TimeConv.end());
unsigned int idx_max = distance(TimeConv.begin(), it_max);
if (idx_max == 0) {
for (unsigned int idx = 1; idx < TimeConv.size(); idx++) {
double temp =
2 * StokesTimeConv /
[idx]; //the factor 2 is the number of iterations for the element has max time_conv
num_iter_ion.push_back(int(round(temp / 2) * 2));
else if (idx_max==1){
} else if (idx_max == 1) {
for (unsigned int idx=2;idx<TimeConv.size();idx++){
double temp = 2*TimeConv[idx_max]/TimeConv[idx];//the factor 2 is the number of iterations for the element has max time_conv
for (unsigned int idx = 2; idx < TimeConv.size(); idx++) {
double temp =
2 * TimeConv[idx_max] /
[idx]; //the factor 2 is the number of iterations for the element has max time_conv
num_iter_ion.push_back(int(round(temp / 2) * 2));
else if (idx_max==TimeConv.size()-1){
for (unsigned int idx=1;idx<TimeConv.size()-1;idx++){
double temp = 2*TimeConv[idx_max]/TimeConv[idx];//the factor 2 is the number of iterations for the element has max time_conv
} else if (idx_max == TimeConv.size() - 1) {
for (unsigned int idx = 1; idx < TimeConv.size() - 1; idx++) {
double temp =
2 * TimeConv[idx_max] /
[idx]; //the factor 2 is the number of iterations for the element has max time_conv
num_iter_ion.push_back(int(round(temp / 2) * 2));
else {
for (unsigned int idx=1;idx<idx_max;idx++){
double temp = 2*TimeConv[idx_max]/TimeConv[idx];//the factor 2 is the number of iterations for the element has max time_conv
} else {
for (unsigned int idx = 1; idx < idx_max; idx++) {
double temp =
2 * TimeConv[idx_max] /
[idx]; //the factor 2 is the number of iterations for the element has max time_conv
num_iter_ion.push_back(int(round(temp / 2) * 2));
for (unsigned int idx=idx_max+1;idx<TimeConv.size();idx++){
double temp = 2*TimeConv[idx_max]/TimeConv[idx];//the factor 2 is the number of iterations for the element has max time_conv
for (unsigned int idx = idx_max + 1; idx < TimeConv.size(); idx++) {
double temp =
2 * TimeConv[idx_max] /
[idx]; //the factor 2 is the number of iterations for the element has max time_conv
num_iter_ion.push_back(int(round(temp / 2) * 2));
return num_iter_ion;
void ScaLBL_Multiphys_Controller::getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv){
void ScaLBL_Multiphys_Controller::getTimeConvMax_PNP_coupling(
double StokesTimeConv, const vector<double> &IonTimeConv) {
//Return maximum of the time converting factor from Stokes and ion solvers
vector<double> TimeConv;
time_conv_max = *max_element(TimeConv.begin(),TimeConv.end());
TimeConv.assign(IonTimeConv.begin(), IonTimeConv.end());
TimeConv.insert(TimeConv.begin(), StokesTimeConv);
time_conv_max = *max_element(TimeConv.begin(), TimeConv.end());

View File

@ -17,19 +17,22 @@
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
class ScaLBL_Multiphys_Controller{
class ScaLBL_Multiphys_Controller {
ScaLBL_Multiphys_Controller(int RANK, int NP, const Utilities::MPI& COMM);
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
int getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
vector<int> getIonNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
ScaLBL_Multiphys_Controller(int RANK, int NP, const Utilities::MPI &COMM);
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
int getStokesNumIter_PNP_coupling(double StokesTimeConv,
const vector<double> &IonTimeConv);
vector<int> getIonNumIter_PNP_coupling(double StokesTimeConv,
const vector<double> &IonTimeConv);
//void getIonNumIter_PNP_coupling(double StokesTimeConv,vector<double> &IonTimeConv,vector<int> &IonTimeMax);
void getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
bool Restart;
void getTimeConvMax_PNP_coupling(double StokesTimeConv,
const vector<double> &IonTimeConv);
bool Restart;
int timestepMax;
int num_iter_Stokes;
vector<int> num_iter_Ion;
@ -39,20 +42,20 @@ public:
double time_conv_max;
//double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity
int rank,nprocs;
int rank, nprocs;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> study_db;
Utilities::MPI comm;
// filenames
Utilities::MPI comm;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
void LoadParams(std::shared_ptr<Database> db0);

Some files were not shown because too many files have changed in this diff Show More