diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp index b4c4a504..8b974926 100644 --- a/analysis/ElectroChemistry.cpp +++ b/analysis/ElectroChemistry.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #include "analysis/ElectroChemistry.h" ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm) @@ -49,7 +65,7 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm) IonFluxElectrical_y.fill(0); IonFluxElectrical_z.resize(Nx, Ny, Nz); IonFluxElectrical_z.fill(0); - + if (Dm->rank() == 0) { bool WriteHeader = false; TIMELOG = fopen("electrokinetic.csv", "r"); @@ -75,9 +91,11 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(ScaLBL_IonModel &IonModel) Nz = Dm->Nz; Volume = (Nx - 2) * (Ny - 2) * (Nz - 2) * Dm->nprocx() * Dm->nprocy() * Dm->nprocz() * 1.0; - - if (Dm->rank()==0) printf("Analyze system with sub-domain size = %i x %i x %i \n",Nx,Ny,Nz); - + + if (Dm->rank() == 0) + printf("Analyze system with sub-domain size = %i x %i x %i \n", Nx, Ny, + Nz); + USE_MEMBRANE = IonModel.USE_MEMBRANE; ChemicalPotential.resize(Nx, Ny, Nz); @@ -120,11 +138,11 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(ScaLBL_IonModel &IonModel) IonFluxElectrical_y.fill(0); IonFluxElectrical_z.resize(Nx, Ny, Nz); 
IonFluxElectrical_z.fill(0); - - + if (Dm->rank() == 0) { - printf("Set up analysis routines for %lu ions \n",IonModel.number_ion_species); - + printf("Set up analysis routines for %lu ions \n", + IonModel.number_ion_species); + bool WriteHeader = false; TIMELOG = fopen("electrokinetic.csv", "r"); if (TIMELOG != NULL) @@ -138,15 +156,19 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(ScaLBL_IonModel &IonModel) //fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); fprintf(TIMELOG, "timestep voltage_out voltage_in "); fprintf(TIMELOG, "voltage_out_membrane voltage_in_membrane "); - for (size_t i=0; irank() == 0) - fprintf(TIMELOG, "%i ", timestep); + if (Dm->rank() == 0) + fprintf(TIMELOG, "%i ", timestep); - - /* int iq, ip, nq, np, nqm, npm; + /* int iq, ip, nq, np, nqm, npm; Ion.MembraneDistance(i,j,k); // inside (-) or outside (+) the ion for (int link; linkmembraneLinkCount; link++){ int iq = Ion.IonMembrane->membraneLinks[2*link]; @@ -187,7 +207,7 @@ void ElectroChemistryAnalyzer::Membrane(ScaLBL_IonModel &Ion, */ unsigned long int in_local_count, out_local_count; unsigned long int in_global_count, out_global_count; - + double value_in_local, value_out_local; double value_in_global, value_out_global; @@ -208,54 +228,53 @@ void ElectroChemistryAnalyzer::Membrane(ScaLBL_IonModel &Ion, double jx_in_global, jx_out_global; double jy_in_global, jy_out_global; double jz_in_global, jz_out_global; - + unsigned long int membrane_in_local_count, membrane_out_local_count; unsigned long int membrane_in_global_count, membrane_out_global_count; - - double memdist,value,jx,jy,jz; + + double memdist, value, jx, jy, jz; in_local_count = 0; out_local_count = 0; membrane_in_local_count = 0; membrane_out_local_count = 0; - + value_membrane_in_local = 0.0; value_membrane_out_local = 0.0; value_in_local = 0.0; value_out_local = 0.0; for (k = Dm->inlet_layers_z; k < Nz; k++) { - for (j = 1; j < Ny; j++) { - for (i = 1; i < 
Nx; i++) { - /* electric potential */ - memdist = Ion.MembraneDistance(i,j,k); - value = ElectricalPotential(i,j,k); - if (memdist < 0.0){ - // inside the membrane - if (fabs(memdist) < 1.0){ - value_membrane_in_local += value; - membrane_in_local_count++; - } - value_in_local += value; - in_local_count++; + for (j = 1; j < Ny; j++) { + for (i = 1; i < Nx; i++) { + /* electric potential */ + memdist = Ion.MembraneDistance(i, j, k); + value = ElectricalPotential(i, j, k); + if (memdist < 0.0) { + // inside the membrane + if (fabs(memdist) < 1.0) { + value_membrane_in_local += value; + membrane_in_local_count++; + } + value_in_local += value; + in_local_count++; - } - else { - // outside the membrane - if (fabs(memdist) < 1.0){ - value_membrane_out_local += value; - membrane_out_local_count++; - } - value_out_local += value; - out_local_count++; - } - } - } + } else { + // outside the membrane + if (fabs(memdist) < 1.0) { + value_membrane_out_local += value; + membrane_out_local_count++; + } + value_out_local += value; + out_local_count++; + } + } + } } /* these only need to be computed the first time through */ out_global_count = Dm->Comm.sumReduce(out_local_count); in_global_count = Dm->Comm.sumReduce(in_local_count); membrane_out_global_count = Dm->Comm.sumReduce(membrane_out_local_count); membrane_in_global_count = Dm->Comm.sumReduce(membrane_in_local_count); - + value_out_global = Dm->Comm.sumReduce(value_out_local); value_in_global = Dm->Comm.sumReduce(value_in_local); value_membrane_out_global = Dm->Comm.sumReduce(value_membrane_out_local); @@ -265,12 +284,12 @@ void ElectroChemistryAnalyzer::Membrane(ScaLBL_IonModel &Ion, value_in_global /= in_global_count; value_membrane_out_global /= membrane_out_global_count; value_membrane_in_global /= membrane_in_global_count; - + if (Dm->rank() == 0) { - fprintf(TIMELOG, "%.8g ", value_out_global); - fprintf(TIMELOG, "%.8g ", value_in_global); - fprintf(TIMELOG, "%.8g ", value_membrane_out_global); - fprintf(TIMELOG, 
"%.8g ", value_membrane_in_global); + fprintf(TIMELOG, "%.8g ", value_out_global); + fprintf(TIMELOG, "%.8g ", value_in_global); + fprintf(TIMELOG, "%.8g ", value_membrane_out_global); + fprintf(TIMELOG, "%.8g ", value_membrane_in_global); } value_membrane_in_local = 0.0; @@ -279,72 +298,81 @@ void ElectroChemistryAnalyzer::Membrane(ScaLBL_IonModel &Ion, value_out_local = 0.0; for (size_t ion = 0; ion < Ion.number_ion_species; ion++) { Ion.getIonConcentration(Rho, ion); - Ion.getIonFluxDiffusive(IonFluxDiffusive_x, IonFluxDiffusive_y, IonFluxDiffusive_z, ion); - Ion.getIonFluxAdvective(IonFluxAdvective_x, IonFluxAdvective_y, IonFluxAdvective_z, ion); - Ion.getIonFluxElectrical(IonFluxElectrical_x, IonFluxElectrical_y, IonFluxElectrical_z, ion); + Ion.getIonFluxDiffusive(IonFluxDiffusive_x, IonFluxDiffusive_y, + IonFluxDiffusive_z, ion); + Ion.getIonFluxAdvective(IonFluxAdvective_x, IonFluxAdvective_y, + IonFluxAdvective_z, ion); + Ion.getIonFluxElectrical(IonFluxElectrical_x, IonFluxElectrical_y, + IonFluxElectrical_z, ion); value_membrane_in_local = 0.0; value_membrane_out_local = 0.0; value_in_local = 0.0; value_out_local = 0.0; - - jx_membrane_in_local = jy_membrane_in_local = jz_membrane_in_local = 0.0; - jx_membrane_out_local = jy_membrane_out_local = jz_membrane_out_local = 0.0; - jx_in_local = jy_in_local = jz_in_local = 0.0; - jx_out_local = jy_out_local = jz_out_local = 0.0; + + jx_membrane_in_local = jy_membrane_in_local = jz_membrane_in_local = + 0.0; + jx_membrane_out_local = jy_membrane_out_local = jz_membrane_out_local = + 0.0; + jx_in_local = jy_in_local = jz_in_local = 0.0; + jx_out_local = jy_out_local = jz_out_local = 0.0; for (k = Dm->inlet_layers_z; k < Nz; k++) { - for (j = 1; j < Ny; j++) { - for (i = 1; i < Nx; i++) { - /* electric potential */ - memdist = Ion.MembraneDistance(i,j,k); - value = Rho(i,j,k); - jx = IonFluxDiffusive_x(i,j,k) + IonFluxAdvective_x(i,j,k) + IonFluxElectrical_x(i,j,k); - jy = IonFluxDiffusive_y(i,j,k) + 
IonFluxAdvective_y(i,j,k) + IonFluxElectrical_y(i,j,k); - jz = IonFluxDiffusive_z(i,j,k) + IonFluxAdvective_z(i,j,k) + IonFluxElectrical_z(i,j,k); - - if (memdist < 0.0){ - // inside the membrane - if (fabs(memdist) < 1.0){ - value_membrane_in_local += value; - jx_membrane_in_local += jx; - jy_membrane_in_local += jy; - jz_membrane_in_local += jz; + for (j = 1; j < Ny; j++) { + for (i = 1; i < Nx; i++) { + /* electric potential */ + memdist = Ion.MembraneDistance(i, j, k); + value = Rho(i, j, k); + jx = IonFluxDiffusive_x(i, j, k) + + IonFluxAdvective_x(i, j, k) + + IonFluxElectrical_x(i, j, k); + jy = IonFluxDiffusive_y(i, j, k) + + IonFluxAdvective_y(i, j, k) + + IonFluxElectrical_y(i, j, k); + jz = IonFluxDiffusive_z(i, j, k) + + IonFluxAdvective_z(i, j, k) + + IonFluxElectrical_z(i, j, k); - } - value_in_local += value; - jx_in_local += jx; - jy_in_local += jy; - jz_in_local += jz; + if (memdist < 0.0) { + // inside the membrane + if (fabs(memdist) < 1.0) { + value_membrane_in_local += value; + jx_membrane_in_local += jx; + jy_membrane_in_local += jy; + jz_membrane_in_local += jz; + } + value_in_local += value; + jx_in_local += jx; + jy_in_local += jy; + jz_in_local += jz; - } - else { - // outside the membrane - if (fabs(memdist) < 1.0){ - value_membrane_out_local += value; - jx_membrane_out_local += jx; - jy_membrane_out_local += jy; - jz_membrane_out_local += jz; - - } - value_out_local += value; - jx_out_local += jx; - jy_out_local += jy; - jz_out_local += jz; - } - } - } + } else { + // outside the membrane + if (fabs(memdist) < 1.0) { + value_membrane_out_local += value; + jx_membrane_out_local += jx; + jy_membrane_out_local += jy; + jz_membrane_out_local += jz; + } + value_out_local += value; + jx_out_local += jx; + jy_out_local += jy; + jz_out_local += jz; + } + } + } } value_out_global = Dm->Comm.sumReduce(value_out_local); value_in_global = Dm->Comm.sumReduce(value_in_local); - value_membrane_out_global = Dm->Comm.sumReduce(value_membrane_out_local); 
+ value_membrane_out_global = + Dm->Comm.sumReduce(value_membrane_out_local); value_membrane_in_global = Dm->Comm.sumReduce(value_membrane_in_local); value_out_global /= out_global_count; value_in_global /= in_global_count; value_membrane_out_global /= membrane_out_global_count; value_membrane_in_global /= membrane_in_global_count; - + jx_out_global = Dm->Comm.sumReduce(jx_out_local); jx_in_global = Dm->Comm.sumReduce(jx_in_local); jx_membrane_out_global = Dm->Comm.sumReduce(jx_membrane_out_local); @@ -354,7 +382,7 @@ void ElectroChemistryAnalyzer::Membrane(ScaLBL_IonModel &Ion, jx_in_global /= in_global_count; jx_membrane_out_global /= membrane_out_global_count; jx_membrane_in_global /= membrane_in_global_count; - + jy_out_global = Dm->Comm.sumReduce(jy_out_local); jy_in_global = Dm->Comm.sumReduce(jy_in_local); jy_membrane_out_global = Dm->Comm.sumReduce(jy_membrane_out_local); @@ -364,7 +392,7 @@ void ElectroChemistryAnalyzer::Membrane(ScaLBL_IonModel &Ion, jy_in_global /= in_global_count; jy_membrane_out_global /= membrane_out_global_count; jy_membrane_in_global /= membrane_in_global_count; - + jz_out_global = Dm->Comm.sumReduce(jz_out_local); jz_in_global = Dm->Comm.sumReduce(jz_in_local); jz_membrane_out_global = Dm->Comm.sumReduce(jz_membrane_out_local); @@ -376,38 +404,37 @@ void ElectroChemistryAnalyzer::Membrane(ScaLBL_IonModel &Ion, jz_membrane_in_global /= membrane_in_global_count; if (Dm->rank() == 0) { - fprintf(TIMELOG, "%.8g ", value_out_global); - fprintf(TIMELOG, "%.8g ", value_in_global); - fprintf(TIMELOG, "%.8g ", value_membrane_out_global); - fprintf(TIMELOG, "%.8g ", value_membrane_in_global); - - fprintf(TIMELOG, "%.8g ", jx_out_global); - fprintf(TIMELOG, "%.8g ", jx_in_global); - fprintf(TIMELOG, "%.8g ", jx_membrane_out_global); - fprintf(TIMELOG, "%.8g ", jx_membrane_in_global); - - fprintf(TIMELOG, "%.8g ", jy_out_global); - fprintf(TIMELOG, "%.8g ", jy_in_global); - fprintf(TIMELOG, "%.8g ", jy_membrane_out_global); - fprintf(TIMELOG, 
"%.8g ", jy_membrane_in_global); - - fprintf(TIMELOG, "%.8g ", jz_out_global); - fprintf(TIMELOG, "%.8g ", jz_in_global); - fprintf(TIMELOG, "%.8g ", jz_membrane_out_global); - fprintf(TIMELOG, "%.8g ", jz_membrane_in_global); + fprintf(TIMELOG, "%.8g ", value_out_global); + fprintf(TIMELOG, "%.8g ", value_in_global); + fprintf(TIMELOG, "%.8g ", value_membrane_out_global); + fprintf(TIMELOG, "%.8g ", value_membrane_in_global); + + fprintf(TIMELOG, "%.8g ", jx_out_global); + fprintf(TIMELOG, "%.8g ", jx_in_global); + fprintf(TIMELOG, "%.8g ", jx_membrane_out_global); + fprintf(TIMELOG, "%.8g ", jx_membrane_in_global); + + fprintf(TIMELOG, "%.8g ", jy_out_global); + fprintf(TIMELOG, "%.8g ", jy_in_global); + fprintf(TIMELOG, "%.8g ", jy_membrane_out_global); + fprintf(TIMELOG, "%.8g ", jy_membrane_in_global); + + fprintf(TIMELOG, "%.8g ", jz_out_global); + fprintf(TIMELOG, "%.8g ", jz_in_global); + fprintf(TIMELOG, "%.8g ", jz_membrane_out_global); + fprintf(TIMELOG, "%.8g ", jz_membrane_in_global); } } if (Dm->rank() == 0) { - fprintf(TIMELOG, "%lu ", out_global_count); - fprintf(TIMELOG, "%lu ", in_global_count); - fprintf(TIMELOG, "%lu ", membrane_out_global_count); - fprintf(TIMELOG, "%lu\n", membrane_in_global_count); + fprintf(TIMELOG, "%lu ", out_global_count); + fprintf(TIMELOG, "%lu ", in_global_count); + fprintf(TIMELOG, "%lu ", membrane_out_global_count); + fprintf(TIMELOG, "%lu\n", membrane_in_global_count); fflush(TIMELOG); } } - void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep) { @@ -522,34 +549,35 @@ void ElectroChemistryAnalyzer::WriteVis(ScaLBL_IonModel &Ion, ScaLBL_StokesModel &Stokes, std::shared_ptr input_db, int timestep) { - + auto vis_db = input_db->getDatabase("Visualization"); char VisName[40]; - auto format = vis_db->getWithDefault( "format", "hdf5" ); - + auto format = vis_db->getWithDefault("format", "hdf5"); + if (Dm->rank() == 0) { - 
printf("ElectroChemistryAnalyzer::WriteVis (format = %s)\n", format.c_str()); - if (vis_db->getWithDefault("save_electric_potential", true)){ - printf(" save electric potential \n"); - } - if (vis_db->getWithDefault("save_concentration", true)) { - printf(" save concentration \n"); - } - if (vis_db->getWithDefault("save_velocity", false)) { - printf(" save velocity \n"); - } - if (vis_db->getWithDefault("save_ion_flux_diffusive", false)) { - printf(" save ion flux (diffusive) \n"); - } - if (vis_db->getWithDefault("save_ion_flux_advective", false)) { - printf(" save ion flux (advective) \n"); - } - if (vis_db->getWithDefault("save_ion_flux_electrical", false)) { - printf(" save ion flux (electrical) \n"); - } - if (vis_db->getWithDefault("save_electric_field", false)) { - printf(" save electric field \n"); - } + printf("ElectroChemistryAnalyzer::WriteVis (format = %s)\n", + format.c_str()); + if (vis_db->getWithDefault("save_electric_potential", true)) { + printf(" save electric potential \n"); + } + if (vis_db->getWithDefault("save_concentration", true)) { + printf(" save concentration \n"); + } + if (vis_db->getWithDefault("save_velocity", false)) { + printf(" save velocity \n"); + } + if (vis_db->getWithDefault("save_ion_flux_diffusive", false)) { + printf(" save ion flux (diffusive) \n"); + } + if (vis_db->getWithDefault("save_ion_flux_advective", false)) { + printf(" save ion flux (advective) \n"); + } + if (vis_db->getWithDefault("save_ion_flux_electrical", false)) { + printf(" save ion flux (electrical) \n"); + } + if (vis_db->getWithDefault("save_electric_field", false)) { + printf(" save electric field \n"); + } } std::vector visData; @@ -557,8 +585,8 @@ void ElectroChemistryAnalyzer::WriteVis(ScaLBL_IonModel &Ion, {Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2}, {1, 1, 1}, 0, 1); - IO::initialize("",format,"false"); - // Create the MeshDataStruct + IO::initialize("", format, "false"); + // Create the MeshDataStruct visData.resize(1); visData[0].meshName = "domain"; 
@@ -955,8 +983,7 @@ void ElectroChemistryAnalyzer::WriteVis(ScaLBL_IonModel &Ion, } void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, - ScaLBL_Poisson &Poisson, - int timestep) { + ScaLBL_Poisson &Poisson, int timestep) { int i, j, k; double Vin = 0.0; @@ -975,10 +1002,10 @@ void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, double *rho_mu_fluctuation_global; double *rho_psi_avg_global; double *rho_psi_fluctuation_global; - + /* Get the distance to the membrane */ - if (Ion.USE_MEMBRANE){ - //Ion.MembraneDistance; + if (Ion.USE_MEMBRANE) { + //Ion.MembraneDistance; } /* local sub-domain averages */ @@ -1075,40 +1102,41 @@ void ElectroChemistryAnalyzer::WriteVis(ScaLBL_IonModel &Ion, auto vis_db = input_db->getDatabase("Visualization"); char VisName[40]; - auto format = vis_db->getWithDefault( "format", "hdf5" ); + auto format = vis_db->getWithDefault("format", "hdf5"); std::vector visData; fillHalo fillData(Dm->Comm, Dm->rank_info, {Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2}, {1, 1, 1}, 0, 1); - + if (Dm->rank() == 0) { - printf("ElectroChemistryAnalyzer::WriteVis (format = %s)\n", format.c_str()); - if (vis_db->getWithDefault("save_electric_potential", true)){ - printf(" save electric potential \n"); - } - if (vis_db->getWithDefault("save_concentration", true)) { - printf(" save concentration \n"); - } - if (vis_db->getWithDefault("save_velocity", false)) { - printf(" save velocity \n"); - } - if (vis_db->getWithDefault("save_ion_flux_diffusive", false)) { - printf(" save ion flux (diffusive) \n"); - } - if (vis_db->getWithDefault("save_ion_flux_advective", false)) { - printf(" save ion flux (advective) \n"); - } - if (vis_db->getWithDefault("save_ion_flux_electrical", false)) { - printf(" save ion flux (electrical) \n"); - } - if (vis_db->getWithDefault("save_electric_field", false)) { - printf(" save electric field \n"); - } + printf("ElectroChemistryAnalyzer::WriteVis (format = %s)\n", + format.c_str()); + if 
(vis_db->getWithDefault("save_electric_potential", true)) { + printf(" save electric potential \n"); + } + if (vis_db->getWithDefault("save_concentration", true)) { + printf(" save concentration \n"); + } + if (vis_db->getWithDefault("save_velocity", false)) { + printf(" save velocity \n"); + } + if (vis_db->getWithDefault("save_ion_flux_diffusive", false)) { + printf(" save ion flux (diffusive) \n"); + } + if (vis_db->getWithDefault("save_ion_flux_advective", false)) { + printf(" save ion flux (advective) \n"); + } + if (vis_db->getWithDefault("save_ion_flux_electrical", false)) { + printf(" save ion flux (electrical) \n"); + } + if (vis_db->getWithDefault("save_electric_field", false)) { + printf(" save electric field \n"); + } } - IO::initialize("",format,"false"); - // Create the MeshDataStruct + IO::initialize("", format, "false"); + // Create the MeshDataStruct visData.resize(1); visData[0].meshName = "domain"; @@ -1200,7 +1228,6 @@ void ElectroChemistryAnalyzer::WriteVis(ScaLBL_IonModel &Ion, } } - if (vis_db->getWithDefault("save_ion_flux_electrical", false)) { for (size_t ion = 0; ion < Ion.number_ion_species; ion++) { // x-component of electro-migrational flux diff --git a/analysis/ElectroChemistry.h b/analysis/ElectroChemistry.h index f7d3031a..4cae5bce 100644 --- a/analysis/ElectroChemistry.h +++ b/analysis/ElectroChemistry.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ /* * averaging tools for electrochemistry */ @@ -29,7 +45,7 @@ public: double nu_n, nu_w; double gamma_wn, beta; double Fx, Fy, Fz; - + bool USE_MEMBRANE; //........................................................................... @@ -56,14 +72,16 @@ public: DoubleArray IonFluxElectrical_z; ElectroChemistryAnalyzer(std::shared_ptr Dm); - ElectroChemistryAnalyzer( ScaLBL_IonModel &IonModel); + ElectroChemistryAnalyzer(ScaLBL_IonModel &IonModel); ~ElectroChemistryAnalyzer(); void SetParams(); - void Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep); + void Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, + ScaLBL_StokesModel &Stokes, int timestep); void Membrane(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, int timestep); - void WriteVis(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, - ScaLBL_StokesModel &Stokes,std::shared_ptr input_db, int timestep); + void WriteVis(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, + ScaLBL_StokesModel &Stokes, + std::shared_ptr input_db, int timestep); void Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, int timestep); void WriteVis(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, std::shared_ptr input_db, int timestep); diff --git a/analysis/FlowAdaptor.cpp b/analysis/FlowAdaptor.cpp index 09d9d4e8..87a4a4f1 100644 --- a/analysis/FlowAdaptor.cpp +++ b/analysis/FlowAdaptor.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ /* Flow adaptor class for multiphase flow methods */ #include "analysis/FlowAdaptor.h" @@ -17,9 +33,7 @@ FlowAdaptor::FlowAdaptor(ScaLBL_ColorModel &M) { phi_t.fill(0); // time derivative for the phase indicator field } -FlowAdaptor::~FlowAdaptor() { - -} +FlowAdaptor::~FlowAdaptor() {} double FlowAdaptor::ImageInit(ScaLBL_ColorModel &M, std::string Filename) { int rank = M.rank; @@ -237,12 +251,12 @@ double FlowAdaptor::UpdateFractionalFlow(ScaLBL_ColorModel &M) { //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); ScaLBL_CopyToDevice(M.Aq, Aq_tmp, 7 * Np * sizeof(double)); ScaLBL_CopyToDevice(M.Bq, Bq_tmp, 7 * Np * sizeof(double)); - - delete Aq_tmp; + + delete Aq_tmp; delete Bq_tmp; - delete Vel_x; - delete Vel_y; - delete Vel_z; + delete Vel_x; + delete Vel_y; + delete Vel_z; delete Phase; return (TOTAL_MASS_CHANGE); @@ -594,8 +608,8 @@ double FlowAdaptor::SeedPhaseField(ScaLBL_ColorModel &M, //ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); ScaLBL_CopyToDevice(M.Aq, Aq_tmp, 7 * Np * sizeof(double)); ScaLBL_CopyToDevice(M.Bq, Bq_tmp, 7 * Np * sizeof(double)); - - delete Aq_tmp; + + delete Aq_tmp; delete Bq_tmp; return (mass_loss); } diff --git a/analysis/FlowAdaptor.h b/analysis/FlowAdaptor.h index b2084726..7f12e005 100644 --- a/analysis/FlowAdaptor.h +++ b/analysis/FlowAdaptor.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ /* Flow adaptor class for multiphase flow methods */ #ifndef ScaLBL_FlowAdaptor_INC @@ -62,7 +78,7 @@ public: * \details Update fractional flow condition. Mass will be preferentially added or removed from * phase regions based on where flow is occurring * @param M ScaLBL_ColorModel - */ + */ double UpdateFractionalFlow(ScaLBL_ColorModel &M); /** diff --git a/analysis/FreeEnergy.cpp b/analysis/FreeEnergy.cpp index 567c2f92..9b58fbb6 100644 --- a/analysis/FreeEnergy.cpp +++ b/analysis/FreeEnergy.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. 
+*/ #include "analysis/FreeEnergy.h" FreeEnergyAnalyzer::FreeEnergyAnalyzer(std::shared_ptr dm) : Dm(dm) { diff --git a/analysis/FreeEnergy.h b/analysis/FreeEnergy.h index bd394358..b4f01e72 100644 --- a/analysis/FreeEnergy.h +++ b/analysis/FreeEnergy.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ /* * averaging tools for electrochemistry */ diff --git a/analysis/GreyPhase.cpp b/analysis/GreyPhase.cpp index 8c1d66b3..511e6037 100644 --- a/analysis/GreyPhase.cpp +++ b/analysis/GreyPhase.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. 
+*/ #include "analysis/GreyPhase.h" // Constructor diff --git a/analysis/GreyPhase.h b/analysis/GreyPhase.h index a9a72723..0078b418 100644 --- a/analysis/GreyPhase.h +++ b/analysis/GreyPhase.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ /* * Sub-phase averaging tools */ diff --git a/analysis/Minkowski.cpp b/analysis/Minkowski.cpp index 59700d77..f96f558f 100644 --- a/analysis/Minkowski.cpp +++ b/analysis/Minkowski.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. 
+*/ #include "analysis/Minkowski.h" #include "analysis/pmmc.h" #include "analysis/analysis.h" @@ -145,7 +161,7 @@ void Minkowski::MeasureObject() { * 1 - labels the rest of the */ //DoubleArray smooth_distance(Nx,Ny,Nz); - + for (int k = 0; k < Nz; k++) { for (int j = 0; j < Ny; j++) { for (int i = 0; i < Nx; i++) { diff --git a/analysis/Minkowski.h b/analysis/Minkowski.h index d14b0665..9ecd324a 100644 --- a/analysis/Minkowski.h +++ b/analysis/Minkowski.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ // Header file for two-phase averaging class #ifndef Minkowski_INC #define Minkowski_INC diff --git a/analysis/PointList.h b/analysis/PointList.h index 397309c9..21a813ac 100644 --- a/analysis/PointList.h +++ b/analysis/PointList.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #ifndef PointList_INC #define PointList_INC diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index dafd31fd..f1f14188 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. 
+*/ #include "analysis/SubPhase.h" // Constructor @@ -411,7 +427,7 @@ void SubPhase::Basic() { dir_z = 1.0; force_mag = 1.0; } - double Porosity = (gwb.V + gnb.V)/Dm->Volume; + double Porosity = (gwb.V + gnb.V) / Dm->Volume; double saturation = gwb.V / (gwb.V + gnb.V); double water_flow_rate = gwb.V * (gwb.Px * dir_x + gwb.Py * dir_y + gwb.Pz * dir_z) / gwb.M / @@ -431,10 +447,12 @@ void SubPhase::Basic() { //double fractional_flow = water_flow_rate / total_flow_rate; double h = Dm->voxel_length; double krn = h * h * nu_n * Porosity * not_water_flow_rate / force_mag; - double krw = h * h * nu_w * Porosity* water_flow_rate / force_mag; + double krw = h * h * nu_w * Porosity * water_flow_rate / force_mag; /* not counting films */ - double krnf = krn - h * h * nu_n * Porosity * not_water_film_flow_rate / force_mag; - double krwf = krw - h * h * nu_w * Porosity * water_film_flow_rate / force_mag; + double krnf = krn - h * h * nu_n * Porosity * not_water_film_flow_rate / + force_mag; + double krwf = + krw - h * h * nu_w * Porosity * water_film_flow_rate / force_mag; double eff_pressure = 1.0 / (krn + krw); // effective pressure drop fprintf(TIMELOG, diff --git a/analysis/SubPhase.h b/analysis/SubPhase.h index ed7b8f20..2b95d172 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ /* * Sub-phase averaging tools */ diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index 5670be6b..f1110b9e 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "analysis/TwoPhase.h" #include "analysis/pmmc.h" @@ -11,7 +27,6 @@ #include "IO/Writer.h" #include "analysis/filters.h" - #include #define BLOB_AVG_COUNT 35 @@ -401,7 +416,8 @@ void TwoPhase::UpdateSolid() { void TwoPhase::UpdateMeshValues() { int i, j, k, n; - fillHalo fillData(Dm->Comm, Dm->rank_info, {Nx-2,Ny-2,Nz-2}, {1, 1, 1}, 0, 1); + fillHalo fillData(Dm->Comm, Dm->rank_info, {Nx - 2, Ny - 2, Nz - 2}, + {1, 1, 1}, 0, 1); //........................................................................... 
//Dm->CommunicateMeshHalo(SDn); @@ -560,7 +576,7 @@ void TwoPhase::ComputeLocal() { Kwn += pmmc_CubeSurfaceInterpValue( CubeValues, GaussCurvature, nw_pts, nw_tris, Values, i, j, k, n_nw_pts, n_nw_tris); - + Jwn += pmmc_CubeSurfaceInterpValue( CubeValues, MeanCurvature, nw_pts, nw_tris, Values, i, j, k, n_nw_pts, n_nw_tris); @@ -591,7 +607,7 @@ void TwoPhase::ComputeLocal() { efawns += pmmc_CubeContactAngle( CubeValues, Values, SDn_x, SDn_y, SDn_z, SDs_x, SDs_y, SDs_z, local_nws_pts, i, j, k, n_local_nws_pts); - + wwnsdnwn += pmmc_CommonCurveSpeed( CubeValues, dPdt, vawns, SDn_x, SDn_y, SDn_z, SDs_x, SDs_y, SDs_z, local_nws_pts, i, j, k, n_local_nws_pts); @@ -705,18 +721,19 @@ void TwoPhase::ComputeStatic() { kmin = 1; kmax = Nz - 1; imin = jmin = 1; - + /* set fluid isovalue to "grow" NWP for contact angle measurement */ fluid_isovalue = -1.0; - + string FILENAME = "ContactAngle"; - + char LocalRankString[8]; char LocalRankFilename[40]; sprintf(LocalRankString, "%05d", Dm->rank()); - sprintf(LocalRankFilename, "%s%s%s", "ContactAngle.", LocalRankString,".csv"); + sprintf(LocalRankFilename, "%s%s%s", "ContactAngle.", LocalRankString, + ".csv"); FILE *ANGLES = fopen(LocalRankFilename, "a+"); - fprintf(ANGLES,"x y z angle\n"); + fprintf(ANGLES, "x y z angle\n"); for (k = kmin; k < kmax; k++) { for (j = jmin; j < Ny - 1; j++) { @@ -761,13 +778,13 @@ void TwoPhase::ComputeStatic() { Kwn += pmmc_CubeSurfaceInterpValue( CubeValues, GaussCurvature, nw_pts, nw_tris, Values, i, j, k, n_nw_pts, n_nw_tris); - + Jwn += pmmc_CubeSurfaceInterpValue( CubeValues, MeanCurvature, nw_pts, nw_tris, Values, i, j, k, n_nw_pts, n_nw_tris); - - Xwn += geomavg_EulerCharacteristic(nw_pts, nw_tris, n_nw_pts, - n_nw_tris, i, j, k); + + Xwn += geomavg_EulerCharacteristic( + nw_pts, nw_tris, n_nw_pts, n_nw_tris, i, j, k); // Integrate the trimmed mean curvature (hard-coded to use a distance of 4 pixels) pmmc_CubeTrimSurfaceInterpValues( @@ -785,12 +802,13 @@ void TwoPhase::ComputeStatic() { 
efawns += pmmc_CubeContactAngle( CubeValues, Values, SDn_x, SDn_y, SDn_z, SDs_x, SDs_y, SDs_z, local_nws_pts, i, j, k, n_local_nws_pts); - + for (int p = 0; p < n_local_nws_pts; p++) { // Extract the line segment Point A = local_nws_pts(p); double value = Values(p); - fprintf(ANGLES, "%.8g %.8g %.8g %.8g\n", A.x, A.y, A.z, value); + fprintf(ANGLES, "%.8g %.8g %.8g %.8g\n", A.x, A.y, A.z, + value); } pmmc_CurveCurvature(SDn, SDs, SDn_x, SDn_y, SDn_z, SDs_x, @@ -800,14 +818,14 @@ void TwoPhase::ComputeStatic() { lwns += pmmc_CubeCurveLength(local_nws_pts, n_local_nws_pts); - + /* half contribution for vertices / edges at the common line * each cube with contact line has a net of undercounting vertices * each cube is undercounting edges due to internal counts */ - Xwn += 0.25*n_local_nws_pts - 0.5; - Xws += 0.25*n_local_nws_pts - 0.5; - Xns += 0.25*n_local_nws_pts - 0.5; + Xwn += 0.25 * n_local_nws_pts - 0.5; + Xws += 0.25 * n_local_nws_pts - 0.5; + Xns += 0.25 * n_local_nws_pts - 0.5; } // Solid interface averagees @@ -820,12 +838,12 @@ void TwoPhase::ComputeStatic() { n_ns_tris); aws += pmmc_CubeSurfaceOrientation(Gws, ws_pts, ws_tris, n_ws_tris); - - Xws += geomavg_EulerCharacteristic(ws_pts, ws_tris, n_ws_pts, - n_ws_tris, i, j, k); - - Xns += geomavg_EulerCharacteristic(ns_pts, ns_tris, n_ns_pts, - n_ns_tris, i, j, k); + + Xws += geomavg_EulerCharacteristic( + ws_pts, ws_tris, n_ws_pts, n_ws_tris, i, j, k); + + Xns += geomavg_EulerCharacteristic( + ns_pts, ns_tris, n_ns_pts, n_ns_tris, i, j, k); } //........................................................................... 
// Compute the integral curvature of the non-wetting phase @@ -850,11 +868,9 @@ void TwoPhase::ComputeStatic() { Kn += pmmc_CubeSurfaceInterpValue(CubeValues, GaussCurvature, nw_pts, nw_tris, Values, i, j, k, n_nw_pts, n_nw_tris); - euler += geomavg_EulerCharacteristic(nw_pts, nw_tris, n_nw_pts, n_nw_tris, i, j, k); - } } } @@ -1522,7 +1538,6 @@ void TwoPhase::Reduce() { dEs = dEs * iVol_global; lwns_global = lwns_global * iVol_global; */ - } void TwoPhase::NonDimensionalize(double D, double viscosity, double IFT) { @@ -1536,27 +1551,28 @@ void TwoPhase::NonDimensionalize(double D, double viscosity, double IFT) { void TwoPhase::PrintStatic() { if (Dm->rank() == 0) { - FILE *STATIC; + FILE *STATIC; STATIC = fopen("geometry.csv", "a+"); if (fseek(STATIC, 0, SEEK_SET) == fseek(STATIC, 0, SEEK_CUR)) { // If timelog is empty, write a short header to list the averages fprintf(STATIC, "sw awn ans aws Jwn Kwn lwns cwns KGws " - "KGwn Xwn Xws Xns "); // Scalar averages - fprintf(STATIC, + "KGwn Xwn Xws Xns "); // Scalar averages + fprintf( + STATIC, "Gwnxx Gwnyy Gwnzz Gwnxy Gwnxz Gwnyz "); // Orientation tensors fprintf(STATIC, "Gwsxx Gwsyy Gwszz Gwsxy Gwsxz Gwsyz "); fprintf(STATIC, "Gnsxx Gnsyy Gnszz Gnsxy Gnsxz Gnsyz "); fprintf(STATIC, "trawn trJwn trRwn "); //trimmed curvature, - fprintf(STATIC, "Vw Aw Jw Xw "); //miknowski measures, - fprintf(STATIC, "Vn An Jn Xn\n"); //miknowski measures, + fprintf(STATIC, "Vw Aw Jw Xw "); //miknowski measures, + fprintf(STATIC, "Vn An Jn Xn\n"); //miknowski measures, //fprintf(STATIC,"Euler Kn2 Jn2 An2\n"); //miknowski measures, } - fprintf(STATIC, "%.5g ", sat_w); // saturation + fprintf(STATIC, "%.5g ", sat_w); // saturation fprintf(STATIC, "%.5g %.5g %.5g ", awn_global, ans_global, aws_global); // interfacial areas fprintf(STATIC, "%.5g %.5g ", Jwn_global, - Kwn_global); // curvature of wn interface + Kwn_global); // curvature of wn interface fprintf(STATIC, "%.5g ", lwns_global); // common curve length fprintf(STATIC, "%.5g ", 
efawns_global); // average contact angle fprintf(STATIC, "%.5g %.5g ", KNwns_global, @@ -1576,7 +1592,7 @@ void TwoPhase::PrintStatic() { trRwn_global); // Trimmed curvature fprintf(STATIC, "%.5g %.5g %.5g %.5g ", wet_morph->V(), wet_morph->A(), wet_morph->H(), wet_morph->X()); - fprintf(STATIC, "%.5g %.5g %.5g %.5g\n", nonwet_morph->V(), + fprintf(STATIC, "%.5g %.5g %.5g %.5g\n", nonwet_morph->V(), nonwet_morph->A(), nonwet_morph->H(), nonwet_morph->X()); //fprintf(STATIC,"%.5g %.5g %.5g %.5g\n",euler_global, Kn_global, Jn_global, An_global); // minkowski measures fclose(STATIC); diff --git a/analysis/TwoPhase.h b/analysis/TwoPhase.h index 700b435a..15247431 100644 --- a/analysis/TwoPhase.h +++ b/analysis/TwoPhase.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // Header file for two-phase averaging class #ifndef TwoPhase_INC #define TwoPhase_INC diff --git a/analysis/analysis.cpp b/analysis/analysis.cpp index 0847811d..de639700 100644 --- a/analysis/analysis.cpp +++ b/analysis/analysis.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "analysis/analysis.h" #include "ProfilerApp.h" diff --git a/analysis/analysis.h b/analysis/analysis.h index 6a729983..2bb47558 100644 --- a/analysis/analysis.h +++ b/analysis/analysis.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #ifndef Analysis_H_INC #define Analysis_H_INC diff --git a/analysis/dcel.cpp b/analysis/dcel.cpp index f616a7a8..35a2be75 100644 --- a/analysis/dcel.cpp +++ b/analysis/dcel.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "analysis/dcel.h" DCEL::DCEL() {} diff --git a/analysis/dcel.h b/analysis/dcel.h index 3469d60f..ac20386e 100644 --- a/analysis/dcel.h +++ b/analysis/dcel.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #ifndef DCEL_INC #define DCEL_INC diff --git a/analysis/distance.cpp b/analysis/distance.cpp index d67193e0..cf7f1e26 100644 --- a/analysis/distance.cpp +++ b/analysis/distance.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "analysis/distance.h" /****************************************************************** diff --git a/analysis/distance.h b/analysis/distance.h index 291bece9..6043ea56 100644 --- a/analysis/distance.h +++ b/analysis/distance.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #ifndef Distance_H_INC #define Distance_H_INC diff --git a/analysis/filters.cpp b/analysis/filters.cpp index a262f088..22e6a3b4 100644 --- a/analysis/filters.cpp +++ b/analysis/filters.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "analysis/filters.h" #include "math.h" #include "ProfilerApp.h" diff --git a/analysis/filters.h b/analysis/filters.h index f19a1a90..74d42352 100644 --- a/analysis/filters.h +++ b/analysis/filters.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #ifndef Filters_H_INC #define Filters_H_INC diff --git a/analysis/histogram.h b/analysis/histogram.h index fe27f864..b2cd53d9 100644 --- a/analysis/histogram.h +++ b/analysis/histogram.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ /* * Generate a histogram for volumetric, interfacial and common curve properties * copyright 2014, James E. McClure diff --git a/analysis/imfilter.h b/analysis/imfilter.h index e6f607b3..3390aba8 100644 --- a/analysis/imfilter.h +++ b/analysis/imfilter.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // These functions mimic the behavior of imfilter in MATLAB #ifndef included_imfilter #define included_imfilter diff --git a/analysis/imfilter.hpp b/analysis/imfilter.hpp index d2c40d0d..98592ebb 100644 --- a/analysis/imfilter.hpp +++ b/analysis/imfilter.hpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. 
McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "analysis/imfilter.h" #include "ProfilerApp.h" #include diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index e50a1aee..728bd6ed 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include // Implementation of morphological opening routine @@ -137,7 +153,8 @@ void Morphology::Initialize(std::shared_ptr Dm, DoubleArray &Distance) { morphRadius.resize(recvLoc); //.............................. 
/* send the morphological radius */ - Dm->Comm.Irecv(&morphRadius[recvOffset_x], recvCount, Dm->rank_x(), recvtag + 0); + Dm->Comm.Irecv(&morphRadius[recvOffset_x], recvCount, Dm->rank_x(), + recvtag + 0); Dm->Comm.send(&tmpDistance[0], sendCount, Dm->rank_X(), sendtag + 0); /* send the shift values */ Dm->Comm.Irecv(&xShift[recvOffset_x], recvCount, Dm->rank_x(), recvtag + 1); @@ -501,7 +518,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, if (rank == 0) printf("Maximum pore size: %f \n", maxdistGlobal); final_void_fraction = volume_fraction; //initialize - + int ii, jj, kk; int imin, jmin, kmin, imax, jmax, kmax; int Nx = nx; @@ -523,28 +540,30 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, int numTry = 0; int maxTry = 100; - while ( !(void_fraction_new < VoidFraction) && numTry < maxTry) { + while (!(void_fraction_new < VoidFraction) && numTry < maxTry) { numTry++; void_fraction_diff_old = void_fraction_diff_new; void_fraction_old = void_fraction_new; Rcrit_old = Rcrit_new; Rcrit_new -= deltaR * Rcrit_old; - if (rank==0) printf("Try %i with radius %f \n", numTry, Rcrit_new); + if (rank == 0) + printf("Try %i with radius %f \n", numTry, Rcrit_new); if (Rcrit_new < 0.5) { numTry = maxTry; } int Window = round(Rcrit_new); if (Window == 0) - Window = 1; // If Window = 0 at the begining, after the following process will have sw=1.0 + Window = + 1; // If Window = 0 at the begining, after the following process will have sw=1.0 // and sw Rcrit_new) { // loop over the window and update - //printf("Distance(%i %i %i) = %f \n",i,j,k, SignDist(i,j,k)); + //printf("Distance(%i %i %i) = %f \n",i,j,k, SignDist(i,j,k)); imin = max(1, i - Window); jmin = max(1, j - Window); kmin = max(1, k - Window); @@ -611,7 +630,8 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, //*************************************************************************************** double MorphDrain(DoubleArray &SignDist, signed char *id, - std::shared_ptr Dm, double 
VoidFraction, double InitialRadius) { + std::shared_ptr Dm, double VoidFraction, + double InitialRadius) { // SignDist is the distance to the object that you want to constaing the morphological opening // VoidFraction is the the empty space where the object inst // id is a labeled map @@ -688,10 +708,10 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, double deltaR = 0.05; // amount to change the radius in voxel units double Rcrit_old = maxdistGlobal; double Rcrit_new = maxdistGlobal; - - if (InitialRadius < maxdistGlobal){ - Rcrit_old = InitialRadius; - Rcrit_new = InitialRadius; + + if (InitialRadius < maxdistGlobal) { + Rcrit_old = InitialRadius; + Rcrit_new = InitialRadius; } //if (argc>2){ // Rcrit_new = strtod(argv[2],NULL); diff --git a/analysis/morphology.h b/analysis/morphology.h index 681ee6dd..9c1819a9 100644 --- a/analysis/morphology.h +++ b/analysis/morphology.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ // Morphological opening routine #include "common/Array.h" #include "common/Domain.h" @@ -7,7 +23,8 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr Dm, double VoidFraction, signed char ErodeLabel, signed char ReplaceLabel); double MorphDrain(DoubleArray &SignDist, signed char *id, - std::shared_ptr Dm, double VoidFraction, double InitialRadius); + std::shared_ptr Dm, double VoidFraction, + double InitialRadius); double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, std::shared_ptr Dm, double TargetVol, double WallFactor); diff --git a/analysis/pmmc.h b/analysis/pmmc.h index f2152f0d..51c7f9ea 100644 --- a/analysis/pmmc.h +++ b/analysis/pmmc.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #ifndef pmmc_INC #define pmmc_INC @@ -4041,7 +4057,7 @@ inline double pmmc_CubeContactAngle(DoubleArray &CubeValues, (A.z - B.z) * (A.z - B.z)); integral += 0.5 * length * (vA + vB); } - + return integral; } //-------------------------------------------------------------------------------------------------------- @@ -4422,12 +4438,12 @@ inline void pmmc_CurveCurvature(DoubleArray &f, DoubleArray &s, fx = f_x(i, j, k); fy = f_y(i, j, k); fz = f_z(i, j, k); - + // Normal to fluid surface Nx.Corners(i - ic, j - jc, k - kc) = fx; Ny.Corners(i - ic, j - jc, k - kc) = fy; Nz.Corners(i - ic, j - jc, k - kc) = fz; - + // Normal to solid surface Sx.Corners(i - ic, j - jc, k - kc) = sx; Sy.Corners(i - ic, j - jc, k - kc) = sy; @@ -4534,7 +4550,7 @@ inline void pmmc_CurveCurvature(DoubleArray &f, DoubleArray &s, nsx /= norm; nsy /= norm; nsz /= norm; - + // Normal vector to the fluid surface nwx = Nx.eval(P); nwy = Ny.eval(P); @@ -4562,7 +4578,7 @@ inline void pmmc_CurveCurvature(DoubleArray &f, DoubleArray &s, nwsy = -nwsy; nwsz = -nwsz; } - + // common curve normal in the fluid surface tangent plane (rel. geodesic curvature) nwnx = twnsy * nwz - twnsz * nwy; nwny = twnsz * nwx - twnsx * nwz; @@ -4580,7 +4596,6 @@ inline void pmmc_CurveCurvature(DoubleArray &f, DoubleArray &s, nwnz = -nwnz; } - if (length > 0.0) { // normal curvature component in the direction of the solid surface //KNavg += K * (nsx * nwnsx + nsy * nwnsy + nsz * nwnsz) * length; diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 5d1d9c2e..79ff0834 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // Run the analysis, blob identification, and write restart files #include "analysis/runAnalysis.h" #include "analysis/analysis.h" diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h index 15baa00b..bff537f1 100644 --- a/analysis/runAnalysis.h +++ b/analysis/runAnalysis.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #ifndef RunAnalysis_H_INC #define RunAnalysis_H_INC diff --git a/analysis/uCT.cpp b/analysis/uCT.cpp index feb9bf02..d7ca0886 100644 --- a/analysis/uCT.cpp +++ b/analysis/uCT.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "analysis/uCT.h" #include "analysis/analysis.h" #include "analysis/distance.h" diff --git a/analysis/uCT.h b/analysis/uCT.h index 3d39d806..63c3f83b 100644 --- a/analysis/uCT.h +++ b/analysis/uCT.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #ifndef uCT_H_INC #define uCT_H_INC diff --git a/common/Array.cpp b/common/Array.cpp index 5db23505..28463753 100644 --- a/common/Array.cpp +++ b/common/Array.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ // clang-format off #include "common/Array.h" #include "common/Array.hpp" diff --git a/common/Array.h b/common/Array.h index 2dd7a785..423a33f6 100644 --- a/common/Array.h +++ b/common/Array.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #ifndef included_ArrayClass #define included_ArrayClass diff --git a/common/Array.hpp b/common/Array.hpp index df56c2b6..e72e2dec 100644 --- a/common/Array.hpp +++ b/common/Array.hpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM).
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #ifndef included_ArrayClass_hpp #define included_ArrayClass_hpp diff --git a/common/ArraySize.h b/common/ArraySize.h index 5661bcc8..7cd5512f 100644 --- a/common/ArraySize.h +++ b/common/ArraySize.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #ifndef included_ArraySizeClass #define included_ArraySizeClass diff --git a/common/Communication.cpp b/common/Communication.cpp index 08d98e02..22ef57f6 100644 --- a/common/Communication.cpp +++ b/common/Communication.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM).
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #include "common/Communication.h" /******************************************************** diff --git a/common/Communication.h b/common/Communication.h index ed549395..07957a4d 100644 --- a/common/Communication.h +++ b/common/Communication.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/ #ifndef COMMUNICATION_H_INC #define COMMUNICATION_H_INC @@ -192,68 +208,72 @@ inline void CommunicateSendRecvCounts( } //*************************************************************************************** -inline void CommunicateRecvLists( const Utilities::MPI& comm, int sendtag, int recvtag, - int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z, - int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy, - int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz, - int *sendList_yz, int *sendList_YZ, int *sendList_yZ, int *sendList_Yz, - int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X, int sendCount_Y, int sendCount_Z, - int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy, - int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz, - int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz, - int *recvList_x, int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y, int *recvList_Z, - int *recvList_xy, int *recvList_XY, int *recvList_xY, int *recvList_Xy, - int *recvList_xz, int *recvList_XZ, int *recvList_xZ, int *recvList_Xz, - int *recvList_yz, int *recvList_YZ, int *recvList_yZ, int *recvList_Yz, - int recvCount_x, int recvCount_y, int recvCount_z, int recvCount_X, int recvCount_Y, int recvCount_Z, - int recvCount_xy, int recvCount_XY, int recvCount_xY, int recvCount_Xy, - int recvCount_xz, int recvCount_XZ, int recvCount_xZ, int recvCount_Xz, - int recvCount_yz, int recvCount_YZ, int recvCount_yZ, int recvCount_Yz, - int rank_x, int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY, - int rank_Xy, int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz) -{ - MPI_Request req1[18], req2[18]; - req1[0] = comm.Isend(sendList_x,sendCount_x,rank_x,sendtag+0); - req2[0] = 
comm.Irecv(recvList_X,recvCount_X,rank_X,recvtag+0); - req1[1] = comm.Isend(sendList_X,sendCount_X,rank_X,sendtag+1); - req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_x,recvtag+1); - req1[2] = comm.Isend(sendList_y,sendCount_y,rank_y,sendtag+2); - req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_Y,recvtag+2); - req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_Y,sendtag+3); - req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_y,recvtag+3); - req1[4] = comm.Isend(sendList_z,sendCount_z,rank_z,sendtag+4); - req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_Z,recvtag+4); - req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_Z,sendtag+5); - req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_z,recvtag+5); +inline void CommunicateRecvLists( + const Utilities::MPI &comm, int sendtag, int recvtag, int *sendList_x, + int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, + int *sendList_Z, int *sendList_xy, int *sendList_XY, int *sendList_xY, + int *sendList_Xy, int *sendList_xz, int *sendList_XZ, int *sendList_xZ, + int *sendList_Xz, int *sendList_yz, int *sendList_YZ, int *sendList_yZ, + int *sendList_Yz, int sendCount_x, int sendCount_y, int sendCount_z, + int sendCount_X, int sendCount_Y, int sendCount_Z, int sendCount_xy, + int sendCount_XY, int sendCount_xY, int sendCount_Xy, int sendCount_xz, + int sendCount_XZ, int sendCount_xZ, int sendCount_Xz, int sendCount_yz, + int sendCount_YZ, int sendCount_yZ, int sendCount_Yz, int *recvList_x, + int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y, + int *recvList_Z, int *recvList_xy, int *recvList_XY, int *recvList_xY, + int *recvList_Xy, int *recvList_xz, int *recvList_XZ, int *recvList_xZ, + int *recvList_Xz, int *recvList_yz, int *recvList_YZ, int *recvList_yZ, + int *recvList_Yz, int recvCount_x, int recvCount_y, int recvCount_z, + int recvCount_X, int recvCount_Y, int recvCount_Z, int recvCount_xy, + int recvCount_XY, int recvCount_xY, int recvCount_Xy, int recvCount_xz, + int recvCount_XZ, int 
recvCount_xZ, int recvCount_Xz, int recvCount_yz, + int recvCount_YZ, int recvCount_yZ, int recvCount_Yz, int rank_x, + int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, + int rank_XY, int rank_xY, int rank_Xy, int rank_xz, int rank_XZ, + int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ, + int rank_Yz) { + MPI_Request req1[18], req2[18]; + req1[0] = comm.Isend(sendList_x, sendCount_x, rank_x, sendtag + 0); + req2[0] = comm.Irecv(recvList_X, recvCount_X, rank_X, recvtag + 0); + req1[1] = comm.Isend(sendList_X, sendCount_X, rank_X, sendtag + 1); + req2[1] = comm.Irecv(recvList_x, recvCount_x, rank_x, recvtag + 1); + req1[2] = comm.Isend(sendList_y, sendCount_y, rank_y, sendtag + 2); + req2[2] = comm.Irecv(recvList_Y, recvCount_Y, rank_Y, recvtag + 2); + req1[3] = comm.Isend(sendList_Y, sendCount_Y, rank_Y, sendtag + 3); + req2[3] = comm.Irecv(recvList_y, recvCount_y, rank_y, recvtag + 3); + req1[4] = comm.Isend(sendList_z, sendCount_z, rank_z, sendtag + 4); + req2[4] = comm.Irecv(recvList_Z, recvCount_Z, rank_Z, recvtag + 4); + req1[5] = comm.Isend(sendList_Z, sendCount_Z, rank_Z, sendtag + 5); + req2[5] = comm.Irecv(recvList_z, recvCount_z, rank_z, recvtag + 5); - req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_xy,sendtag+6); - req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_XY,recvtag+6); - req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_XY,sendtag+7); - req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_xy,recvtag+7); - req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy,sendtag+8); - req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_xY,recvtag+8); - req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_xY,sendtag+9); - req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy,recvtag+9); + req1[6] = comm.Isend(sendList_xy, sendCount_xy, rank_xy, sendtag + 6); + req2[6] = comm.Irecv(recvList_XY, recvCount_XY, rank_XY, recvtag + 6); + req1[7] = comm.Isend(sendList_XY, sendCount_XY, rank_XY, sendtag + 7); + req2[7] = 
comm.Irecv(recvList_xy, recvCount_xy, rank_xy, recvtag + 7); + req1[8] = comm.Isend(sendList_Xy, sendCount_Xy, rank_Xy, sendtag + 8); + req2[8] = comm.Irecv(recvList_xY, recvCount_xY, rank_xY, recvtag + 8); + req1[9] = comm.Isend(sendList_xY, sendCount_xY, rank_xY, sendtag + 9); + req2[9] = comm.Irecv(recvList_Xy, recvCount_Xy, rank_Xy, recvtag + 9); - req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_xz,sendtag+10); - req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ,recvtag+10); - req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ,sendtag+11); - req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_xz,recvtag+11); - req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_Xz,sendtag+12); - req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ,recvtag+12); - req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ,sendtag+13); - req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz,recvtag+13); + req1[10] = comm.Isend(sendList_xz, sendCount_xz, rank_xz, sendtag + 10); + req2[10] = comm.Irecv(recvList_XZ, recvCount_XZ, rank_XZ, recvtag + 10); + req1[11] = comm.Isend(sendList_XZ, sendCount_XZ, rank_XZ, sendtag + 11); + req2[11] = comm.Irecv(recvList_xz, recvCount_xz, rank_xz, recvtag + 11); + req1[12] = comm.Isend(sendList_Xz, sendCount_Xz, rank_Xz, sendtag + 12); + req2[12] = comm.Irecv(recvList_xZ, recvCount_xZ, rank_xZ, recvtag + 12); + req1[13] = comm.Isend(sendList_xZ, sendCount_xZ, rank_xZ, sendtag + 13); + req2[13] = comm.Irecv(recvList_Xz, recvCount_Xz, rank_Xz, recvtag + 13); - req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_yz,sendtag+14); - req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ,recvtag+14); - req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ,sendtag+15); - req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_yz,recvtag+15); - req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz,sendtag+16); - req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ,recvtag+16); - req1[17] = 
comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ,sendtag+17); - req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz,recvtag+17); - comm.waitAll( 18, req1 ); - comm.waitAll( 18, req2 ); + req1[14] = comm.Isend(sendList_yz, sendCount_yz, rank_yz, sendtag + 14); + req2[14] = comm.Irecv(recvList_YZ, recvCount_YZ, rank_YZ, recvtag + 14); + req1[15] = comm.Isend(sendList_YZ, sendCount_YZ, rank_YZ, sendtag + 15); + req2[15] = comm.Irecv(recvList_yz, recvCount_yz, rank_yz, recvtag + 15); + req1[16] = comm.Isend(sendList_Yz, sendCount_Yz, rank_Yz, sendtag + 16); + req2[16] = comm.Irecv(recvList_yZ, recvCount_yZ, rank_yZ, recvtag + 16); + req1[17] = comm.Isend(sendList_yZ, sendCount_yZ, rank_yZ, sendtag + 17); + req2[17] = comm.Irecv(recvList_Yz, recvCount_Yz, rank_Yz, recvtag + 17); + comm.waitAll(18, req1); + comm.waitAll(18, req2); } //*************************************************************************************** diff --git a/common/Communication.hpp b/common/Communication.hpp index 89b5c02e..2127bc76 100644 --- a/common/Communication.hpp +++ b/common/Communication.hpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #ifndef COMMUNICATION_HPP_INC #define COMMUNICATION_HPP_INC diff --git a/common/Database.cpp b/common/Database.cpp index b4c2c905..8ef80914 100644 --- a/common/Database.cpp +++ b/common/Database.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #include "common/Database.h" #include "common/Utilities.h" diff --git a/common/Database.h b/common/Database.h index 812b32b6..0f14f360 100644 --- a/common/Database.h +++ b/common/Database.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/ #ifndef included_Database #define included_Database diff --git a/common/Database.hpp b/common/Database.hpp index a08a80ff..cf3e7092 100644 --- a/common/Database.hpp +++ b/common/Database.hpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #ifndef included_Database_hpp #define included_Database_hpp diff --git a/common/Domain.cpp b/common/Domain.cpp index 6ee0920e..2f429a39 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/ // Created by James McClure // Copyright 2008-2020 #include @@ -25,144 +41,157 @@ static inline void fgetl(char *str, int num, FILE *stream) { } void Domain::read_swc(const std::string &Filename) { - //...... READ IN SWC FILE................................... - int count = 0; - int number_of_lines = 0; - if (rank() == 0){ - cout << "Reading SWC file..." << endl; - { - std::string line; - std::ifstream myfile(Filename); - while (std::getline(myfile, line)) - ++number_of_lines; - number_of_lines -= 1; - } - std::cout << " Number of lines in SWC file: " << number_of_lines << endl; - } - count = Comm.sumReduce(number_of_lines); // nonzero only for rank=0 - number_of_lines = count; - - // set up structures to read - double *List_cx = new double [number_of_lines]; - double *List_cy = new double [number_of_lines]; - double *List_cz = new double [number_of_lines]; - double *List_rad = new double [number_of_lines]; - int *List_index = new int [number_of_lines]; - int *List_parent = new int [number_of_lines]; - int *List_type = new int [number_of_lines]; + //...... READ IN SWC FILE................................... + int count = 0; + int number_of_lines = 0; + if (rank() == 0) { + cout << "Reading SWC file..." << endl; + { + std::string line; + std::ifstream myfile(Filename); + while (std::getline(myfile, line)) + ++number_of_lines; + number_of_lines -= 1; + } + std::cout << " Number of lines in SWC file: " << number_of_lines + << endl; + } + count = Comm.sumReduce(number_of_lines); // nonzero only for rank=0 + number_of_lines = count; - if (rank()==0){ - FILE *fid = fopen(Filename.c_str(), "rb"); - INSIST(fid != NULL, "Error opening SWC file"); - //.........Trash the header lines (x 1).......... - char line[100]; - fgetl(line, 100, fid); - //........read the spheres.................. 
- // We will read until a blank like or end-of-file is reached - count = 0; - while (!feof(fid) && fgets(line, 100, fid) != NULL) { - char *line2 = line; - List_index[count] = int(strtod(line2, &line2)); - List_type[count] = int(strtod(line2, &line2)); - List_cx[count] = strtod(line2, &line2); - List_cy[count] = strtod(line2, &line2); - List_cz[count] = strtod(line2, &line2); - List_rad[count] = strtod(line2, &line2); - List_parent[count] = int(strtod(line2, &line2)); - count++; - } - fclose( fid ); - cout << " Number of lines extracted is: " << count << endl; - INSIST(count == number_of_lines, "Problem reading swc file!"); - - double min_cx = List_cx[0]-List_rad[0]; - double min_cy = List_cy[0]-List_rad[0]; - double min_cz = List_cz[0]-List_rad[0]; - for (count=1; count Nx-1 ) start_idx = Nx; - if (start_idy > Ny-1 ) start_idy = Ny; - if (start_idz > Nz-1 ) start_idz = Nz; - if (finish_idx < 0 ) finish_idx = 0; - if (finish_idy < 0 ) finish_idy = 0; - if (finish_idz < 0 ) finish_idz = 0; - if (finish_idx > Nx-1 ) finish_idx = Nx; - if (finish_idy > Ny-1 ) finish_idy = Ny; - if (finish_idz > Nz-1 ) finish_idz = Nz; - /* if (rank()==1) printf(" alpha = %f, beta = %f, gamma= %f\n",alpha, beta,gamma); + if (start_idx < 0) + start_idx = 0; + if (start_idy < 0) + start_idy = 0; + if (start_idz < 0) + start_idz = 0; + if (start_idx > Nx - 1) + start_idx = Nx; + if (start_idy > Ny - 1) + start_idy = Ny; + if (start_idz > Nz - 1) + start_idz = Nz; + if (finish_idx < 0) + finish_idx = 0; + if (finish_idy < 0) + finish_idy = 0; + if (finish_idz < 0) + finish_idz = 0; + if (finish_idx > Nx - 1) + finish_idx = Nx; + if (finish_idy > Ny - 1) + finish_idy = Ny; + if (finish_idz > Nz - 1) + finish_idz = Nz; + + /* if (rank()==1) printf(" alpha = %f, beta = %f, gamma= %f\n",alpha, beta,gamma); if (rank()==1) printf(" xi = %f, yi = %f, zi= %f, ri = %f \n",xi, yi, zi, ri); if (rank()==1) printf(" xp = %f, yp = %f, zp= %f, rp = %f \n",xp, yp, zp, rp); @@ -192,51 +233,61 @@ void 
Domain::read_swc(const std::string &Filename) { if (rank()==1) printf( "finish: %i, %i, %i \n",finish_idx,finish_idy,finish_idz); */ - for (int k = start_idz; k length ){ - distance = di; - } - else if (s < 0.0){ - distance = dp; - } - else { - // linear variation for radius - double radius = rp + (ri - rp)*s/length; - distance = radius - sqrt((x-xp-alpha*s)*(x-xp-alpha*s) + (y-yp-beta*s)*(y-yp-beta*s) + (z-zp-gamma*s)*(z-zp-gamma*s)); - } - if (distance < di) distance = di; - if (distance < dp) distance = dp; - - if ( distance > 0.0 ){ - /* label the voxel */ - //id[k*Nx*Ny + j*Nx + i] = label; - id[k*Nx*Ny + j*Nx + i] = 2; - } - } - } - } - //if (rank()==0) printf( "next line..\n"); - } - delete[] List_cx; - delete[] List_cy; - delete[] List_cz; - delete[] List_rad; - delete[] List_index; - delete[] List_type; - delete[] List_parent; + double x = i * voxel_length + start_x; + double y = j * voxel_length + start_y; + double z = k * voxel_length + start_z; + + double distance; + double s = ((x - xp) * alpha + (y - yp) * beta + + (z - zp) * gamma) / + (alpha * alpha + beta * beta + gamma * gamma); + + double di = + ri - sqrt((x - xi) * (x - xi) + (y - yi) * (y - yi) + + (z - zi) * (z - zi)); + double dp = + rp - sqrt((x - xp) * (x - xp) + (y - yp) * (y - yp) + + (z - zp) * (z - zp)); + + if (s > length) { + distance = di; + } else if (s < 0.0) { + distance = dp; + } else { + // linear variation for radius + double radius = rp + (ri - rp) * s / length; + distance = + radius - + sqrt((x - xp - alpha * s) * (x - xp - alpha * s) + + (y - yp - beta * s) * (y - yp - beta * s) + + (z - zp - gamma * s) * (z - zp - gamma * s)); + } + if (distance < di) + distance = di; + if (distance < dp) + distance = dp; + + if (distance > 0.0) { + /* label the voxel */ + //id[k*Nx*Ny + j*Nx + i] = label; + id[k * Nx * Ny + j * Nx + i] = 2; + } + } + } + } + //if (rank()==0) printf( "next line..\n"); + } + delete[] List_cx; + delete[] List_cy; + delete[] List_cz; + delete[] List_rad; + 
delete[] List_index; + delete[] List_type; + delete[] List_parent; } /******************************************************** @@ -528,358 +579,366 @@ void Domain::Decomp(const std::string &Filename) { //printf("INPUT ERROR: Valid ReadType are 8bit, 16bit \n"); ReadType = "8bit"; } - + /* swc format for neurons */ if (ReadType == "swc") { - read_swc(Filename); - } - else { - nx = size[0]; - ny = size[1]; - nz = size[2]; - nprocx = nproc[0]; - nprocy = nproc[1]; - nprocz = nproc[2]; - global_Nx = SIZE[0]; - global_Ny = SIZE[1]; - global_Nz = SIZE[2]; - nprocs = nprocx * nprocy * nprocz; - char *SegData = NULL; + read_swc(Filename); + } else { + nx = size[0]; + ny = size[1]; + nz = size[2]; + nprocx = nproc[0]; + nprocy = nproc[1]; + nprocz = nproc[2]; + global_Nx = SIZE[0]; + global_Ny = SIZE[1]; + global_Nz = SIZE[2]; + nprocs = nprocx * nprocy * nprocz; + char *SegData = NULL; - if (RANK == 0) { - printf("Input media: %s\n", Filename.c_str()); - printf("Relabeling %lu values\n", ReadValues.size()); - for (size_t idx = 0; idx < ReadValues.size(); idx++) { - int oldvalue = ReadValues[idx]; - int newvalue = WriteValues[idx]; - printf("oldvalue=%d, newvalue =%d \n", oldvalue, newvalue); - } + if (RANK == 0) { + printf("Input media: %s\n", Filename.c_str()); + printf("Relabeling %lu values\n", ReadValues.size()); + for (size_t idx = 0; idx < ReadValues.size(); idx++) { + int oldvalue = ReadValues[idx]; + int newvalue = WriteValues[idx]; + printf("oldvalue=%d, newvalue =%d \n", oldvalue, newvalue); + } - // Rank=0 reads the entire segmented data and distributes to worker processes - printf("Dimensions of segmented image: %ld x %ld x %ld \n", global_Nx, - global_Ny, global_Nz); - int64_t SIZE = global_Nx * global_Ny * global_Nz; - SegData = new char[SIZE]; - if (ReadType == "8bit") { - printf("Reading 8-bit input data \n"); - FILE *SEGDAT = fopen(Filename.c_str(), "rb"); - if (SEGDAT == NULL) - ERROR("Domain.cpp: Error reading segmented data"); - size_t ReadSeg; - 
ReadSeg = fread(SegData, 1, SIZE, SEGDAT); - if (ReadSeg != size_t(SIZE)) - printf("Domain.cpp: Error reading segmented data \n"); - fclose(SEGDAT); - } else if (ReadType == "16bit") { - printf("Reading 16-bit input data \n"); - short int *InputData; - InputData = new short int[SIZE]; - FILE *SEGDAT = fopen(Filename.c_str(), "rb"); - if (SEGDAT == NULL) - ERROR("Domain.cpp: Error reading segmented data"); - size_t ReadSeg; - ReadSeg = fread(InputData, 2, SIZE, SEGDAT); - if (ReadSeg != size_t(SIZE)) - printf("Domain.cpp: Error reading segmented data \n"); - fclose(SEGDAT); - for (int n = 0; n < SIZE; n++) { - SegData[n] = char(InputData[n]); - } - } - else if (ReadType == "SWC"){ + // Rank=0 reads the entire segmented data and distributes to worker processes + printf("Dimensions of segmented image: %ld x %ld x %ld \n", + global_Nx, global_Ny, global_Nz); + int64_t SIZE = global_Nx * global_Ny * global_Nz; + SegData = new char[SIZE]; + if (ReadType == "8bit") { + printf("Reading 8-bit input data \n"); + FILE *SEGDAT = fopen(Filename.c_str(), "rb"); + if (SEGDAT == NULL) + ERROR("Domain.cpp: Error reading segmented data"); + size_t ReadSeg; + ReadSeg = fread(SegData, 1, SIZE, SEGDAT); + if (ReadSeg != size_t(SIZE)) + printf("Domain.cpp: Error reading segmented data \n"); + fclose(SEGDAT); + } else if (ReadType == "16bit") { + printf("Reading 16-bit input data \n"); + short int *InputData; + InputData = new short int[SIZE]; + FILE *SEGDAT = fopen(Filename.c_str(), "rb"); + if (SEGDAT == NULL) + ERROR("Domain.cpp: Error reading segmented data"); + size_t ReadSeg; + ReadSeg = fread(InputData, 2, SIZE, SEGDAT); + if (ReadSeg != size_t(SIZE)) + printf("Domain.cpp: Error reading segmented data \n"); + fclose(SEGDAT); + for (int n = 0; n < SIZE; n++) { + SegData[n] = char(InputData[n]); + } + } else if (ReadType == "SWC") { + } + printf("Read segmented data from %s \n", Filename.c_str()); - } - printf("Read segmented data from %s \n", Filename.c_str()); + // relabel the 
data + std::vector LabelCount(ReadValues.size(), 0); + for (int k = 0; k < global_Nz; k++) { + for (int j = 0; j < global_Ny; j++) { + for (int i = 0; i < global_Nx; i++) { + n = k * global_Nx * global_Ny + j * global_Nx + i; + //char locval = loc_id[n]; + signed char locval = SegData[n]; + for (size_t idx = 0; idx < ReadValues.size(); idx++) { + signed char oldvalue = ReadValues[idx]; + signed char newvalue = WriteValues[idx]; + if (locval == oldvalue) { + SegData[n] = newvalue; + LabelCount[idx]++; + idx = ReadValues.size(); + } + } + } + } + } + for (size_t idx = 0; idx < ReadValues.size(); idx++) { + long int label = ReadValues[idx]; + long int count = LabelCount[idx]; + printf("Label=%ld, Count=%ld \n", label, count); + } + if (USE_CHECKER) { + if (inlet_layers_x > 0) { + // use checkerboard pattern + printf("Checkerboard pattern at x inlet for %i layers \n", + inlet_layers_x); + for (int k = 0; k < global_Nz; k++) { + for (int j = 0; j < global_Ny; j++) { + for (int i = xStart; i < xStart + inlet_layers_x; + i++) { + if ((j / checkerSize + k / checkerSize) % 2 == + 0) { + // void checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 2; + } else { + // solid checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 0; + } + } + } + } + } - // relabel the data - std::vector LabelCount(ReadValues.size(), 0); - for (int k = 0; k < global_Nz; k++) { - for (int j = 0; j < global_Ny; j++) { - for (int i = 0; i < global_Nx; i++) { - n = k * global_Nx * global_Ny + j * global_Nx + i; - //char locval = loc_id[n]; - signed char locval = SegData[n]; - for (size_t idx = 0; idx < ReadValues.size(); idx++) { - signed char oldvalue = ReadValues[idx]; - signed char newvalue = WriteValues[idx]; - if (locval == oldvalue) { - SegData[n] = newvalue; - LabelCount[idx]++; - idx = ReadValues.size(); - } - } - } - } - } - for (size_t idx = 0; idx < ReadValues.size(); idx++) { - long int label = ReadValues[idx]; - long int count = LabelCount[idx]; - 
printf("Label=%ld, Count=%ld \n", label, count); - } - if (USE_CHECKER) { - if (inlet_layers_x > 0) { - // use checkerboard pattern - printf("Checkerboard pattern at x inlet for %i layers \n", - inlet_layers_x); - for (int k = 0; k < global_Nz; k++) { - for (int j = 0; j < global_Ny; j++) { - for (int i = xStart; i < xStart + inlet_layers_x; i++) { - if ((j / checkerSize + k / checkerSize) % 2 == 0) { - // void checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 2; - } else { - // solid checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 0; - } - } - } - } - } + if (inlet_layers_y > 0) { + printf("Checkerboard pattern at y inlet for %i layers \n", + inlet_layers_y); + // use checkerboard pattern + for (int k = 0; k < global_Nz; k++) { + for (int j = yStart; j < yStart + inlet_layers_y; j++) { + for (int i = 0; i < global_Nx; i++) { + if ((i / checkerSize + k / checkerSize) % 2 == + 0) { + // void checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 2; + } else { + // solid checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 0; + } + } + } + } + } - if (inlet_layers_y > 0) { - printf("Checkerboard pattern at y inlet for %i layers \n", - inlet_layers_y); - // use checkerboard pattern - for (int k = 0; k < global_Nz; k++) { - for (int j = yStart; j < yStart + inlet_layers_y; j++) { - for (int i = 0; i < global_Nx; i++) { - if ((i / checkerSize + k / checkerSize) % 2 == 0) { - // void checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 2; - } else { - // solid checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 0; - } - } - } - } - } + if (inlet_layers_z > 0) { + printf("Checkerboard pattern at z inlet for %i layers, " + "saturated with phase label=%i \n", + inlet_layers_z, inlet_layers_phase); + // use checkerboard pattern + for (int k = zStart; k < zStart + inlet_layers_z; k++) { + for (int j = 0; j < global_Ny; j++) { + for (int i = 0; i < global_Nx; i++) { + 
if ((i / checkerSize + j / checkerSize) % 2 == + 0) { + // void checkers + //SegData[k*global_Nx*global_Ny+j*global_Nx+i] = 2; + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = + inlet_layers_phase; + } else { + // solid checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 0; + } + } + } + } + } - if (inlet_layers_z > 0) { - printf("Checkerboard pattern at z inlet for %i layers, " - "saturated with phase label=%i \n", - inlet_layers_z, inlet_layers_phase); - // use checkerboard pattern - for (int k = zStart; k < zStart + inlet_layers_z; k++) { - for (int j = 0; j < global_Ny; j++) { - for (int i = 0; i < global_Nx; i++) { - if ((i / checkerSize + j / checkerSize) % 2 == 0) { - // void checkers - //SegData[k*global_Nx*global_Ny+j*global_Nx+i] = 2; - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = inlet_layers_phase; - } else { - // solid checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 0; - } - } - } - } - } + if (outlet_layers_x > 0) { + // use checkerboard pattern + printf("Checkerboard pattern at x outlet for %i layers \n", + outlet_layers_x); + for (int k = 0; k < global_Nz; k++) { + for (int j = 0; j < global_Ny; j++) { + for (int i = xStart + nx * nprocx - outlet_layers_x; + i < xStart + nx * nprocx; i++) { + if ((j / checkerSize + k / checkerSize) % 2 == + 0) { + // void checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 2; + } else { + // solid checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 0; + } + } + } + } + } - if (outlet_layers_x > 0) { - // use checkerboard pattern - printf("Checkerboard pattern at x outlet for %i layers \n", - outlet_layers_x); - for (int k = 0; k < global_Nz; k++) { - for (int j = 0; j < global_Ny; j++) { - for (int i = xStart + nx * nprocx - outlet_layers_x; - i < xStart + nx * nprocx; i++) { - if ((j / checkerSize + k / checkerSize) % 2 == 0) { - // void checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 2; - } 
else { - // solid checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 0; - } - } - } - } - } + if (outlet_layers_y > 0) { + printf("Checkerboard pattern at y outlet for %i layers \n", + outlet_layers_y); + // use checkerboard pattern + for (int k = 0; k < global_Nz; k++) { + for (int j = yStart + ny * nprocy - outlet_layers_y; + j < yStart + ny * nprocy; j++) { + for (int i = 0; i < global_Nx; i++) { + if ((i / checkerSize + k / checkerSize) % 2 == + 0) { + // void checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 2; + } else { + // solid checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 0; + } + } + } + } + } - if (outlet_layers_y > 0) { - printf("Checkerboard pattern at y outlet for %i layers \n", - outlet_layers_y); - // use checkerboard pattern - for (int k = 0; k < global_Nz; k++) { - for (int j = yStart + ny * nprocy - outlet_layers_y; - j < yStart + ny * nprocy; j++) { - for (int i = 0; i < global_Nx; i++) { - if ((i / checkerSize + k / checkerSize) % 2 == 0) { - // void checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 2; - } else { - // solid checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 0; - } - } - } - } - } + if (outlet_layers_z > 0) { + printf("Checkerboard pattern at z outlet for %i layers, " + "saturated with phase label=%i \n", + outlet_layers_z, outlet_layers_phase); + // use checkerboard pattern + for (int k = zStart + nz * nprocz - outlet_layers_z; + k < zStart + nz * nprocz; k++) { + for (int j = 0; j < global_Ny; j++) { + for (int i = 0; i < global_Nx; i++) { + if ((i / checkerSize + j / checkerSize) % 2 == + 0) { + // void checkers + //SegData[k*global_Nx*global_Ny+j*global_Nx+i] = 2; + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = + outlet_layers_phase; + } else { + // solid checkers + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = 0; + } + } + } + } + } + } else { + if (inlet_layers_z > 0) { + printf("Mixed 
reflection pattern at z inlet for %i layers, " + "saturated with phase label=%i \n", + inlet_layers_z, inlet_layers_phase); + for (int k = zStart; k < zStart + inlet_layers_z; k++) { + for (int j = 0; j < global_Ny; j++) { + for (int i = 0; i < global_Nx; i++) { + signed char local_id = + SegData[k * global_Nx * global_Ny + + j * global_Nx + i]; + signed char reflection_id = + SegData[(zStart + nz * nprocz - 1) * + global_Nx * global_Ny + + j * global_Nx + i]; + if (local_id < 1 && reflection_id > 0) { + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = reflection_id; + } + } + } + } + } + if (outlet_layers_z > 0) { + printf( + "Mixed reflection pattern at z outlet for %i layers, " + "saturated with phase label=%i \n", + outlet_layers_z, outlet_layers_phase); + for (int k = zStart + nz * nprocz - outlet_layers_z; + k < zStart + nz * nprocz; k++) { + for (int j = 0; j < global_Ny; j++) { + for (int i = 0; i < global_Nx; i++) { + signed char local_id = + SegData[k * global_Nx * global_Ny + + j * global_Nx + i]; + signed char reflection_id = + SegData[zStart * global_Nx * global_Ny + + j * global_Nx + i]; + if (local_id < 1 && reflection_id > 0) { + SegData[k * global_Nx * global_Ny + + j * global_Nx + i] = reflection_id; + } + } + } + } + } + } + } - if (outlet_layers_z > 0) { - printf("Checkerboard pattern at z outlet for %i layers, " - "saturated with phase label=%i \n", - outlet_layers_z, outlet_layers_phase); - // use checkerboard pattern - for (int k = zStart + nz * nprocz - outlet_layers_z; - k < zStart + nz * nprocz; k++) { - for (int j = 0; j < global_Ny; j++) { - for (int i = 0; i < global_Nx; i++) { - if ((i / checkerSize + j / checkerSize) % 2 == 0) { - // void checkers - //SegData[k*global_Nx*global_Ny+j*global_Nx+i] = 2; - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = - outlet_layers_phase; - } else { - // solid checkers - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = 0; - } - } - } - } - } - } else { - if 
(inlet_layers_z > 0) { - printf("Mixed reflection pattern at z inlet for %i layers, " - "saturated with phase label=%i \n", - inlet_layers_z, inlet_layers_phase); - for (int k = zStart; k < zStart + inlet_layers_z; k++) { - for (int j = 0; j < global_Ny; j++) { - for (int i = 0; i < global_Nx; i++) { - signed char local_id = - SegData[k * global_Nx * global_Ny + - j * global_Nx + i]; - signed char reflection_id = - SegData[(zStart + nz * nprocz - 1) * global_Nx * - global_Ny + - j * global_Nx + i]; - if (local_id < 1 && reflection_id > 0) { - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = reflection_id; - } - } - } - } - } - if (outlet_layers_z > 0) { - printf("Mixed reflection pattern at z outlet for %i layers, " - "saturated with phase label=%i \n", - outlet_layers_z, outlet_layers_phase); - for (int k = zStart + nz * nprocz - outlet_layers_z; - k < zStart + nz * nprocz; k++) { - for (int j = 0; j < global_Ny; j++) { - for (int i = 0; i < global_Nx; i++) { - signed char local_id = - SegData[k * global_Nx * global_Ny + - j * global_Nx + i]; - signed char reflection_id = - SegData[zStart * global_Nx * global_Ny + - j * global_Nx + i]; - if (local_id < 1 && reflection_id > 0) { - SegData[k * global_Nx * global_Ny + - j * global_Nx + i] = reflection_id; - } - } - } - } - } - } - } + // Get the rank info + int64_t N = (nx + 2) * (ny + 2) * (nz + 2); - // Get the rank info - int64_t N = (nx + 2) * (ny + 2) * (nz + 2); + // number of sites to use for periodic boundary condition transition zone + int64_t z_transition_size = (nprocz * nz - (global_Nz - zStart)) / 2; + if (z_transition_size < 0) + z_transition_size = 0; - // number of sites to use for periodic boundary condition transition zone - int64_t z_transition_size = (nprocz * nz - (global_Nz - zStart)) / 2; - if (z_transition_size < 0) - z_transition_size = 0; - - // Set up the sub-domains - if (RANK == 0) { - printf("Distributing subdomains across %i processors \n", nprocs); - printf("Process grid: %i 
x %i x %i \n", nprocx, nprocy, nprocz); - printf("Subdomain size: %i x %i x %i \n", nx, ny, nz); - printf("Size of transition region: %ld \n", z_transition_size); - auto loc_id = new char[(nx + 2) * (ny + 2) * (nz + 2)]; - for (int kp = 0; kp < nprocz; kp++) { - for (int jp = 0; jp < nprocy; jp++) { - for (int ip = 0; ip < nprocx; ip++) { - // rank of the process that gets this subdomain - int rnk = kp * nprocx * nprocy + jp * nprocx + ip; - // Pack and send the subdomain for rnk - for (k = 0; k < nz + 2; k++) { - for (j = 0; j < ny + 2; j++) { - for (i = 0; i < nx + 2; i++) { - int64_t x = xStart + ip * nx + i - 1; - int64_t y = yStart + jp * ny + j - 1; - // int64_t z = zStart + kp*nz + k-1; - int64_t z = zStart + kp * nz + k - 1 - - z_transition_size; - if (x < xStart) - x = xStart; - if (!(x < global_Nx)) - x = global_Nx - 1; - if (y < yStart) - y = yStart; - if (!(y < global_Ny)) - y = global_Ny - 1; - if (z < zStart) - z = zStart; - if (!(z < global_Nz)) - z = global_Nz - 1; - int64_t nlocal = - k * (nx + 2) * (ny + 2) + j * (nx + 2) + i; - int64_t nglobal = z * global_Nx * global_Ny + - y * global_Nx + x; - loc_id[nlocal] = SegData[nglobal]; - } - } - } - if (rnk == 0) { - for (k = 0; k < nz + 2; k++) { - for (j = 0; j < ny + 2; j++) { - for (i = 0; i < nx + 2; i++) { - int nlocal = k * (nx + 2) * (ny + 2) + - j * (nx + 2) + i; - id[nlocal] = loc_id[nlocal]; - } - } - } - } else { - //printf("Sending data to process %i \n", rnk); - Comm.send(loc_id, N, rnk, 15); - } - // Write the data for this rank data - char LocalRankFilename[40]; - sprintf(LocalRankFilename, "ID.%05i", rnk + rank_offset); - FILE *ID = fopen(LocalRankFilename, "wb"); - fwrite(loc_id, 1, (nx + 2) * (ny + 2) * (nz + 2), ID); - fclose(ID); - } - } - } - delete[] loc_id; - } else { - // Recieve the subdomain from rank = 0 - //printf("Ready to recieve data %i at process %i \n", N,rank); - Comm.recv(id.data(), N, 0, 15); - } + // Set up the sub-domains + if (RANK == 0) { + printf("Distributing 
subdomains across %i processors \n", nprocs); + printf("Process grid: %i x %i x %i \n", nprocx, nprocy, nprocz); + printf("Subdomain size: %i x %i x %i \n", nx, ny, nz); + printf("Size of transition region: %ld \n", z_transition_size); + auto loc_id = new char[(nx + 2) * (ny + 2) * (nz + 2)]; + for (int kp = 0; kp < nprocz; kp++) { + for (int jp = 0; jp < nprocy; jp++) { + for (int ip = 0; ip < nprocx; ip++) { + // rank of the process that gets this subdomain + int rnk = kp * nprocx * nprocy + jp * nprocx + ip; + // Pack and send the subdomain for rnk + for (k = 0; k < nz + 2; k++) { + for (j = 0; j < ny + 2; j++) { + for (i = 0; i < nx + 2; i++) { + int64_t x = xStart + ip * nx + i - 1; + int64_t y = yStart + jp * ny + j - 1; + // int64_t z = zStart + kp*nz + k-1; + int64_t z = zStart + kp * nz + k - 1 - + z_transition_size; + if (x < xStart) + x = xStart; + if (!(x < global_Nx)) + x = global_Nx - 1; + if (y < yStart) + y = yStart; + if (!(y < global_Ny)) + y = global_Ny - 1; + if (z < zStart) + z = zStart; + if (!(z < global_Nz)) + z = global_Nz - 1; + int64_t nlocal = k * (nx + 2) * (ny + 2) + + j * (nx + 2) + i; + int64_t nglobal = + z * global_Nx * global_Ny + + y * global_Nx + x; + loc_id[nlocal] = SegData[nglobal]; + } + } + } + if (rnk == 0) { + for (k = 0; k < nz + 2; k++) { + for (j = 0; j < ny + 2; j++) { + for (i = 0; i < nx + 2; i++) { + int nlocal = k * (nx + 2) * (ny + 2) + + j * (nx + 2) + i; + id[nlocal] = loc_id[nlocal]; + } + } + } + } else { + //printf("Sending data to process %i \n", rnk); + Comm.send(loc_id, N, rnk, 15); + } + // Write the data for this rank data + char LocalRankFilename[40]; + sprintf(LocalRankFilename, "ID.%05i", + rnk + rank_offset); + FILE *ID = fopen(LocalRankFilename, "wb"); + fwrite(loc_id, 1, (nx + 2) * (ny + 2) * (nz + 2), ID); + fclose(ID); + } + } + } + delete[] loc_id; + } else { + // Recieve the subdomain from rank = 0 + //printf("Ready to recieve data %i at process %i \n", N,rank); + Comm.recv(id.data(), N, 0, 
15); + } delete[] SegData; } /************************/ - // inlet layers only apply to lower part of domain + // inlet layers only apply to lower part of domain if (rank_info.ix > 0) inlet_layers_x = 0; if (rank_info.jy > 0) @@ -899,31 +958,31 @@ void Domain::Decomp(const std::string &Filename) { } void Domain::ComputePorosity() { - // Compute the porosity - double sum; - double sum_local = 0.0; - double iVol_global = 1.0 / (1.0 * (Nx - 2) * (Ny - 2) * (Nz - 2) * - nprocx() * nprocy() * nprocz()); - if (BoundaryCondition > 0 && BoundaryCondition != 5) - iVol_global = - 1.0 / (1.0 * (Nx - 2) * nprocx() * (Ny - 2) * nprocy() * - ((Nz - 2) * nprocz() - inlet_layers_z - outlet_layers_z)); - //......................................................... - for (int k = inlet_layers_z + 1; k < Nz - outlet_layers_z - 1; k++) { - for (int j = 1; j < Ny - 1; j++) { - for (int i = 1; i < Nx - 1; i++) { - int n = k * Nx * Ny + j * Nx + i; - if (id[n] > 0) { - sum_local += 1.0; - } - } - } - } - sum = Comm.sumReduce(sum_local); - porosity = sum * iVol_global; - if (rank() == 0) - printf("Media porosity = %f \n", porosity); - //......................................................... + // Compute the porosity + double sum; + double sum_local = 0.0; + double iVol_global = 1.0 / (1.0 * (Nx - 2) * (Ny - 2) * (Nz - 2) * + nprocx() * nprocy() * nprocz()); + if (BoundaryCondition > 0 && BoundaryCondition != 5) + iVol_global = + 1.0 / (1.0 * (Nx - 2) * nprocx() * (Ny - 2) * nprocy() * + ((Nz - 2) * nprocz() - inlet_layers_z - outlet_layers_z)); + //......................................................... 
+ for (int k = inlet_layers_z + 1; k < Nz - outlet_layers_z - 1; k++) { + for (int j = 1; j < Ny - 1; j++) { + for (int i = 1; i < Nx - 1; i++) { + int n = k * Nx * Ny + j * Nx + i; + if (id[n] > 0) { + sum_local += 1.0; + } + } + } + } + sum = Comm.sumReduce(sum_local); + porosity = sum * iVol_global; + if (rank() == 0) + printf("Media porosity = %f \n", porosity); + //......................................................... } void Domain::AggregateLabels(const std::string &filename) { diff --git a/common/Domain.h b/common/Domain.h index 226c8008..bed7a99e 100644 --- a/common/Domain.h +++ b/common/Domain.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #ifndef Domain_INC #define Domain_INC @@ -187,7 +203,7 @@ public: // Public variables (need to create accessors instead) * \brief Read domain IDs from file */ void ReadIDs(); - + /** * \brief Read domain IDs from SWC file */ diff --git a/common/FunctionTable.cpp b/common/FunctionTable.cpp index ad5c37cc..9f091083 100644 --- a/common/FunctionTable.cpp +++ b/common/FunctionTable.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #include "FunctionTable.hpp" /******************************************************** @@ -92,12 +108,11 @@ template<> long double genRand() /******************************************************** * axpy * ********************************************************/ -template <> -void call_axpy(size_t, const float, const float*, float*) { +template <> void call_axpy(size_t, const float, const float *, float *) { ERROR("Not finished"); } template <> -void call_axpy(size_t, const double, const double*, double*) { +void call_axpy(size_t, const double, const double *, double *) { ERROR("Not finished"); } @@ -105,22 +120,22 @@ void call_axpy(size_t, const double, const double*, double*) { * Multiply two arrays * ********************************************************/ template <> -void call_gemv(size_t, size_t, double, double, - const double*, const double*, double*) { +void call_gemv(size_t, size_t, double, double, const double *, + const double *, double *) { ERROR("Not finished"); } template <> -void call_gemv(size_t, size_t, float, float, - const float*, const float*, float*) { +void call_gemv(size_t, size_t, float, float, const float *, + const float *, float *) { ERROR("Not finished"); } template <> -void call_gemm(size_t, size_t, size_t, double, double, - const double*, const double*, double*) { +void call_gemm(size_t, size_t, size_t, double, double, const double *, + const double *, double *) {
ERROR("Not finished"); } template <> -void call_gemm(size_t, size_t, size_t, float, float, - const float*, const float*, float*) { +void call_gemm(size_t, size_t, size_t, float, float, const float *, + const float *, float *) { ERROR("Not finished"); } diff --git a/common/FunctionTable.h b/common/FunctionTable.h index c5497848..7a45880b 100644 --- a/common/FunctionTable.h +++ b/common/FunctionTable.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #ifndef included_FunctionTable #define included_FunctionTable diff --git a/common/FunctionTable.hpp b/common/FunctionTable.hpp index bcdae59f..02690fba 100644 --- a/common/FunctionTable.hpp +++ b/common/FunctionTable.hpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ #ifndef included_FunctionTable_hpp #define included_FunctionTable_hpp @@ -265,10 +281,9 @@ TYPE FunctionTable::sum(const Array &A) { } template -inline void FunctionTable::gemmWrapper(char, char, int, int, - int, TYPE, const TYPE*, - int, const TYPE*, int, - TYPE, TYPE*, int) { +inline void FunctionTable::gemmWrapper(char, char, int, int, int, TYPE, + const TYPE *, int, const TYPE *, int, + TYPE, TYPE *, int) { ERROR("Not finished"); } diff --git a/common/MPI.cpp b/common/MPI.cpp index 72f5b462..f40a7abf 100644 --- a/common/MPI.cpp +++ b/common/MPI.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/ // This file impliments a wrapper class for MPI functions #include "common/MPI.h" @@ -3691,28 +3707,29 @@ MPI MPI::loadBalance(double local, std::vector work) { return split(0, key[getRank()]); } - - /**************************************************************************** * Function Persistent Communication * ****************************************************************************/ template <> -std::shared_ptr MPI::Isend_init(const double *buf, int N, int proc, int tag) const -{ - std::shared_ptr obj( new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); delete req; } ); - MPI_Send_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() ); +std::shared_ptr MPI::Isend_init(const double *buf, int N, + int proc, int tag) const { + std::shared_ptr obj(new MPI_Request, [](MPI_Request *req) { + MPI_Request_free(req); + delete req; + }); + MPI_Send_init(buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get()); return obj; } -template<> -std::shared_ptr MPI::Irecv_init(double *buf, int N, int proc, int tag) const -{ - std::shared_ptr obj( new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); delete req; } ); - MPI_Recv_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() ); +template <> +std::shared_ptr MPI::Irecv_init(double *buf, int N, + int proc, int tag) const { + std::shared_ptr obj(new MPI_Request, [](MPI_Request *req) { + MPI_Request_free(req); + delete req; + }); + MPI_Recv_init(buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get()); return obj; } -void MPI::Start( MPI_Request &request ) -{ - MPI_Start( &request ); -} +void MPI::Start(MPI_Request &request) { MPI_Start(&request); } } // namespace Utilities diff --git a/common/MPI.h b/common/MPI.h index de3c0534..25a034c0 100644 --- a/common/MPI.h +++ b/common/MPI.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ // This file includes a wrapper class for MPI functions // Note this is a modified version of the MPI class for the Advanced Multi-Physics Package // Used with permission @@ -728,8 +744,8 @@ public: // Member functions * need to manually free the request */ template - std::shared_ptr Isend_init(const type *buf, int length, int recv_proc, - int tag) const; + std::shared_ptr Isend_init(const type *buf, int length, + int recv_proc, int tag) const; /*! * @brief This function sets up an Irecv call (see MPI_Recv_init) @@ -742,13 +758,14 @@ public: // Member functions * need to manually free the request */ template - std::shared_ptr Irecv_init(type *buf, int length, int send_proc, int tag) const; + std::shared_ptr Irecv_init(type *buf, int length, + int send_proc, int tag) const; /*! * @brief Start the MPI communication * @param request Request to start */ - void Start( MPI_Request &request ); + void Start(MPI_Request &request); /*! * Each processor sends every other processor a single value. diff --git a/common/Membrane.cpp b/common/Membrane.cpp index 50c7deba..59d570cf 100644 --- a/common/Membrane.cpp +++ b/common/Membrane.cpp @@ -1,143 +1,169 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equinor ASA + + This file is part of the Open Porous Media project (OPM).
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ /* Membrane class for lattice Boltzmann models */ #include "common/Membrane.h" #include "analysis/distance.h" -Membrane::Membrane(std::shared_ptr sComm, int *dvcNeighborList, int Nsites) { +Membrane::Membrane(std::shared_ptr sComm, + int *dvcNeighborList, int Nsites) { - Np = Nsites; - initialNeighborList = new int[18*Np]; - ScaLBL_AllocateDeviceMemory((void **)&NeighborList, 18*Np*sizeof(int)); - Lock=false; // unlock the communicator - //...................................................................................... - // Create a separate copy of the communicator for the device + Np = Nsites; + initialNeighborList = new int[18 * Np]; + ScaLBL_AllocateDeviceMemory((void **)&NeighborList, 18 * Np * sizeof(int)); + Lock = false; // unlock the communicator + //...................................................................................... 
+ // Create a separate copy of the communicator for the device MPI_COMM_SCALBL = sComm->MPI_COMM_SCALBL.dup(); int myrank = sComm->MPI_COMM_SCALBL.getRank(); - rank_info = RankInfoStruct(myrank, rank_info.nx, rank_info.ny, rank_info.nz); - - ScaLBL_CopyToHost(initialNeighborList, dvcNeighborList, 18*Np*sizeof(int)); + rank_info = + RankInfoStruct(myrank, rank_info.nx, rank_info.ny, rank_info.nz); + + ScaLBL_CopyToHost(initialNeighborList, dvcNeighborList, + 18 * Np * sizeof(int)); sComm->MPI_COMM_SCALBL.barrier(); - ScaLBL_CopyToDevice(NeighborList, initialNeighborList, 18*Np*sizeof(int)); - - /* Copy communication lists */ - //...................................................................................... - //Lock=false; // unlock the communicator - //...................................................................................... - // Create a separate copy of the communicator for the device + ScaLBL_CopyToDevice(NeighborList, initialNeighborList, + 18 * Np * sizeof(int)); + + /* Copy communication lists */ + //...................................................................................... + //Lock=false; // unlock the communicator + //...................................................................................... + // Create a separate copy of the communicator for the device //MPI_COMM_SCALBL = sComm->Comm.dup(); - //...................................................................................... 
- // Copy the domain size and communication information directly from sComm - Nx = sComm->Nx; - Ny = sComm->Ny; - Nz = sComm->Nz; - N = Nx*Ny*Nz; - //next=0; - rank=sComm->rank; - rank_x=sComm->rank_x; - rank_y=sComm->rank_y; - rank_z=sComm->rank_z; - rank_X=sComm->rank_X; - rank_Y=sComm->rank_Y; - rank_Z=sComm->rank_Z; - - BoundaryCondition = sComm->BoundaryCondition; - - if (rank == 0){ - printf("**** Creating membrane data structure ****** \n"); - printf(" Number of active lattice sites (rank = %i): %i \n",rank, Np); - } + //...................................................................................... + // Copy the domain size and communication information directly from sComm + Nx = sComm->Nx; + Ny = sComm->Ny; + Nz = sComm->Nz; + N = Nx * Ny * Nz; + //next=0; + rank = sComm->rank; + rank_x = sComm->rank_x; + rank_y = sComm->rank_y; + rank_z = sComm->rank_z; + rank_X = sComm->rank_X; + rank_Y = sComm->rank_Y; + rank_Z = sComm->rank_Z; - sendCount_x=sComm->sendCount_x; - sendCount_y=sComm->sendCount_y; - sendCount_z=sComm->sendCount_z; - sendCount_X=sComm->sendCount_X; - sendCount_Y=sComm->sendCount_Y; - sendCount_Z=sComm->sendCount_Z; + BoundaryCondition = sComm->BoundaryCondition; - recvCount_x=sComm->recvCount_x; - recvCount_y=sComm->recvCount_y; - recvCount_z=sComm->recvCount_z; - recvCount_X=sComm->recvCount_X; - recvCount_Y=sComm->recvCount_Y; - recvCount_Z=sComm->recvCount_Z; - - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_x, recvCount_x*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_y, recvCount_y*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_z, recvCount_z*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_X, recvCount_X*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Y, recvCount_Y*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Z, recvCount_Z*sizeof(int)); - - ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_x, recvCount_x*sizeof(int)); - ScaLBL_AllocateZeroCopy((void 
**) &dvcRecvLinks_y, recvCount_y*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_z, recvCount_z*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_X, recvCount_X*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_Y, recvCount_Y*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_Z, recvCount_Z*sizeof(int)); - - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_x, recvCount_x*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_y, recvCount_y*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_z, recvCount_z*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_X, recvCount_X*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Y, recvCount_Y*sizeof(int)); - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Z, recvCount_Z*sizeof(int)); - - ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, sendCount_x*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, sendCount_y*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, sendCount_z*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, sendCount_X*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, sendCount_Y*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, sendCount_Z*sizeof(double)); - - ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, recvCount_x*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, recvCount_y*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, recvCount_z*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, recvCount_X*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, recvCount_Y*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, recvCount_Z*sizeof(double)); - - sendCount_x=sComm->copySendList("x", dvcSendList_x); - sendCount_y=sComm->copySendList("y", dvcSendList_y); - sendCount_z=sComm->copySendList("z", dvcSendList_z); - sendCount_X=sComm->copySendList("X", dvcSendList_X); - 
sendCount_Y=sComm->copySendList("Y", dvcSendList_Y); - sendCount_Z=sComm->copySendList("Z", dvcSendList_Z); + if (rank == 0) { + printf("**** Creating membrane data structure ****** \n"); + printf(" Number of active lattice sites (rank = %i): %i \n", rank, + Np); + } - recvCount_x=sComm->copyRecvList("x", dvcRecvDist_x); - recvCount_y=sComm->copyRecvList("y", dvcRecvDist_y); - recvCount_z=sComm->copyRecvList("z", dvcRecvDist_z); - recvCount_X=sComm->copyRecvList("X", dvcRecvDist_X); - recvCount_Y=sComm->copyRecvList("Y", dvcRecvDist_Y); - recvCount_Z=sComm->copyRecvList("Z", dvcRecvDist_Z); + sendCount_x = sComm->sendCount_x; + sendCount_y = sComm->sendCount_y; + sendCount_z = sComm->sendCount_z; + sendCount_X = sComm->sendCount_X; + sendCount_Y = sComm->sendCount_Y; + sendCount_Z = sComm->sendCount_Z; + recvCount_x = sComm->recvCount_x; + recvCount_y = sComm->recvCount_y; + recvCount_z = sComm->recvCount_z; + recvCount_X = sComm->recvCount_X; + recvCount_Y = sComm->recvCount_Y; + recvCount_Z = sComm->recvCount_Z; + + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_x, recvCount_x * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_y, recvCount_y * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_z, recvCount_z * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_X, recvCount_X * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_Y, recvCount_Y * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_Z, recvCount_Z * sizeof(int)); + + ScaLBL_AllocateZeroCopy((void **)&dvcRecvLinks_x, + recvCount_x * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvLinks_y, + recvCount_y * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvLinks_z, + recvCount_z * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvLinks_X, + recvCount_X * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvLinks_Y, + recvCount_Y * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvLinks_Z, + recvCount_Z * sizeof(int)); 
+ + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_x, recvCount_x * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_y, recvCount_y * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_z, recvCount_z * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_X, recvCount_X * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_Y, recvCount_Y * sizeof(int)); + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_Z, recvCount_Z * sizeof(int)); + + ScaLBL_AllocateZeroCopy((void **)&sendbuf_x, sendCount_x * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&sendbuf_y, sendCount_y * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&sendbuf_z, sendCount_z * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&sendbuf_X, sendCount_X * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&sendbuf_Y, sendCount_Y * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&sendbuf_Z, sendCount_Z * sizeof(double)); + + ScaLBL_AllocateZeroCopy((void **)&recvbuf_x, recvCount_x * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&recvbuf_y, recvCount_y * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&recvbuf_z, recvCount_z * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&recvbuf_X, recvCount_X * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&recvbuf_Y, recvCount_Y * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&recvbuf_Z, recvCount_Z * sizeof(double)); + + sendCount_x = sComm->copySendList("x", dvcSendList_x); + sendCount_y = sComm->copySendList("y", dvcSendList_y); + sendCount_z = sComm->copySendList("z", dvcSendList_z); + sendCount_X = sComm->copySendList("X", dvcSendList_X); + sendCount_Y = sComm->copySendList("Y", dvcSendList_Y); + sendCount_Z = sComm->copySendList("Z", dvcSendList_Z); + + recvCount_x = sComm->copyRecvList("x", dvcRecvDist_x); + recvCount_y = sComm->copyRecvList("y", dvcRecvDist_y); + recvCount_z = sComm->copyRecvList("z", dvcRecvDist_z); + recvCount_X = sComm->copyRecvList("X", dvcRecvDist_X); + 
recvCount_Y = sComm->copyRecvList("Y", dvcRecvDist_Y); + recvCount_Z = sComm->copyRecvList("Z", dvcRecvDist_Z); } Membrane::~Membrane() { - - delete [] initialNeighborList; - delete [] membraneLinks; - delete [] membraneTag; - delete [] membraneDist; - ScaLBL_FreeDeviceMemory( coefficient_x ); - ScaLBL_FreeDeviceMemory( coefficient_X ); - ScaLBL_FreeDeviceMemory( coefficient_y ); - ScaLBL_FreeDeviceMemory( coefficient_Y ); - ScaLBL_FreeDeviceMemory( coefficient_z ); - ScaLBL_FreeDeviceMemory( coefficient_Z ); - - ScaLBL_FreeDeviceMemory( NeighborList ); - ScaLBL_FreeDeviceMemory( MembraneLinks ); - ScaLBL_FreeDeviceMemory( MembraneCoef ); - ScaLBL_FreeDeviceMemory( MembraneDistance ); - - ScaLBL_FreeDeviceMemory( sendbuf_x ); - ScaLBL_FreeDeviceMemory( sendbuf_X ); - ScaLBL_FreeDeviceMemory( sendbuf_y ); - ScaLBL_FreeDeviceMemory( sendbuf_Y ); - ScaLBL_FreeDeviceMemory( sendbuf_z ); - ScaLBL_FreeDeviceMemory( sendbuf_Z ); -/* ScaLBL_FreeDeviceMemory( sendbuf_xy ); + delete[] initialNeighborList; + delete[] membraneLinks; + delete[] membraneTag; + delete[] membraneDist; + + ScaLBL_FreeDeviceMemory(coefficient_x); + ScaLBL_FreeDeviceMemory(coefficient_X); + ScaLBL_FreeDeviceMemory(coefficient_y); + ScaLBL_FreeDeviceMemory(coefficient_Y); + ScaLBL_FreeDeviceMemory(coefficient_z); + ScaLBL_FreeDeviceMemory(coefficient_Z); + + ScaLBL_FreeDeviceMemory(NeighborList); + ScaLBL_FreeDeviceMemory(MembraneLinks); + ScaLBL_FreeDeviceMemory(MembraneCoef); + ScaLBL_FreeDeviceMemory(MembraneDistance); + + ScaLBL_FreeDeviceMemory(sendbuf_x); + ScaLBL_FreeDeviceMemory(sendbuf_X); + ScaLBL_FreeDeviceMemory(sendbuf_y); + ScaLBL_FreeDeviceMemory(sendbuf_Y); + ScaLBL_FreeDeviceMemory(sendbuf_z); + ScaLBL_FreeDeviceMemory(sendbuf_Z); + /* ScaLBL_FreeDeviceMemory( sendbuf_xy ); ScaLBL_FreeDeviceMemory( sendbuf_xY ); ScaLBL_FreeDeviceMemory( sendbuf_Xy ); ScaLBL_FreeDeviceMemory( sendbuf_XY ); @@ -150,13 +176,13 @@ Membrane::~Membrane() { ScaLBL_FreeDeviceMemory( sendbuf_Yz ); 
ScaLBL_FreeDeviceMemory( sendbuf_YZ ); */ - ScaLBL_FreeDeviceMemory( recvbuf_x ); - ScaLBL_FreeDeviceMemory( recvbuf_X ); - ScaLBL_FreeDeviceMemory( recvbuf_y ); - ScaLBL_FreeDeviceMemory( recvbuf_Y ); - ScaLBL_FreeDeviceMemory( recvbuf_z ); - ScaLBL_FreeDeviceMemory( recvbuf_Z ); - /* + ScaLBL_FreeDeviceMemory(recvbuf_x); + ScaLBL_FreeDeviceMemory(recvbuf_X); + ScaLBL_FreeDeviceMemory(recvbuf_y); + ScaLBL_FreeDeviceMemory(recvbuf_Y); + ScaLBL_FreeDeviceMemory(recvbuf_z); + ScaLBL_FreeDeviceMemory(recvbuf_Z); + /* ScaLBL_FreeDeviceMemory( recvbuf_xy ); ScaLBL_FreeDeviceMemory( recvbuf_xY ); ScaLBL_FreeDeviceMemory( recvbuf_Xy ); @@ -170,13 +196,13 @@ Membrane::~Membrane() { ScaLBL_FreeDeviceMemory( recvbuf_Yz ); ScaLBL_FreeDeviceMemory( recvbuf_YZ ); */ - ScaLBL_FreeDeviceMemory( dvcSendList_x ); - ScaLBL_FreeDeviceMemory( dvcSendList_X ); - ScaLBL_FreeDeviceMemory( dvcSendList_y ); - ScaLBL_FreeDeviceMemory( dvcSendList_Y ); - ScaLBL_FreeDeviceMemory( dvcSendList_z ); - ScaLBL_FreeDeviceMemory( dvcSendList_Z ); - /* + ScaLBL_FreeDeviceMemory(dvcSendList_x); + ScaLBL_FreeDeviceMemory(dvcSendList_X); + ScaLBL_FreeDeviceMemory(dvcSendList_y); + ScaLBL_FreeDeviceMemory(dvcSendList_Y); + ScaLBL_FreeDeviceMemory(dvcSendList_z); + ScaLBL_FreeDeviceMemory(dvcSendList_Z); + /* ScaLBL_FreeDeviceMemory( dvcSendList_xy ); ScaLBL_FreeDeviceMemory( dvcSendList_xY ); ScaLBL_FreeDeviceMemory( dvcSendList_Xy ); @@ -208,13 +234,13 @@ Membrane::~Membrane() { ScaLBL_FreeDeviceMemory( dvcRecvList_Yz ); ScaLBL_FreeDeviceMemory( dvcRecvList_YZ ); */ - ScaLBL_FreeDeviceMemory( dvcRecvLinks_x ); - ScaLBL_FreeDeviceMemory( dvcRecvLinks_X ); - ScaLBL_FreeDeviceMemory( dvcRecvLinks_y ); - ScaLBL_FreeDeviceMemory( dvcRecvLinks_Y ); - ScaLBL_FreeDeviceMemory( dvcRecvLinks_z ); - ScaLBL_FreeDeviceMemory( dvcRecvLinks_Z ); - /* + ScaLBL_FreeDeviceMemory(dvcRecvLinks_x); + ScaLBL_FreeDeviceMemory(dvcRecvLinks_X); + ScaLBL_FreeDeviceMemory(dvcRecvLinks_y); + 
ScaLBL_FreeDeviceMemory(dvcRecvLinks_Y); + ScaLBL_FreeDeviceMemory(dvcRecvLinks_z); + ScaLBL_FreeDeviceMemory(dvcRecvLinks_Z); + /* ScaLBL_FreeDeviceMemory( dvcRecvLinks_xy ); ScaLBL_FreeDeviceMemory( dvcRecvLinks_xY ); ScaLBL_FreeDeviceMemory( dvcRecvLinks_Xy ); @@ -228,13 +254,13 @@ Membrane::~Membrane() { ScaLBL_FreeDeviceMemory( dvcRecvLinks_Yz ); ScaLBL_FreeDeviceMemory( dvcRecvLinks_YZ ); */ - ScaLBL_FreeDeviceMemory( dvcRecvDist_x ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_X ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_y ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_Y ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_z ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_Z ); - /* + ScaLBL_FreeDeviceMemory(dvcRecvDist_x); + ScaLBL_FreeDeviceMemory(dvcRecvDist_X); + ScaLBL_FreeDeviceMemory(dvcRecvDist_y); + ScaLBL_FreeDeviceMemory(dvcRecvDist_Y); + ScaLBL_FreeDeviceMemory(dvcRecvDist_z); + ScaLBL_FreeDeviceMemory(dvcRecvDist_Z); + /* ScaLBL_FreeDeviceMemory( dvcRecvDist_xy ); ScaLBL_FreeDeviceMemory( dvcRecvDist_xY ); ScaLBL_FreeDeviceMemory( dvcRecvDist_Xy ); @@ -250,365 +276,365 @@ Membrane::~Membrane() { */ } -int Membrane::Create(DoubleArray &Distance, IntArray &Map){ - int mlink = 0; - int i,j,k; +int Membrane::Create(DoubleArray &Distance, IntArray &Map) { + int mlink = 0; + int i, j, k; - int idx, neighbor; - double dist, locdist; - - if (rank == 0) printf(" Copy initial neighborlist... 
\n"); - int * neighborList = new int[18*Np]; - /* Copy neighborList */ - for (int idx=0; idx 7){ - neighbor=Map(i-1,j-1,k); - dist=Distance(i-1,j-1,k); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[6*Np+idx]=idx + 8*Np; - } + neighbor = Map(i, j, k + 1); + dist = Distance(i, j, k + 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[5 * Np + idx] = idx + 5 * Np; + mlink++; + } - neighbor=Map(i+1,j+1,k); - dist=Distance(i+1,j+1,k); - if (dist*locdist < 0.0){ - neighborList[7*Np+idx]=idx + 7*Np; - mlink++; - } + if (Q > 7) { + neighbor = Map(i - 1, j - 1, k); + dist = Distance(i - 1, j - 1, k); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[6 * Np + idx] = idx + 8 * Np; + } - neighbor=Map(i-1,j+1,k); - dist=Distance(i-1,j+1,k); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[8*Np+idx]=idx + 10*Np; - } + neighbor = Map(i + 1, j + 1, k); + dist = Distance(i + 1, j + 1, k); + if (dist * locdist < 0.0) { + neighborList[7 * Np + idx] = idx + 7 * Np; + mlink++; + } - neighbor=Map(i+1,j-1,k); - dist=Distance(i+1,j-1,k); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[9*Np+idx]=idx + 9*Np; - mlink++; - } + neighbor = Map(i - 1, j + 1, k); + dist = Distance(i - 1, j + 1, k); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[8 * Np + idx] = idx + 10 * Np; + } - neighbor=Map(i-1,j,k-1); - dist=Distance(i-1,j,k-1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[10*Np+idx]=idx + 12*Np; - } + neighbor = Map(i + 1, j - 1, k); + dist = Distance(i + 1, j - 1, k); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[9 * Np + idx] = idx + 9 * Np; + mlink++; + } - neighbor=Map(i+1,j,k+1); - dist=Distance(i+1,j,k+1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[11*Np+idx]=idx + 11*Np; - mlink++; - } + neighbor = Map(i - 1, j, k - 1); + dist = Distance(i - 1, j, k - 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[10 * Np + idx] = idx + 12 * Np; + } - 
neighbor=Map(i-1,j,k+1); - dist=Distance(i-1,j,k+1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[12*Np+idx]=idx + 14*Np; - } + neighbor = Map(i + 1, j, k + 1); + dist = Distance(i + 1, j, k + 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[11 * Np + idx] = idx + 11 * Np; + mlink++; + } - neighbor=Map(i+1,j,k-1); - dist=Distance(i+1,j,k-1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[13*Np+idx]=idx + 13*Np; - mlink++; - } + neighbor = Map(i - 1, j, k + 1); + dist = Distance(i - 1, j, k + 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[12 * Np + idx] = idx + 14 * Np; + } - neighbor=Map(i,j-1,k-1); - dist=Distance(i,j-1,k-1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[14*Np+idx]=idx + 16*Np; - } + neighbor = Map(i + 1, j, k - 1); + dist = Distance(i + 1, j, k - 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[13 * Np + idx] = idx + 13 * Np; + mlink++; + } - neighbor=Map(i,j+1,k+1); - dist=Distance(i,j+1,k+1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[15*Np+idx]=idx + 15*Np; - mlink++; - } + neighbor = Map(i, j - 1, k - 1); + dist = Distance(i, j - 1, k - 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[14 * Np + idx] = idx + 16 * Np; + } - neighbor=Map(i,j-1,k+1); - dist=Distance(i,j-1,k+1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[16*Np+idx]=idx + 18*Np; - } + neighbor = Map(i, j + 1, k + 1); + dist = Distance(i, j + 1, k + 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[15 * Np + idx] = idx + 15 * Np; + mlink++; + } - neighbor=Map(i,j+1,k-1); - dist=Distance(i,j+1,k-1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - neighborList[17*Np+idx]=idx + 17*Np; - mlink++; - } - } - } - } - } - } + neighbor = Map(i, j - 1, k + 1); + dist = Distance(i, j - 1, k + 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[16 * Np + idx] = idx + 18 * Np; + } - /* allocate memory */ - membraneTag 
= new int [mlink]; - membraneLinks = new int [2*mlink]; - membraneDist = new double [2*mlink]; - membraneLinkCount = mlink; + neighbor = Map(i, j + 1, k - 1); + dist = Distance(i, j + 1, k - 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + neighborList[17 * Np + idx] = idx + 17 * Np; + mlink++; + } + } + } + } + } + } - if (rank == 0) printf(" (cut %i links crossing membrane) \n",mlink); + /* allocate memory */ + membraneTag = new int[mlink]; + membraneLinks = new int[2 * mlink]; + membraneDist = new double[2 * mlink]; + membraneLinkCount = mlink; - /* construct the membrane*/ - /* * + if (rank == 0) + printf(" (cut %i links crossing membrane) \n", mlink); + + /* construct the membrane*/ + /* * * Sites inside the membrane (negative distance) -- store at 2*mlink * Sites outside the membrane (positive distance) -- store at 2*mlink+1 */ - if (rank == 0) printf(" Construct membrane data structures... \n"); - mlink = 0; - int localSite = 0; int neighborSite = 0; - for (k=1;k 7){ + if (Q > 7) { - neighbor=Map(i+1,j+1,k); - dist=Distance(i+1,j+1,k); - if (dist*locdist < 0.0 && !(neighbor<0)){ - if (locdist < 0.0){ - localSite = 2*mlink; - neighborSite = 2*mlink+1; - } - else{ - localSite = 2*mlink+1; - neighborSite = 2*mlink; - } - membraneLinks[localSite] = idx + 7*Np; - membraneLinks[neighborSite] = neighbor+8*Np; - membraneDist[localSite] = locdist; - membraneDist[neighborSite] = dist; - mlink++; - } + neighbor = Map(i + 1, j + 1, k); + dist = Distance(i + 1, j + 1, k); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + if (locdist < 0.0) { + localSite = 2 * mlink; + neighborSite = 2 * mlink + 1; + } else { + localSite = 2 * mlink + 1; + neighborSite = 2 * mlink; + } + membraneLinks[localSite] = idx + 7 * Np; + membraneLinks[neighborSite] = neighbor + 8 * Np; + membraneDist[localSite] = locdist; + membraneDist[neighborSite] = dist; + mlink++; + } - neighbor=Map(i+1,j-1,k); - dist=Distance(i+1,j-1,k); - if (dist*locdist < 0.0 && !(neighbor<0)){ - if (locdist < 
0.0){ - localSite = 2*mlink; - neighborSite = 2*mlink+1; - } - else{ - localSite = 2*mlink+1; - neighborSite = 2*mlink; - } - membraneLinks[localSite] = idx + 9*Np; - membraneLinks[neighborSite] = neighbor + 10*Np; - membraneDist[localSite] = locdist; - membraneDist[neighborSite] = dist; - mlink++; - } + neighbor = Map(i + 1, j - 1, k); + dist = Distance(i + 1, j - 1, k); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + if (locdist < 0.0) { + localSite = 2 * mlink; + neighborSite = 2 * mlink + 1; + } else { + localSite = 2 * mlink + 1; + neighborSite = 2 * mlink; + } + membraneLinks[localSite] = idx + 9 * Np; + membraneLinks[neighborSite] = neighbor + 10 * Np; + membraneDist[localSite] = locdist; + membraneDist[neighborSite] = dist; + mlink++; + } - neighbor=Map(i+1,j,k+1); - dist=Distance(i+1,j,k+1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - if (locdist < 0.0){ - localSite = 2*mlink; - neighborSite = 2*mlink+1; - } - else{ - localSite = 2*mlink+1; - neighborSite = 2*mlink; - } - membraneLinks[localSite] = idx + 11*Np; - membraneLinks[neighborSite] = neighbor + 12*Np; - membraneDist[localSite] = locdist; - membraneDist[neighborSite] = dist; - mlink++; - } + neighbor = Map(i + 1, j, k + 1); + dist = Distance(i + 1, j, k + 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + if (locdist < 0.0) { + localSite = 2 * mlink; + neighborSite = 2 * mlink + 1; + } else { + localSite = 2 * mlink + 1; + neighborSite = 2 * mlink; + } + membraneLinks[localSite] = idx + 11 * Np; + membraneLinks[neighborSite] = neighbor + 12 * Np; + membraneDist[localSite] = locdist; + membraneDist[neighborSite] = dist; + mlink++; + } - neighbor=Map(i+1,j,k-1); - dist=Distance(i+1,j,k-1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - if (locdist < 0.0){ - localSite = 2*mlink; - neighborSite = 2*mlink+1; - } - else{ - localSite = 2*mlink+1; - neighborSite = 2*mlink; - } - membraneLinks[localSite] = idx + 13*Np; - membraneLinks[neighborSite] = neighbor + 14*Np; - membraneDist[localSite] = 
locdist; - membraneDist[neighborSite] = dist; - mlink++; - } + neighbor = Map(i + 1, j, k - 1); + dist = Distance(i + 1, j, k - 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + if (locdist < 0.0) { + localSite = 2 * mlink; + neighborSite = 2 * mlink + 1; + } else { + localSite = 2 * mlink + 1; + neighborSite = 2 * mlink; + } + membraneLinks[localSite] = idx + 13 * Np; + membraneLinks[neighborSite] = neighbor + 14 * Np; + membraneDist[localSite] = locdist; + membraneDist[neighborSite] = dist; + mlink++; + } - neighbor=Map(i,j+1,k+1); - dist=Distance(i,j+1,k+1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - if (locdist < 0.0){ - localSite = 2*mlink; - neighborSite = 2*mlink+1; - } - else{ - localSite = 2*mlink+1; - neighborSite = 2*mlink; - } - membraneLinks[localSite] = idx + 15*Np; - membraneLinks[neighborSite] = neighbor + 16*Np; - membraneDist[localSite] = locdist; - membraneDist[neighborSite] = dist; - mlink++; - } + neighbor = Map(i, j + 1, k + 1); + dist = Distance(i, j + 1, k + 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + if (locdist < 0.0) { + localSite = 2 * mlink; + neighborSite = 2 * mlink + 1; + } else { + localSite = 2 * mlink + 1; + neighborSite = 2 * mlink; + } + membraneLinks[localSite] = idx + 15 * Np; + membraneLinks[neighborSite] = neighbor + 16 * Np; + membraneDist[localSite] = locdist; + membraneDist[neighborSite] = dist; + mlink++; + } - neighbor=Map(i,j+1,k-1); - dist=Distance(i,j+1,k-1); - if (dist*locdist < 0.0 && !(neighbor<0)){ - if (locdist < 0.0){ - localSite = 2*mlink; - neighborSite = 2*mlink+1; - } - else{ - localSite = 2*mlink+1; - neighborSite = 2*mlink; - } - membraneLinks[localSite] = idx + 17*Np; - membraneLinks[neighborSite] = neighbor + 18*Np; - membraneDist[localSite] = locdist; - membraneDist[neighborSite] = dist; - mlink++; - } - } - } - } - } - } - - if (rank == 0) printf(" Create device data structures... 
\n"); - /* Create device copies of data structures */ - ScaLBL_AllocateDeviceMemory((void **)&MembraneLinks, 2*mlink*sizeof(int)); - ScaLBL_AllocateDeviceMemory((void **)&MembraneCoef, 2*mlink*sizeof(double)); + neighbor = Map(i, j + 1, k - 1); + dist = Distance(i, j + 1, k - 1); + if (dist * locdist < 0.0 && !(neighbor < 0)) { + if (locdist < 0.0) { + localSite = 2 * mlink; + neighborSite = 2 * mlink + 1; + } else { + localSite = 2 * mlink + 1; + neighborSite = 2 * mlink; + } + membraneLinks[localSite] = idx + 17 * Np; + membraneLinks[neighborSite] = neighbor + 18 * Np; + membraneDist[localSite] = locdist; + membraneDist[neighborSite] = dist; + mlink++; + } + } + } + } + } + } + + if (rank == 0) + printf(" Create device data structures... \n"); + /* Create device copies of data structures */ + ScaLBL_AllocateDeviceMemory((void **)&MembraneLinks, + 2 * mlink * sizeof(int)); + ScaLBL_AllocateDeviceMemory((void **)&MembraneCoef, + 2 * mlink * sizeof(double)); //ScaLBL_AllocateDeviceMemory((void **)&MembraneDistance, 2*mlink*sizeof(double)); - ScaLBL_AllocateDeviceMemory((void **)&MembraneDistance, Nx*Ny*Nz*sizeof(double)); - - ScaLBL_CopyToDevice(NeighborList, neighborList, 18*Np*sizeof(int)); - ScaLBL_CopyToDevice(MembraneLinks, membraneLinks, 2*mlink*sizeof(int)); - //ScaLBL_CopyToDevice(MembraneDistance, membraneDist, 2*mlink*sizeof(double)); - ScaLBL_CopyToDevice(MembraneDistance, Distance.data(), Nx*Ny*Nz*sizeof(double)); + ScaLBL_AllocateDeviceMemory((void **)&MembraneDistance, + Nx * Ny * Nz * sizeof(double)); - - int *dvcTmpMap; + ScaLBL_CopyToDevice(NeighborList, neighborList, 18 * Np * sizeof(int)); + ScaLBL_CopyToDevice(MembraneLinks, membraneLinks, 2 * mlink * sizeof(int)); + //ScaLBL_CopyToDevice(MembraneDistance, membraneDist, 2*mlink*sizeof(double)); + ScaLBL_CopyToDevice(MembraneDistance, Distance.data(), + Nx * Ny * Nz * sizeof(double)); + + int *dvcTmpMap; ScaLBL_AllocateDeviceMemory((void **)&dvcTmpMap, sizeof(int) * Np); int *TmpMap; TmpMap = new 
int[Np]; @@ -622,75 +648,94 @@ int Membrane::Create(DoubleArray &Distance, IntArray &Map){ } } ScaLBL_CopyToDevice(dvcTmpMap, TmpMap, sizeof(int) * Np); - - //int Membrane::D3Q7_MapRecv(int Cqx, int Cqy, int Cqz, int *d3q19_recvlist, - // int count, int *membraneRecvLabels, DoubleArray &Distance, int *dvcMap){ - if (rank == 0) printf(" Construct communication data structures... \n"); - /* Re-organize communication based on membrane structure*/ - //...dvcMap recieve list for the X face: q=2,8,10,12,14 ................................. - linkCount_X[0] = D3Q7_MapRecv(-1,0,0, dvcRecvDist_X,recvCount_X,dvcRecvLinks_X,Distance,dvcTmpMap); - //................................................................................... - //...dvcMap recieve list for the x face: q=1,7,9,11,13.................................. - linkCount_x[0] = D3Q7_MapRecv(1,0,0, dvcRecvDist_x,recvCount_x,dvcRecvLinks_x,Distance,dvcTmpMap); - //................................................................................... - //...dvcMap recieve list for the y face: q=4,8,9,16,18 ................................... - linkCount_Y[0] = D3Q7_MapRecv(0,-1,0, dvcRecvDist_Y,recvCount_Y,dvcRecvLinks_Y,Distance,dvcTmpMap); - //................................................................................... - //...dvcMap recieve list for the Y face: q=3,7,10,15,17 .................................. - linkCount_y[0] = D3Q7_MapRecv(0,1,0, dvcRecvDist_y,recvCount_y,dvcRecvLinks_y,Distance,dvcTmpMap); - //................................................................................... - //...dvcMap recieve list for the z face<<<6,12,13,16,17).............................................. - linkCount_Z[0] = D3Q7_MapRecv(0,0,-1, dvcRecvDist_Z,recvCount_Z,dvcRecvLinks_Z,Distance,dvcTmpMap); - //...dvcMap recieve list for the Z face<<<5,11,14,15,18).............................................. 
- linkCount_z[0] = D3Q7_MapRecv(0,0,1, dvcRecvDist_z,recvCount_z,dvcRecvLinks_z,Distance,dvcTmpMap); - //.................................................................................. + //int Membrane::D3Q7_MapRecv(int Cqx, int Cqy, int Cqz, int *d3q19_recvlist, + // int count, int *membraneRecvLabels, DoubleArray &Distance, int *dvcMap){ + if (rank == 0) + printf(" Construct communication data structures... \n"); + /* Re-organize communication based on membrane structure*/ + //...dvcMap recieve list for the X face: q=2,8,10,12,14 ................................. + linkCount_X[0] = D3Q7_MapRecv(-1, 0, 0, dvcRecvDist_X, recvCount_X, + dvcRecvLinks_X, Distance, dvcTmpMap); + //................................................................................... + //...dvcMap recieve list for the x face: q=1,7,9,11,13.................................. + linkCount_x[0] = D3Q7_MapRecv(1, 0, 0, dvcRecvDist_x, recvCount_x, + dvcRecvLinks_x, Distance, dvcTmpMap); + //................................................................................... + //...dvcMap recieve list for the y face: q=4,8,9,16,18 ................................... + linkCount_Y[0] = D3Q7_MapRecv(0, -1, 0, dvcRecvDist_Y, recvCount_Y, + dvcRecvLinks_Y, Distance, dvcTmpMap); + //................................................................................... + //...dvcMap recieve list for the Y face: q=3,7,10,15,17 .................................. + linkCount_y[0] = D3Q7_MapRecv(0, 1, 0, dvcRecvDist_y, recvCount_y, + dvcRecvLinks_y, Distance, dvcTmpMap); + //................................................................................... + //...dvcMap recieve list for the z face<<<6,12,13,16,17).............................................. + linkCount_Z[0] = D3Q7_MapRecv(0, 0, -1, dvcRecvDist_Z, recvCount_Z, + dvcRecvLinks_Z, Distance, dvcTmpMap); - //...................................................................................... 
- MPI_COMM_SCALBL.barrier(); - ScaLBL_DeviceBarrier(); - //....................................................................... - SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z; - RecvCount = recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z; - CommunicationCount = SendCount+RecvCount; - //...................................................................................... + //...dvcMap recieve list for the Z face<<<5,11,14,15,18).............................................. + linkCount_z[0] = D3Q7_MapRecv(0, 0, 1, dvcRecvDist_z, recvCount_z, + dvcRecvLinks_z, Distance, dvcTmpMap); + //.................................................................................. - //...................................................................................... - // Allocate membrane coefficient buffers (for d3q7 recv) - ScaLBL_AllocateZeroCopy((void **) &coefficient_x, 2*(recvCount_x )*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &coefficient_X, 2*(recvCount_X)*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &coefficient_y, 2*(recvCount_y)*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &coefficient_Y, 2*(recvCount_Y)*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &coefficient_z, 2*(recvCount_z)*sizeof(double)); - ScaLBL_AllocateZeroCopy((void **) &coefficient_Z, 2*(recvCount_Z)*sizeof(double)); - //...................................................................................... - - ScaLBL_FreeDeviceMemory (dvcTmpMap); - delete [] neighborList; - delete [] TmpMap; - return mlink; + //...................................................................................... + MPI_COMM_SCALBL.barrier(); + ScaLBL_DeviceBarrier(); + //....................................................................... 
+ SendCount = sendCount_x + sendCount_X + sendCount_y + sendCount_Y + + sendCount_z + sendCount_Z; + RecvCount = recvCount_x + recvCount_X + recvCount_y + recvCount_Y + + recvCount_z + recvCount_Z; + CommunicationCount = SendCount + RecvCount; + //...................................................................................... + + //...................................................................................... + // Allocate membrane coefficient buffers (for d3q7 recv) + ScaLBL_AllocateZeroCopy((void **)&coefficient_x, + 2 * (recvCount_x) * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&coefficient_X, + 2 * (recvCount_X) * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&coefficient_y, + 2 * (recvCount_y) * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&coefficient_Y, + 2 * (recvCount_Y) * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&coefficient_z, + 2 * (recvCount_z) * sizeof(double)); + ScaLBL_AllocateZeroCopy((void **)&coefficient_Z, + 2 * (recvCount_Z) * sizeof(double)); + //...................................................................................... 
+ + ScaLBL_FreeDeviceMemory(dvcTmpMap); + delete[] neighborList; + delete[] TmpMap; + return mlink; } -void Membrane::Write(string filename){ - - int mlink = membraneLinkCount; - std::ofstream ofs (filename, std::ofstream::out); - /* Create local copies of membrane data structures */ - double *tmpMembraneCoef; // mass transport coefficient for the membrane - tmpMembraneCoef = new double [2*mlink*sizeof(double)]; - ScaLBL_CopyToHost(tmpMembraneCoef, MembraneCoef, 2*mlink*sizeof(double)); - int i,j,k; - for (int m=0; m 0.0) ReturnLabels[idx] = 1; - else ReturnLabels[idx] = 2; - countMembraneLinks++; - } - } - // Return updated version to the device - ScaLBL_CopyToDevice(membraneRecvLabels, ReturnLabels, count*sizeof(int)); + distanceLocal = Distance(i, j, k); // this site should be in the halo + //printf(" Local value %i, %i, %i \n",i,j,k); - // clean up the work arrays - delete [] ReturnLabels; - delete [] TmpMap; - delete [] list; - return countMembraneLinks; + // Streaming for the non-local distribution + i -= Cqx; + j -= Cqy; + k -= Cqz; + distanceNonLocal = Distance(i, j, k); + //printf(" Nonlocal value %i, %i, %i \n",i,j,k); + ReturnLabels[idx] = 0; + if (distanceLocal * distanceNonLocal < 0.0) { + if (distanceLocal > 0.0) + ReturnLabels[idx] = 1; + else + ReturnLabels[idx] = 2; + countMembraneLinks++; + } + } + // Return updated version to the device + ScaLBL_CopyToDevice(membraneRecvLabels, ReturnLabels, count * sizeof(int)); + + // clean up the work arrays + delete[] ReturnLabels; + delete[] TmpMap; + delete[] list; + return countMembraneLinks; } -void Membrane::SendD3Q7AA(double *dist){ - - if (Lock==true){ - ERROR("Membrane Error (SendD3Q7): Membrane communicator is locked -- did you forget to match Send/Recv calls?"); - } - else{ - Lock=true; - } - // assign tag of 37 to D3Q7 communication - sendtag = recvtag = 37; - ScaLBL_DeviceBarrier(); - // Pack the distributions - //...Packing for x face(q=2)................................ 
- ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,dist,Np); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag); - //...Packing for X face(q=1)................................ - ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,Np); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag); - //for (int idx=0; idx 0 && rank_info.kz == 0) - if (BounceBack && rank_info.kz == 0) - {/* leave the bounce-back distributions in place */} - else { - //...Packing for z face(q=6)................................ - ScaLBL_D3Q7_Membrane_Unpack(6,dvcRecvDist_z, recvbuf_z, recvCount_z,dist,Np,coefficient_z); - } - //if (BoundaryCondition > 0 && rank_info.kz == rank_info.nz-1) - if (BounceBack && rank_info.kz == rank_info.nz-1) - {/* leave the bounce-back distributions in place */} - else { - //...Packing for Z face(q=5)................................ - ScaLBL_D3Q7_Membrane_Unpack(5,dvcRecvDist_Z, recvbuf_Z,recvCount_Z,dist,Np,coefficient_Z); - //.................................................................................. - } - MPI_COMM_SCALBL.barrier(); - //................................................................................... - Lock=false; // unlock the communicator after communications complete - //................................................................................... + //................................................................................... + // Wait for completion of D3Q19 communication + MPI_COMM_SCALBL.waitAll(6, req1); + MPI_COMM_SCALBL.waitAll(6, req2); + ScaLBL_DeviceBarrier(); + //................................................................................... 
+ // NOTE: AA Routine writes to opposite + // Unpack the distributions on the device + //................................................................................... + //...Unpacking for x face(q=2)................................ + ScaLBL_D3Q7_Membrane_Unpack(2, dvcRecvDist_x, recvbuf_x, recvCount_x, dist, + Np, coefficient_x); + //................................................................................... + //...Packing for X face(q=1)................................ + ScaLBL_D3Q7_Membrane_Unpack(1, dvcRecvDist_X, recvbuf_X, recvCount_X, dist, + Np, coefficient_X); + //................................................................................... + //...Packing for y face(q=4)................................. + ScaLBL_D3Q7_Membrane_Unpack(4, dvcRecvDist_y, recvbuf_y, recvCount_y, dist, + Np, coefficient_y); + //................................................................................... + //...Packing for Y face(q=3)................................. + ScaLBL_D3Q7_Membrane_Unpack(3, dvcRecvDist_Y, recvbuf_Y, recvCount_Y, dist, + Np, coefficient_Y); + //................................................................................... + //if (BoundaryCondition > 0 && rank_info.kz == 0) + if (BounceBack && + rank_info.kz == 0) { /* leave the bounce-back distributions in place */ + } else { + //...Packing for z face(q=6)................................ + ScaLBL_D3Q7_Membrane_Unpack(6, dvcRecvDist_z, recvbuf_z, recvCount_z, + dist, Np, coefficient_z); + } + //if (BoundaryCondition > 0 && rank_info.kz == rank_info.nz-1) + if (BounceBack && + rank_info.kz == + rank_info.nz - + 1) { /* leave the bounce-back distributions in place */ + } else { + //...Packing for Z face(q=5)................................ + ScaLBL_D3Q7_Membrane_Unpack(5, dvcRecvDist_Z, recvbuf_Z, recvCount_Z, + dist, Np, coefficient_Z); + //.................................................................................. 
+ } + MPI_COMM_SCALBL.barrier(); + //................................................................................... + Lock = false; // unlock the communicator after communications complete + //................................................................................... } -void Membrane::IonTransport(double *dist, double *den){ - - ScaLBL_D3Q7_Membrane_IonTransport(MembraneLinks,MembraneCoef, dist, den, membraneLinkCount, Np); +void Membrane::IonTransport(double *dist, double *den) { + + ScaLBL_D3Q7_Membrane_IonTransport(MembraneLinks, MembraneCoef, dist, den, + membraneLinkCount, Np); } // std::shared_ptr db){ void Membrane::AssignCoefficients(int *Map, double *Psi, double Threshold, - double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, - double ThresholdMassFractionOut){ - /* Assign mass transfer coefficients to the membrane data structure */ + double MassFractionIn, double MassFractionOut, + double ThresholdMassFractionIn, + double ThresholdMassFractionOut) { + /* Assign mass transfer coefficients to the membrane data structure */ - if (membraneLinkCount > 0) - ScaLBL_D3Q7_Membrane_AssignLinkCoef(MembraneLinks, Map, MembraneDistance, Psi, MembraneCoef, - Threshold, MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut, - membraneLinkCount, Nx, Ny, Nz, Np); + if (membraneLinkCount > 0) + ScaLBL_D3Q7_Membrane_AssignLinkCoef( + MembraneLinks, Map, MembraneDistance, Psi, MembraneCoef, Threshold, + MassFractionIn, MassFractionOut, ThresholdMassFractionIn, + ThresholdMassFractionOut, membraneLinkCount, Nx, Ny, Nz, Np); - if (linkCount_X[0] < recvCount_X) - ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(-1,0,0,Map,MembraneDistance,Psi,Threshold, - MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut, - dvcRecvDist_X,dvcRecvLinks_X,coefficient_X,0,linkCount_X[0],recvCount_X, - Np,Nx,Ny,Nz); + if (linkCount_X[0] < recvCount_X) + ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo( + -1, 0, 0, 
Map, MembraneDistance, Psi, Threshold, MassFractionIn, + MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut, + dvcRecvDist_X, dvcRecvLinks_X, coefficient_X, 0, linkCount_X[0], + recvCount_X, Np, Nx, Ny, Nz); - if (linkCount_x[0] < recvCount_x) - ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(1,0,0,Map,MembraneDistance,Psi,Threshold, - MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut, - dvcRecvDist_x,dvcRecvLinks_x,coefficient_x,0,linkCount_x[0],recvCount_x, - Np,Nx,Ny,Nz); + if (linkCount_x[0] < recvCount_x) + ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo( + 1, 0, 0, Map, MembraneDistance, Psi, Threshold, MassFractionIn, + MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut, + dvcRecvDist_x, dvcRecvLinks_x, coefficient_x, 0, linkCount_x[0], + recvCount_x, Np, Nx, Ny, Nz); - if (linkCount_Y[0] < recvCount_Y) - ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(0,-1,0,Map,MembraneDistance,Psi,Threshold, - MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut, - dvcRecvDist_Y,dvcRecvLinks_Y,coefficient_Y,0,linkCount_Y[0],recvCount_Y, - Np,Nx,Ny,Nz); + if (linkCount_Y[0] < recvCount_Y) + ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo( + 0, -1, 0, Map, MembraneDistance, Psi, Threshold, MassFractionIn, + MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut, + dvcRecvDist_Y, dvcRecvLinks_Y, coefficient_Y, 0, linkCount_Y[0], + recvCount_Y, Np, Nx, Ny, Nz); - if (linkCount_y[0]. 
+*/ /* Flow adaptor class for multiphase flow methods */ #ifndef ScaLBL_Membrane_INC @@ -21,9 +37,9 @@ * @param dist - memory buffer to hold the distributions * @param N - size of the distributions (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, - int *d3q7_recvlist, double *recvbuf, int count, - double *dist, int N, double *coef); +extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, int *d3q7_recvlist, + double *recvbuf, int count, + double *dist, int N, double *coef); /** * \brief Set custom link rules for D3Q19 distribution based on membrane location @@ -38,8 +54,10 @@ extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, * @param dist - memory buffer to hold the distributions * @param N - size of the distributions (derived from Domain structure) */ -extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset, - int linkCount, double *recvbuf, double *dist, int N); +extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, + double *coef, int start, int offset, + int linkCount, double *recvbuf, + double *dist, int N); /** * \class Membrane @@ -51,34 +69,35 @@ extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *c class Membrane { public: int Np; - int Nx,Ny,Nz,N; + int Nx, Ny, Nz, N; int membraneLinkCount; - int BoundaryCondition; + int BoundaryCondition; - int *initialNeighborList; // original neighborlist - int *NeighborList; // modified neighborlist + int *initialNeighborList; // original neighborlist + int *NeighborList; // modified neighborlist /* host data structures */ - int *membraneLinks; // D3Q7 links that cross membrane - int *membraneTag; // label each link in the membrane - double *membraneDist; // distance to membrane for each linked site - double *membraneOrientation; // distance to membrane for each linked site + int *membraneLinks; // D3Q7 links that cross membrane + int *membraneTag; // label each link in the membrane + 
double *membraneDist; // distance to membrane for each linked site + double *membraneOrientation; // distance to membrane for each linked site /* * Device data structures */ int *MembraneLinks; - double *MembraneCoef; // mass transport coefficient for the membrane + double *MembraneCoef; // mass transport coefficient for the membrane double *MembraneDistance; double *MembraneOrientation; - + /** * \brief Create a flow adaptor to operate on the LB model * @param ScaLBL - originating data structures * @param neighborList - list of neighbors for each site */ //Membrane(std::shared_ptr Dm, int *initialNeighborList, int Nsites); - Membrane(std::shared_ptr sComm, int *dvcNeighborList, int Nsites); + Membrane(std::shared_ptr sComm, int *dvcNeighborList, + int Nsites); /** * \brief Destructor @@ -93,40 +112,44 @@ public: * @param Map - mapping between regular layout and compact layout */ int Create(DoubleArray &Distance, IntArray &Map); - + /** * \brief Write membrane data to output file * @param filename - name of file to save */ void Write(string filename); - + /** * \brief Read membrane data from input file * @param filename - name of file to save */ void Read(string filename); - - void SendD3Q7AA(double *dist); - void RecvD3Q7AA(double *dist, bool BounceBack); - void AssignCoefficients(int *Map, double *Psi, double Threshold, - double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, - double ThresholdMassFractionOut); - void IonTransport(double *dist, double *den); - //...................................................................................... - // Buffers to store data sent and recieved by this MPI process - double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z; - double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z; - //...................................................................................... 
- + + void SendD3Q7AA(double *dist); + void RecvD3Q7AA(double *dist, bool BounceBack); + void AssignCoefficients(int *Map, double *Psi, double Threshold, + double MassFractionIn, double MassFractionOut, + double ThresholdMassFractionIn, + double ThresholdMassFractionOut); + void IonTransport(double *dist, double *den); + //...................................................................................... + // Buffers to store data sent and recieved by this MPI process + double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, + *sendbuf_Z; + double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, + *recvbuf_Z; + //...................................................................................... + private: - bool Lock; // use Lock to make sure only one call at a time to protect data in transit - int sendtag, recvtag; - int iproc,jproc,kproc; - int nprocx,nprocy,nprocz; - // Give the object it's own MPI communicator - RankInfoStruct rank_info; - Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain - MPI_Request req1[18],req2[18]; + bool + Lock; // use Lock to make sure only one call at a time to protect data in transit + int sendtag, recvtag; + int iproc, jproc, kproc; + int nprocx, nprocy, nprocz; + // Give the object it's own MPI communicator + RankInfoStruct rank_info; + Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain + MPI_Request req1[18], req2[18]; /** * \brief Set up membrane communication * \details associate p2p communication links to membrane where necessary @@ -143,49 +166,59 @@ private: * @param dvcMap - data structure used to define mapping between dense and sparse representation * @param Np - number of sites in dense representation * */ - int D3Q7_MapRecv(int Cqx, int Cqy, int Cqz, int *d3q19_recvlist, - int count, int *membraneRecvLabels, DoubleArray &Distance, int *dvcMap); - //...................................................................................... 
- // MPI ranks for all 18 neighbors - //...................................................................................... - // These variables are all private to prevent external things from modifying them!! - //...................................................................................... - int rank; - int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z; - int rank_xy,rank_XY,rank_xY,rank_Xy; - int rank_xz,rank_XZ,rank_xZ,rank_Xz; - int rank_yz,rank_YZ,rank_yZ,rank_Yz; - //...................................................................................... - int SendCount, RecvCount, CommunicationCount; - //...................................................................................... - int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; - //...................................................................................... - int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; - //...................................................................................... - int linkCount_x[5], linkCount_y[5], linkCount_z[5], linkCount_X[5], linkCount_Y[5], linkCount_Z[5]; - int linkCount_xy, linkCount_yz, linkCount_xz, linkCount_Xy, linkCount_Yz, linkCount_xZ; - int linkCount_xY, linkCount_yZ, linkCount_Xz, linkCount_XY, linkCount_YZ, linkCount_XZ; - //...................................................................................... 
- // Send buffers that reside on the compute device - int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z; - //int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ; - //int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ; - // Recieve buffers that reside on the compute device - int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z; - //int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ; - //int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ; - // Link lists that reside on the compute device - int *dvcRecvLinks_x, *dvcRecvLinks_y, *dvcRecvLinks_z, *dvcRecvLinks_X, *dvcRecvLinks_Y, *dvcRecvLinks_Z; - //int *dvcRecvLinks_xy, *dvcRecvLinks_yz, *dvcRecvLinks_xz, *dvcRecvLinks_Xy, *dvcRecvLinks_Yz, *dvcRecvLinks_xZ; - //int *dvcRecvLinks_xY, *dvcRecvLinks_yZ, *dvcRecvLinks_Xz, *dvcRecvLinks_XY, *dvcRecvLinks_YZ, *dvcRecvLinks_XZ; - // Recieve buffers for the distributions - int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z; - //int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ; - //int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ; - //...................................................................................... - // mass transfer coefficient arrays - double *coefficient_x, *coefficient_X, *coefficient_y, *coefficient_Y, *coefficient_z, *coefficient_Z; - //...................................................................................... 
- + int D3Q7_MapRecv(int Cqx, int Cqy, int Cqz, int *d3q19_recvlist, int count, + int *membraneRecvLabels, DoubleArray &Distance, + int *dvcMap); + //...................................................................................... + // MPI ranks for all 18 neighbors + //...................................................................................... + // These variables are all private to prevent external things from modifying them!! + //...................................................................................... + int rank; + int rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z; + int rank_xy, rank_XY, rank_xY, rank_Xy; + int rank_xz, rank_XZ, rank_xZ, rank_Xz; + int rank_yz, rank_YZ, rank_yZ, rank_Yz; + //...................................................................................... + int SendCount, RecvCount, CommunicationCount; + //...................................................................................... + int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, + sendCount_Z; + //...................................................................................... + int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, + recvCount_Z; + //...................................................................................... + int linkCount_x[5], linkCount_y[5], linkCount_z[5], linkCount_X[5], + linkCount_Y[5], linkCount_Z[5]; + int linkCount_xy, linkCount_yz, linkCount_xz, linkCount_Xy, linkCount_Yz, + linkCount_xZ; + int linkCount_xY, linkCount_yZ, linkCount_Xz, linkCount_XY, linkCount_YZ, + linkCount_XZ; + //...................................................................................... 
+ // Send buffers that reside on the compute device + int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, + *dvcSendList_Y, *dvcSendList_Z; + //int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ; + //int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ; + // Recieve buffers that reside on the compute device + int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, + *dvcRecvList_Y, *dvcRecvList_Z; + //int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ; + //int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ; + // Link lists that reside on the compute device + int *dvcRecvLinks_x, *dvcRecvLinks_y, *dvcRecvLinks_z, *dvcRecvLinks_X, + *dvcRecvLinks_Y, *dvcRecvLinks_Z; + //int *dvcRecvLinks_xy, *dvcRecvLinks_yz, *dvcRecvLinks_xz, *dvcRecvLinks_Xy, *dvcRecvLinks_Yz, *dvcRecvLinks_xZ; + //int *dvcRecvLinks_xY, *dvcRecvLinks_yZ, *dvcRecvLinks_Xz, *dvcRecvLinks_XY, *dvcRecvLinks_YZ, *dvcRecvLinks_XZ; + // Recieve buffers for the distributions + int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, + *dvcRecvDist_Y, *dvcRecvDist_Z; + //int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ; + //int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ; + //...................................................................................... + // mass transfer coefficient arrays + double *coefficient_x, *coefficient_X, *coefficient_y, *coefficient_Y, + *coefficient_z, *coefficient_Z; + //...................................................................................... 
}; #endif \ No newline at end of file diff --git a/common/ReadMicroCT.cpp b/common/ReadMicroCT.cpp index 2fc2310c..2dd72d02 100644 --- a/common/ReadMicroCT.cpp +++ b/common/ReadMicroCT.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "common/ReadMicroCT.h" #include "common/Utilities.h" diff --git a/common/ReadMicroCT.h b/common/ReadMicroCT.h index 066fbe17..939c5f34 100644 --- a/common/ReadMicroCT.h +++ b/common/ReadMicroCT.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #ifndef READMICROCT_H #define READMICROCT_H diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index c513df4f..5f73015a 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1,479 +1,779 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "common/ScaLBL.h" #include -ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ - //...................................................................................... - Lock=false; // unlock the communicator - //...................................................................................... - // Create a separate copy of the communicator for the device +ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm) { + //...................................................................................... + Lock = false; // unlock the communicator + //...................................................................................... + // Create a separate copy of the communicator for the device MPI_COMM_SCALBL = Dm->Comm.dup(); int myrank = MPI_COMM_SCALBL.getRank(); - rank_info = RankInfoStruct(myrank, rank_info.nx, rank_info.ny, rank_info.nz); - - //...................................................................................... 
- // Copy the domain size and communication information directly from Dm - Nx = Dm->Nx; - Ny = Dm->Ny; - Nz = Dm->Nz; - N = Nx*Ny*Nz; - next=0; - rank=Dm->rank(); - rank_x=Dm->rank_x(); - rank_y=Dm->rank_y(); - rank_z=Dm->rank_z(); - rank_X=Dm->rank_X(); - rank_Y=Dm->rank_Y(); - rank_Z=Dm->rank_Z(); - rank_xy=Dm->rank_xy(); - rank_XY=Dm->rank_XY(); - rank_xY=Dm->rank_xY(); - rank_Xy=Dm->rank_Xy(); - rank_xz=Dm->rank_xz(); - rank_XZ=Dm->rank_XZ(); - rank_xZ=Dm->rank_xZ(); - rank_Xz=Dm->rank_Xz(); - rank_yz=Dm->rank_yz(); - rank_YZ=Dm->rank_YZ(); - rank_yZ=Dm->rank_yZ(); - rank_Yz=Dm->rank_Yz(); - sendCount_x=Dm->sendCount("x"); - sendCount_y=Dm->sendCount("y"); - sendCount_z=Dm->sendCount("z"); - sendCount_X=Dm->sendCount("X"); - sendCount_Y=Dm->sendCount("Y"); - sendCount_Z=Dm->sendCount("Z"); - sendCount_xy=Dm->sendCount("xy"); - sendCount_yz=Dm->sendCount("yz"); - sendCount_xz=Dm->sendCount("xz"); - sendCount_Xy=Dm->sendCount("Xy"); - sendCount_Yz=Dm->sendCount("Yz"); - sendCount_xZ=Dm->sendCount("xZ"); - sendCount_xY=Dm->sendCount("xY"); - sendCount_yZ=Dm->sendCount("yZ"); - sendCount_Xz=Dm->sendCount("Xz"); - sendCount_XY=Dm->sendCount("XY"); - sendCount_YZ=Dm->sendCount("YZ"); - sendCount_XZ=Dm->sendCount("XZ"); - recvCount_x=Dm->recvCount("x"); - recvCount_y=Dm->recvCount("y"); - recvCount_z=Dm->recvCount("z"); - recvCount_X=Dm->recvCount("X"); - recvCount_Y=Dm->recvCount("Y"); - recvCount_Z=Dm->recvCount("Z"); - recvCount_xy=Dm->recvCount("xy"); - recvCount_yz=Dm->recvCount("yz"); - recvCount_xz=Dm->recvCount("xz"); - recvCount_Xy=Dm->recvCount("Xy"); - recvCount_Yz=Dm->recvCount("Yz"); - recvCount_xZ=Dm->recvCount("xZ"); - recvCount_xY=Dm->recvCount("xY"); - recvCount_yZ=Dm->recvCount("yZ"); - recvCount_Xz=Dm->recvCount("Xz"); - recvCount_XY=Dm->recvCount("XY"); - recvCount_YZ=Dm->recvCount("YZ"); - recvCount_XZ=Dm->recvCount("XZ"); - - iproc = Dm->iproc(); - jproc = Dm->jproc(); - kproc = Dm->kproc(); - nprocx = Dm->nprocx(); - nprocy = Dm->nprocy(); - 
nprocz = Dm->nprocz(); - BoundaryCondition = Dm->BoundaryCondition; - //...................................................................................... + rank_info = + RankInfoStruct(myrank, rank_info.nx, rank_info.ny, rank_info.nz); - ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, 2*5*sendCount_x*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, 2*5*sendCount_X*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, 2*5*sendCount_y*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, 2*5*sendCount_Y*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, 2*5*sendCount_z*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, 2*5*sendCount_Z*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_xy, 2*sendCount_xy*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_xY, 2*sendCount_xY*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xy, 2*sendCount_Xy*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_XY, 2*sendCount_XY*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_xz, 2*sendCount_xz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_xZ, 2*sendCount_xZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xz, 2*sendCount_Xz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_XZ, 2*sendCount_XZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_yz, 2*sendCount_yz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_yZ, 2*sendCount_yZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) 
&sendbuf_Yz, 2*sendCount_Yz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_YZ, 2*sendCount_YZ*sizeof(double)); // Allocate device memory - //...................................................................................... - ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, 2*5*recvCount_x*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, 2*5*recvCount_X*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, 2*5*recvCount_y*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, 2*5*recvCount_Y*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, 2*5*recvCount_z*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, 2*5*recvCount_Z*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_xy, 2*recvCount_xy*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_xY, 2*recvCount_xY*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xy, 2*recvCount_Xy*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_XY, 2*recvCount_XY*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_xz, 2*recvCount_xz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_xZ, 2*recvCount_xZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xz, 2*recvCount_Xz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_XZ, 2*recvCount_XZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_yz, 2*recvCount_yz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_yZ, 2*recvCount_yZ*sizeof(double)); // Allocate device memory - 
ScaLBL_AllocateZeroCopy((void **) &recvbuf_Yz, 2*recvCount_Yz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_YZ, 2*recvCount_YZ*sizeof(double)); // Allocate device memory - //...................................................................................... - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_x, sendCount_x*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_X, sendCount_X*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_y, sendCount_y*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Y, sendCount_Y*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_z, sendCount_z*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Z, sendCount_Z*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xy, sendCount_xy*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xY, sendCount_xY*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xy, sendCount_Xy*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XY, sendCount_XY*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xz, sendCount_xz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xZ, sendCount_xZ*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xz, sendCount_Xz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XZ, sendCount_XZ*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_yz, sendCount_yz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_yZ, sendCount_yZ*sizeof(int)); // Allocate device memory - 
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Yz, sendCount_Yz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcSendList_YZ, sendCount_YZ*sizeof(int)); // Allocate device memory - //...................................................................................... - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_x, recvCount_x*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_X, recvCount_X*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_y, recvCount_y*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Y, recvCount_Y*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_z, recvCount_z*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Z, recvCount_Z*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xy, recvCount_xy*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xY, recvCount_xY*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XY, recvCount_XY*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xz, recvCount_xz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_yz, recvCount_yz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory - 
ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory - //...................................................................................... - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_x, 5*recvCount_x*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_X, 5*recvCount_X*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_y, 5*recvCount_y*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Y, 5*recvCount_Y*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_z, 5*recvCount_z*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Z, 5*recvCount_Z*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_xy, recvCount_xy*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_xY, recvCount_xY*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_XY, recvCount_XY*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_xz, recvCount_xz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_yz, recvCount_yz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory - 
ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory - //...................................................................................... + //...................................................................................... + // Copy the domain size and communication information directly from Dm + Nx = Dm->Nx; + Ny = Dm->Ny; + Nz = Dm->Nz; + N = Nx * Ny * Nz; + next = 0; + rank = Dm->rank(); + rank_x = Dm->rank_x(); + rank_y = Dm->rank_y(); + rank_z = Dm->rank_z(); + rank_X = Dm->rank_X(); + rank_Y = Dm->rank_Y(); + rank_Z = Dm->rank_Z(); + rank_xy = Dm->rank_xy(); + rank_XY = Dm->rank_XY(); + rank_xY = Dm->rank_xY(); + rank_Xy = Dm->rank_Xy(); + rank_xz = Dm->rank_xz(); + rank_XZ = Dm->rank_XZ(); + rank_xZ = Dm->rank_xZ(); + rank_Xz = Dm->rank_Xz(); + rank_yz = Dm->rank_yz(); + rank_YZ = Dm->rank_YZ(); + rank_yZ = Dm->rank_yZ(); + rank_Yz = Dm->rank_Yz(); + sendCount_x = Dm->sendCount("x"); + sendCount_y = Dm->sendCount("y"); + sendCount_z = Dm->sendCount("z"); + sendCount_X = Dm->sendCount("X"); + sendCount_Y = Dm->sendCount("Y"); + sendCount_Z = Dm->sendCount("Z"); + sendCount_xy = Dm->sendCount("xy"); + sendCount_yz = Dm->sendCount("yz"); + sendCount_xz = Dm->sendCount("xz"); + sendCount_Xy = Dm->sendCount("Xy"); + sendCount_Yz = Dm->sendCount("Yz"); + sendCount_xZ = Dm->sendCount("xZ"); + sendCount_xY = Dm->sendCount("xY"); + sendCount_yZ = Dm->sendCount("yZ"); + sendCount_Xz = Dm->sendCount("Xz"); + sendCount_XY = Dm->sendCount("XY"); + sendCount_YZ = Dm->sendCount("YZ"); + sendCount_XZ = Dm->sendCount("XZ"); + recvCount_x = Dm->recvCount("x"); + recvCount_y = Dm->recvCount("y"); + recvCount_z = Dm->recvCount("z"); + recvCount_X = Dm->recvCount("X"); + recvCount_Y = Dm->recvCount("Y"); + recvCount_Z = Dm->recvCount("Z"); + recvCount_xy = Dm->recvCount("xy"); + recvCount_yz = 
Dm->recvCount("yz"); + recvCount_xz = Dm->recvCount("xz"); + recvCount_Xy = Dm->recvCount("Xy"); + recvCount_Yz = Dm->recvCount("Yz"); + recvCount_xZ = Dm->recvCount("xZ"); + recvCount_xY = Dm->recvCount("xY"); + recvCount_yZ = Dm->recvCount("yZ"); + recvCount_Xz = Dm->recvCount("Xz"); + recvCount_XY = Dm->recvCount("XY"); + recvCount_YZ = Dm->recvCount("YZ"); + recvCount_XZ = Dm->recvCount("XZ"); - //...................................................................................... - ScaLBL_CopyToZeroCopy(dvcSendList_x,Dm->sendList("x"),sendCount_x*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_X,Dm->sendList("X"),sendCount_X*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_y,Dm->sendList("y"),sendCount_y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Y,Dm->sendList("Y"),sendCount_Y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_z,Dm->sendList("z"),sendCount_z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Z,Dm->sendList("Z"),sendCount_Z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xy,Dm->sendList("xy"),sendCount_xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_XY,Dm->sendList("XY"),sendCount_XY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xY,Dm->sendList("xY"),sendCount_xY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Xy,Dm->sendList("Xy"),sendCount_Xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xz,Dm->sendList("xz"),sendCount_xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_XZ,Dm->sendList("XZ"),sendCount_XZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xZ,Dm->sendList("xZ"),sendCount_xZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Xz,Dm->sendList("Xz"),sendCount_Xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_yz,Dm->sendList("yz"),sendCount_yz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_YZ,Dm->sendList("YZ"),sendCount_YZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_yZ,Dm->sendList("yZ"),sendCount_yZ*sizeof(int)); - 
ScaLBL_CopyToZeroCopy(dvcSendList_Yz,Dm->sendList("Yz"),sendCount_Yz*sizeof(int)); - //...................................................................................... - ScaLBL_CopyToZeroCopy(dvcRecvList_x,Dm->recvList("x"),recvCount_x*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_X,Dm->recvList("X"),recvCount_X*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_y,Dm->recvList("y"),recvCount_y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Y,Dm->recvList("Y"),recvCount_Y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_z,Dm->recvList("z"),recvCount_z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Z,Dm->recvList("Z"),recvCount_Z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xy,Dm->recvList("xy"),recvCount_xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_XY,Dm->recvList("XY"),recvCount_XY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xY,Dm->recvList("xY"),recvCount_xY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Xy,Dm->recvList("Xy"),recvCount_Xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xz,Dm->recvList("xz"),recvCount_xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_XZ,Dm->recvList("XZ"),recvCount_XZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xZ,Dm->recvList("xZ"),recvCount_xZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Xz,Dm->recvList("Xz"),recvCount_Xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_yz,Dm->recvList("yz"),recvCount_yz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_YZ,Dm->recvList("YZ"),recvCount_YZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_yZ,Dm->recvList("yZ"),recvCount_yZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList("Yz"),recvCount_Yz*sizeof(int)); - //...................................................................................... 
+ iproc = Dm->iproc(); + jproc = Dm->jproc(); + kproc = Dm->kproc(); + nprocx = Dm->nprocx(); + nprocy = Dm->nprocy(); + nprocz = Dm->nprocz(); + BoundaryCondition = Dm->BoundaryCondition; + //...................................................................................... - MPI_COMM_SCALBL.barrier(); + ScaLBL_AllocateZeroCopy((void **)&sendbuf_x, + 2 * 5 * sendCount_x * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_X, + 2 * 5 * sendCount_X * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_y, + 2 * 5 * sendCount_y * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_Y, + 2 * 5 * sendCount_Y * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_z, + 2 * 5 * sendCount_z * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_Z, + 2 * 5 * sendCount_Z * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_xy, + 2 * sendCount_xy * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_xY, + 2 * sendCount_xY * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_Xy, + 2 * sendCount_Xy * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_XY, + 2 * sendCount_XY * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_xz, + 2 * sendCount_xz * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_xZ, + 2 * sendCount_xZ * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_Xz, + 2 * sendCount_Xz * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_XZ, + 2 * sendCount_XZ * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_yz, + 2 * sendCount_yz * + 
sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_yZ, + 2 * sendCount_yZ * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_Yz, + 2 * sendCount_Yz * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&sendbuf_YZ, + 2 * sendCount_YZ * + sizeof(double)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateZeroCopy((void **)&recvbuf_x, + 2 * 5 * recvCount_x * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_X, + 2 * 5 * recvCount_X * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_y, + 2 * 5 * recvCount_y * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_Y, + 2 * 5 * recvCount_Y * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_z, + 2 * 5 * recvCount_z * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_Z, + 2 * 5 * recvCount_Z * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_xy, + 2 * recvCount_xy * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_xY, + 2 * recvCount_xY * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_Xy, + 2 * recvCount_Xy * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_XY, + 2 * recvCount_XY * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_xz, + 2 * recvCount_xz * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_xZ, + 2 * recvCount_xZ * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_Xz, + 2 * recvCount_Xz * + sizeof(double)); // Allocate device memory + 
ScaLBL_AllocateZeroCopy((void **)&recvbuf_XZ, + 2 * recvCount_XZ * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_yz, + 2 * recvCount_yz * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_yZ, + 2 * recvCount_yZ * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_Yz, + 2 * recvCount_Yz * + sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&recvbuf_YZ, + 2 * recvCount_YZ * + sizeof(double)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_x, + sendCount_x * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_X, + sendCount_X * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_y, + sendCount_y * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_Y, + sendCount_Y * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_z, + sendCount_z * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_Z, + sendCount_Z * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_xy, + sendCount_xy * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_xY, + sendCount_xY * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_Xy, + sendCount_Xy * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_XY, + sendCount_XY * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_xz, + sendCount_xz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_xZ, + sendCount_xZ * + sizeof(int)); // Allocate device memory + 
ScaLBL_AllocateZeroCopy((void **)&dvcSendList_Xz, + sendCount_Xz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_XZ, + sendCount_XZ * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_yz, + sendCount_yz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_yZ, + sendCount_yZ * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_Yz, + sendCount_Yz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcSendList_YZ, + sendCount_YZ * + sizeof(int)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_x, + recvCount_x * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_X, + recvCount_X * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_y, + recvCount_y * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_Y, + recvCount_Y * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_z, + recvCount_z * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_Z, + recvCount_Z * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_xy, + recvCount_xy * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_xY, + recvCount_xY * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_Xy, + recvCount_Xy * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_XY, + recvCount_XY * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_xz, + recvCount_xz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void 
**)&dvcRecvList_xZ, + recvCount_xZ * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_Xz, + recvCount_Xz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_XZ, + recvCount_XZ * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_yz, + recvCount_yz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_yZ, + recvCount_yZ * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_Yz, + recvCount_Yz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvList_YZ, + recvCount_YZ * + sizeof(int)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_x, + 5 * recvCount_x * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_X, + 5 * recvCount_X * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_y, + 5 * recvCount_y * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_Y, + 5 * recvCount_Y * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_z, + 5 * recvCount_z * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_Z, + 5 * recvCount_Z * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_xy, + recvCount_xy * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_xY, + recvCount_xY * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_Xy, + recvCount_Xy * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_XY, + recvCount_XY * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void 
**)&dvcRecvDist_xz, + recvCount_xz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_xZ, + recvCount_xZ * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_Xz, + recvCount_Xz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_XZ, + recvCount_XZ * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_yz, + recvCount_yz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_yZ, + recvCount_yZ * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_Yz, + recvCount_Yz * + sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **)&dvcRecvDist_YZ, + recvCount_YZ * + sizeof(int)); // Allocate device memory + //...................................................................................... - //................................................................................... - // Set up the recieve distribution lists - //................................................................................... - //...Map recieve list for the X face: q=2,8,10,12,14 ................................. - D3Q19_MapRecv(-1,0,0, Dm->recvList("X"),0,recvCount_X,dvcRecvDist_X); - D3Q19_MapRecv(-1,-1,0,Dm->recvList("X"),recvCount_X,recvCount_X,dvcRecvDist_X); - D3Q19_MapRecv(-1,1,0, Dm->recvList("X"),2*recvCount_X,recvCount_X,dvcRecvDist_X); - D3Q19_MapRecv(-1,0,-1,Dm->recvList("X"),3*recvCount_X,recvCount_X,dvcRecvDist_X); - D3Q19_MapRecv(-1,0,1, Dm->recvList("X"),4*recvCount_X,recvCount_X,dvcRecvDist_X); - //................................................................................... - //...Map recieve list for the x face: q=1,7,9,11,13.................................. 
- D3Q19_MapRecv(1,0,0, Dm->recvList("x"),0,recvCount_x,dvcRecvDist_x); - D3Q19_MapRecv(1,1,0, Dm->recvList("x"),recvCount_x,recvCount_x,dvcRecvDist_x); - D3Q19_MapRecv(1,-1,0,Dm->recvList("x"),2*recvCount_x,recvCount_x,dvcRecvDist_x); - D3Q19_MapRecv(1,0,1, Dm->recvList("x"),3*recvCount_x,recvCount_x,dvcRecvDist_x); - D3Q19_MapRecv(1,0,-1,Dm->recvList("x"),4*recvCount_x,recvCount_x,dvcRecvDist_x); - //................................................................................... - //...Map recieve list for the y face: q=4,8,9,16,18 ................................... - D3Q19_MapRecv(0,-1,0, Dm->recvList("Y"),0,recvCount_Y,dvcRecvDist_Y); - D3Q19_MapRecv(-1,-1,0,Dm->recvList("Y"),recvCount_Y,recvCount_Y,dvcRecvDist_Y); - D3Q19_MapRecv(1,-1,0, Dm->recvList("Y"),2*recvCount_Y,recvCount_Y,dvcRecvDist_Y); - D3Q19_MapRecv(0,-1,-1,Dm->recvList("Y"),3*recvCount_Y,recvCount_Y,dvcRecvDist_Y); - D3Q19_MapRecv(0,-1,1, Dm->recvList("Y"),4*recvCount_Y,recvCount_Y,dvcRecvDist_Y); - //................................................................................... - //...Map recieve list for the Y face: q=3,7,10,15,17 .................................. - D3Q19_MapRecv(0,1,0, Dm->recvList("y"),0,recvCount_y,dvcRecvDist_y); - D3Q19_MapRecv(1,1,0, Dm->recvList("y"),recvCount_y,recvCount_y,dvcRecvDist_y); - D3Q19_MapRecv(-1,1,0,Dm->recvList("y"),2*recvCount_y,recvCount_y,dvcRecvDist_y); - D3Q19_MapRecv(0,1,1, Dm->recvList("y"),3*recvCount_y,recvCount_y,dvcRecvDist_y); - D3Q19_MapRecv(0,1,-1,Dm->recvList("y"),4*recvCount_y,recvCount_y,dvcRecvDist_y); - //................................................................................... - //...Map recieve list for the z face<<<6,12,13,16,17).............................................. 
- D3Q19_MapRecv(0,0,-1, Dm->recvList("Z"),0,recvCount_Z,dvcRecvDist_Z); - D3Q19_MapRecv(-1,0,-1,Dm->recvList("Z"),recvCount_Z,recvCount_Z,dvcRecvDist_Z); - D3Q19_MapRecv(1,0,-1, Dm->recvList("Z"),2*recvCount_Z,recvCount_Z,dvcRecvDist_Z); - D3Q19_MapRecv(0,-1,-1,Dm->recvList("Z"),3*recvCount_Z,recvCount_Z,dvcRecvDist_Z); - D3Q19_MapRecv(0,1,-1, Dm->recvList("Z"),4*recvCount_Z,recvCount_Z,dvcRecvDist_Z); - //...Map recieve list for the Z face<<<5,11,14,15,18).............................................. - D3Q19_MapRecv(0,0,1, Dm->recvList("z"),0,recvCount_z,dvcRecvDist_z); - D3Q19_MapRecv(1,0,1, Dm->recvList("z"),recvCount_z,recvCount_z,dvcRecvDist_z); - D3Q19_MapRecv(-1,0,1,Dm->recvList("z"),2*recvCount_z,recvCount_z,dvcRecvDist_z); - D3Q19_MapRecv(0,1,1, Dm->recvList("z"),3*recvCount_z,recvCount_z,dvcRecvDist_z); - D3Q19_MapRecv(0,-1,1,Dm->recvList("z"),4*recvCount_z,recvCount_z,dvcRecvDist_z); - //.................................................................................. - //...Map recieve list for the xy edge <<<8)................................ - D3Q19_MapRecv(-1,-1,0,Dm->recvList("XY"),0,recvCount_XY,dvcRecvDist_XY); - //...Map recieve list for the Xy edge <<<9)................................ - D3Q19_MapRecv(1,-1,0,Dm->recvList("xY"),0,recvCount_xY,dvcRecvDist_xY); - //...Map recieve list for the xY edge <<<10)................................ - D3Q19_MapRecv(-1,1,0,Dm->recvList("Xy"),0,recvCount_Xy,dvcRecvDist_Xy); - //...Map recieve list for the XY edge <<<7)................................ - D3Q19_MapRecv(1,1,0,Dm->recvList("xy"),0,recvCount_xy,dvcRecvDist_xy); - //...Map recieve list for the xz edge <<<12)................................ - D3Q19_MapRecv(-1,0,-1,Dm->recvList("XZ"),0,recvCount_XZ,dvcRecvDist_XZ); - //...Map recieve list for the xZ edge <<<14)................................ - D3Q19_MapRecv(-1,0,1,Dm->recvList("Xz"),0,recvCount_Xz,dvcRecvDist_Xz); - //...Map recieve list for the Xz edge <<<13)................................ 
- D3Q19_MapRecv(1,0,-1,Dm->recvList("xZ"),0,recvCount_xZ,dvcRecvDist_xZ); - //...Map recieve list for the XZ edge <<<11)................................ - D3Q19_MapRecv(1,0,1,Dm->recvList("xz"),0,recvCount_xz,dvcRecvDist_xz); - //...Map recieve list for the yz edge <<<16)................................ - D3Q19_MapRecv(0,-1,-1,Dm->recvList("YZ"),0,recvCount_YZ,dvcRecvDist_YZ); - //...Map recieve list for the yZ edge <<<18)................................ - D3Q19_MapRecv(0,-1,1,Dm->recvList("Yz"),0,recvCount_Yz,dvcRecvDist_Yz); - //...Map recieve list for the Yz edge <<<17)................................ - D3Q19_MapRecv(0,1,-1,Dm->recvList("yZ"),0,recvCount_yZ,dvcRecvDist_yZ); - //...Map recieve list for the YZ edge <<<15)................................ - D3Q19_MapRecv(0,1,1,Dm->recvList("yz"),0,recvCount_yz,dvcRecvDist_yz); - //................................................................................... + //...................................................................................... 
+ ScaLBL_CopyToZeroCopy(dvcSendList_x, Dm->sendList("x"), + sendCount_x * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_X, Dm->sendList("X"), + sendCount_X * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_y, Dm->sendList("y"), + sendCount_y * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Y, Dm->sendList("Y"), + sendCount_Y * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_z, Dm->sendList("z"), + sendCount_z * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Z, Dm->sendList("Z"), + sendCount_Z * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xy, Dm->sendList("xy"), + sendCount_xy * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_XY, Dm->sendList("XY"), + sendCount_XY * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xY, Dm->sendList("xY"), + sendCount_xY * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Xy, Dm->sendList("Xy"), + sendCount_Xy * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xz, Dm->sendList("xz"), + sendCount_xz * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_XZ, Dm->sendList("XZ"), + sendCount_XZ * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xZ, Dm->sendList("xZ"), + sendCount_xZ * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Xz, Dm->sendList("Xz"), + sendCount_Xz * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_yz, Dm->sendList("yz"), + sendCount_yz * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_YZ, Dm->sendList("YZ"), + sendCount_YZ * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_yZ, Dm->sendList("yZ"), + sendCount_yZ * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Yz, Dm->sendList("Yz"), + sendCount_Yz * sizeof(int)); + //...................................................................................... 
+ ScaLBL_CopyToZeroCopy(dvcRecvList_x, Dm->recvList("x"), + recvCount_x * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_X, Dm->recvList("X"), + recvCount_X * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_y, Dm->recvList("y"), + recvCount_y * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Y, Dm->recvList("Y"), + recvCount_Y * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_z, Dm->recvList("z"), + recvCount_z * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Z, Dm->recvList("Z"), + recvCount_Z * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xy, Dm->recvList("xy"), + recvCount_xy * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_XY, Dm->recvList("XY"), + recvCount_XY * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xY, Dm->recvList("xY"), + recvCount_xY * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Xy, Dm->recvList("Xy"), + recvCount_Xy * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xz, Dm->recvList("xz"), + recvCount_xz * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_XZ, Dm->recvList("XZ"), + recvCount_XZ * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xZ, Dm->recvList("xZ"), + recvCount_xZ * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Xz, Dm->recvList("Xz"), + recvCount_Xz * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_yz, Dm->recvList("yz"), + recvCount_yz * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_YZ, Dm->recvList("YZ"), + recvCount_YZ * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_yZ, Dm->recvList("yZ"), + recvCount_yZ * sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Yz, Dm->recvList("Yz"), + recvCount_Yz * sizeof(int)); + //...................................................................................... - //...................................................................................... - MPI_COMM_SCALBL.barrier(); - ScaLBL_DeviceBarrier(); - //...................................................................................... 
- SendCount = 5*(sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z)+ - sendCount_xy+sendCount_Xy+sendCount_xY+sendCount_XY+ - sendCount_xZ+sendCount_Xz+sendCount_xZ+sendCount_XZ+ - sendCount_yz+sendCount_Yz+sendCount_yZ+sendCount_YZ; + MPI_COMM_SCALBL.barrier(); - RecvCount = 5*(recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z)+ - recvCount_xy+recvCount_Xy+recvCount_xY+recvCount_XY+ - recvCount_xZ+recvCount_Xz+recvCount_xZ+recvCount_XZ+ - recvCount_yz+recvCount_Yz+recvCount_yZ+recvCount_YZ; + //................................................................................... + // Set up the recieve distribution lists + //................................................................................... + //...Map recieve list for the X face: q=2,8,10,12,14 ................................. + D3Q19_MapRecv(-1, 0, 0, Dm->recvList("X"), 0, recvCount_X, dvcRecvDist_X); + D3Q19_MapRecv(-1, -1, 0, Dm->recvList("X"), recvCount_X, recvCount_X, + dvcRecvDist_X); + D3Q19_MapRecv(-1, 1, 0, Dm->recvList("X"), 2 * recvCount_X, recvCount_X, + dvcRecvDist_X); + D3Q19_MapRecv(-1, 0, -1, Dm->recvList("X"), 3 * recvCount_X, recvCount_X, + dvcRecvDist_X); + D3Q19_MapRecv(-1, 0, 1, Dm->recvList("X"), 4 * recvCount_X, recvCount_X, + dvcRecvDist_X); + //................................................................................... + //...Map recieve list for the x face: q=1,7,9,11,13.................................. 
+ D3Q19_MapRecv(1, 0, 0, Dm->recvList("x"), 0, recvCount_x, dvcRecvDist_x); + D3Q19_MapRecv(1, 1, 0, Dm->recvList("x"), recvCount_x, recvCount_x, + dvcRecvDist_x); + D3Q19_MapRecv(1, -1, 0, Dm->recvList("x"), 2 * recvCount_x, recvCount_x, + dvcRecvDist_x); + D3Q19_MapRecv(1, 0, 1, Dm->recvList("x"), 3 * recvCount_x, recvCount_x, + dvcRecvDist_x); + D3Q19_MapRecv(1, 0, -1, Dm->recvList("x"), 4 * recvCount_x, recvCount_x, + dvcRecvDist_x); + //................................................................................... + //...Map recieve list for the y face: q=4,8,9,16,18 ................................... + D3Q19_MapRecv(0, -1, 0, Dm->recvList("Y"), 0, recvCount_Y, dvcRecvDist_Y); + D3Q19_MapRecv(-1, -1, 0, Dm->recvList("Y"), recvCount_Y, recvCount_Y, + dvcRecvDist_Y); + D3Q19_MapRecv(1, -1, 0, Dm->recvList("Y"), 2 * recvCount_Y, recvCount_Y, + dvcRecvDist_Y); + D3Q19_MapRecv(0, -1, -1, Dm->recvList("Y"), 3 * recvCount_Y, recvCount_Y, + dvcRecvDist_Y); + D3Q19_MapRecv(0, -1, 1, Dm->recvList("Y"), 4 * recvCount_Y, recvCount_Y, + dvcRecvDist_Y); + //................................................................................... + //...Map recieve list for the Y face: q=3,7,10,15,17 .................................. + D3Q19_MapRecv(0, 1, 0, Dm->recvList("y"), 0, recvCount_y, dvcRecvDist_y); + D3Q19_MapRecv(1, 1, 0, Dm->recvList("y"), recvCount_y, recvCount_y, + dvcRecvDist_y); + D3Q19_MapRecv(-1, 1, 0, Dm->recvList("y"), 2 * recvCount_y, recvCount_y, + dvcRecvDist_y); + D3Q19_MapRecv(0, 1, 1, Dm->recvList("y"), 3 * recvCount_y, recvCount_y, + dvcRecvDist_y); + D3Q19_MapRecv(0, 1, -1, Dm->recvList("y"), 4 * recvCount_y, recvCount_y, + dvcRecvDist_y); + //................................................................................... + //...Map recieve list for the z face<<<6,12,13,16,17).............................................. 
+ D3Q19_MapRecv(0, 0, -1, Dm->recvList("Z"), 0, recvCount_Z, dvcRecvDist_Z); + D3Q19_MapRecv(-1, 0, -1, Dm->recvList("Z"), recvCount_Z, recvCount_Z, + dvcRecvDist_Z); + D3Q19_MapRecv(1, 0, -1, Dm->recvList("Z"), 2 * recvCount_Z, recvCount_Z, + dvcRecvDist_Z); + D3Q19_MapRecv(0, -1, -1, Dm->recvList("Z"), 3 * recvCount_Z, recvCount_Z, + dvcRecvDist_Z); + D3Q19_MapRecv(0, 1, -1, Dm->recvList("Z"), 4 * recvCount_Z, recvCount_Z, + dvcRecvDist_Z); + //...Map recieve list for the Z face<<<5,11,14,15,18).............................................. + D3Q19_MapRecv(0, 0, 1, Dm->recvList("z"), 0, recvCount_z, dvcRecvDist_z); + D3Q19_MapRecv(1, 0, 1, Dm->recvList("z"), recvCount_z, recvCount_z, + dvcRecvDist_z); + D3Q19_MapRecv(-1, 0, 1, Dm->recvList("z"), 2 * recvCount_z, recvCount_z, + dvcRecvDist_z); + D3Q19_MapRecv(0, 1, 1, Dm->recvList("z"), 3 * recvCount_z, recvCount_z, + dvcRecvDist_z); + D3Q19_MapRecv(0, -1, 1, Dm->recvList("z"), 4 * recvCount_z, recvCount_z, + dvcRecvDist_z); + //.................................................................................. + //...Map recieve list for the xy edge <<<8)................................ + D3Q19_MapRecv(-1, -1, 0, Dm->recvList("XY"), 0, recvCount_XY, + dvcRecvDist_XY); + //...Map recieve list for the Xy edge <<<9)................................ + D3Q19_MapRecv(1, -1, 0, Dm->recvList("xY"), 0, recvCount_xY, + dvcRecvDist_xY); + //...Map recieve list for the xY edge <<<10)................................ + D3Q19_MapRecv(-1, 1, 0, Dm->recvList("Xy"), 0, recvCount_Xy, + dvcRecvDist_Xy); + //...Map recieve list for the XY edge <<<7)................................ + D3Q19_MapRecv(1, 1, 0, Dm->recvList("xy"), 0, recvCount_xy, dvcRecvDist_xy); + //...Map recieve list for the xz edge <<<12)................................ + D3Q19_MapRecv(-1, 0, -1, Dm->recvList("XZ"), 0, recvCount_XZ, + dvcRecvDist_XZ); + //...Map recieve list for the xZ edge <<<14)................................ 
+ D3Q19_MapRecv(-1, 0, 1, Dm->recvList("Xz"), 0, recvCount_Xz, + dvcRecvDist_Xz); + //...Map recieve list for the Xz edge <<<13)................................ + D3Q19_MapRecv(1, 0, -1, Dm->recvList("xZ"), 0, recvCount_xZ, + dvcRecvDist_xZ); + //...Map recieve list for the XZ edge <<<11)................................ + D3Q19_MapRecv(1, 0, 1, Dm->recvList("xz"), 0, recvCount_xz, dvcRecvDist_xz); + //...Map recieve list for the yz edge <<<16)................................ + D3Q19_MapRecv(0, -1, -1, Dm->recvList("YZ"), 0, recvCount_YZ, + dvcRecvDist_YZ); + //...Map recieve list for the yZ edge <<<18)................................ + D3Q19_MapRecv(0, -1, 1, Dm->recvList("Yz"), 0, recvCount_Yz, + dvcRecvDist_Yz); + //...Map recieve list for the Yz edge <<<17)................................ + D3Q19_MapRecv(0, 1, -1, Dm->recvList("yZ"), 0, recvCount_yZ, + dvcRecvDist_yZ); + //...Map recieve list for the YZ edge <<<15)................................ + D3Q19_MapRecv(0, 1, 1, Dm->recvList("yz"), 0, recvCount_yz, dvcRecvDist_yz); + //................................................................................... - CommunicationCount = SendCount+RecvCount; - //...................................................................................... + //...................................................................................... + MPI_COMM_SCALBL.barrier(); + ScaLBL_DeviceBarrier(); + //...................................................................................... 
+ SendCount = 5 * (sendCount_x + sendCount_X + sendCount_y + sendCount_Y + + sendCount_z + sendCount_Z) + + sendCount_xy + sendCount_Xy + sendCount_xY + sendCount_XY + + sendCount_xZ + sendCount_Xz + sendCount_xZ + sendCount_XZ + + sendCount_yz + sendCount_Yz + sendCount_yZ + sendCount_YZ; + RecvCount = 5 * (recvCount_x + recvCount_X + recvCount_y + recvCount_Y + + recvCount_z + recvCount_Z) + + recvCount_xy + recvCount_Xy + recvCount_xY + recvCount_XY + + recvCount_xZ + recvCount_Xz + recvCount_xZ + recvCount_XZ + + recvCount_yz + recvCount_Yz + recvCount_yZ + recvCount_YZ; - //................................................................................... - // Set up the persistent communication for D3Q19AA (use tags 130-145) - //................................................................................... + CommunicationCount = SendCount + RecvCount; + //...................................................................................... + + //................................................................................... + // Set up the persistent communication for D3Q19AA (use tags 130-145) + //................................................................................... 
req_D3Q19AA.clear(); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_x, 5*sendCount_x, rank_x, 130 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_X, 5*recvCount_X, rank_X, 130 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_X, 5*sendCount_X, rank_X, 131 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_x, 5*recvCount_x, rank_x, 131 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_y, 5*sendCount_y, rank_y, 132 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Y, 5*recvCount_Y, rank_Y, 132 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Y, 5*sendCount_Y, rank_Y, 133 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_y, 5*recvCount_y, rank_y, 133 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_z, 5*sendCount_z, rank_z, 134 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Z, 5*recvCount_Z, rank_Z, 134 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Z, 5*sendCount_Z, rank_Z, 135 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_z, 5*recvCount_z, rank_z, 135 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xy, sendCount_xy, rank_xy, 136 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XY, recvCount_XY, rank_XY, 136 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XY, sendCount_XY, rank_XY, 137 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xy, recvCount_xy, rank_xy, 137 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xy, sendCount_Xy, rank_Xy, 138 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xY, recvCount_xY, rank_xY, 138 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xY, sendCount_xY, rank_xY, 139 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xy, recvCount_Xy, rank_Xy, 139 ) ); - req_D3Q19AA.push_back( 
MPI_COMM_SCALBL.Isend_init( sendbuf_xz, sendCount_xz, rank_xz, 140 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XZ, recvCount_XZ, rank_XZ, 140 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xZ, sendCount_xZ, rank_xZ, 143 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xz, recvCount_Xz, rank_Xz, 143 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xz, sendCount_Xz, rank_Xz, 142 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xZ, recvCount_xZ, rank_xZ, 142 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XZ, sendCount_XZ, rank_XZ, 141 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xz, recvCount_xz, rank_xz, 141 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yz, sendCount_yz, rank_yz, 144 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_YZ, recvCount_YZ, rank_YZ, 144 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yZ, sendCount_yZ, rank_yZ, 147 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Yz, recvCount_Yz, rank_Yz, 147 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Yz, sendCount_Yz, rank_Yz, 146 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yZ, recvCount_yZ, rank_yZ, 146 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_YZ, sendCount_YZ, rank_YZ, 145 ) ); - req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yz, recvCount_yz, rank_yz, 145 ) ); - + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_x, 5 * sendCount_x, rank_x, 130)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_X, 5 * recvCount_X, rank_X, 130)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_X, 5 * sendCount_X, rank_X, 131)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_x, 5 * recvCount_x, rank_x, 131)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_y, 5 * sendCount_y, 
rank_y, 132)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_Y, 5 * recvCount_Y, rank_Y, 132)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_Y, 5 * sendCount_Y, rank_Y, 133)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_y, 5 * recvCount_y, rank_y, 133)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_z, 5 * sendCount_z, rank_z, 134)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_Z, 5 * recvCount_Z, rank_Z, 134)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_Z, 5 * sendCount_Z, rank_Z, 135)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_z, 5 * recvCount_z, rank_z, 135)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_xy, sendCount_xy, rank_xy, 136)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_XY, recvCount_XY, rank_XY, 136)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_XY, sendCount_XY, rank_XY, 137)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_xy, recvCount_xy, rank_xy, 137)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_Xy, sendCount_Xy, rank_Xy, 138)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_xY, recvCount_xY, rank_xY, 138)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_xY, sendCount_xY, rank_xY, 139)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_Xy, recvCount_Xy, rank_Xy, 139)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_xz, sendCount_xz, rank_xz, 140)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_XZ, recvCount_XZ, rank_XZ, 140)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_xZ, sendCount_xZ, rank_xZ, 143)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_Xz, recvCount_Xz, rank_Xz, 143)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_Xz, sendCount_Xz, rank_Xz, 142)); + req_D3Q19AA.push_back( + 
MPI_COMM_SCALBL.Irecv_init(recvbuf_xZ, recvCount_xZ, rank_xZ, 142)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_XZ, sendCount_XZ, rank_XZ, 141)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_xz, recvCount_xz, rank_xz, 141)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_yz, sendCount_yz, rank_yz, 144)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_YZ, recvCount_YZ, rank_YZ, 144)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_yZ, sendCount_yZ, rank_yZ, 147)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_Yz, recvCount_Yz, rank_Yz, 147)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_Yz, sendCount_Yz, rank_Yz, 146)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_yZ, recvCount_yZ, rank_yZ, 146)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Isend_init(sendbuf_YZ, sendCount_YZ, rank_YZ, 145)); + req_D3Q19AA.push_back( + MPI_COMM_SCALBL.Irecv_init(recvbuf_yz, recvCount_yz, rank_yz, 145)); } -ScaLBL_Communicator::~ScaLBL_Communicator() -{ +ScaLBL_Communicator::~ScaLBL_Communicator() { - ScaLBL_FreeDeviceMemory( sendbuf_x ); - ScaLBL_FreeDeviceMemory( sendbuf_X ); - ScaLBL_FreeDeviceMemory( sendbuf_y ); - ScaLBL_FreeDeviceMemory( sendbuf_Y ); - ScaLBL_FreeDeviceMemory( sendbuf_z ); - ScaLBL_FreeDeviceMemory( sendbuf_Z ); - ScaLBL_FreeDeviceMemory( sendbuf_xy ); - ScaLBL_FreeDeviceMemory( sendbuf_xY ); - ScaLBL_FreeDeviceMemory( sendbuf_Xy ); - ScaLBL_FreeDeviceMemory( sendbuf_XY ); - ScaLBL_FreeDeviceMemory( sendbuf_xz ); - ScaLBL_FreeDeviceMemory( sendbuf_xZ ); - ScaLBL_FreeDeviceMemory( sendbuf_Xz ); - ScaLBL_FreeDeviceMemory( sendbuf_XZ ); - ScaLBL_FreeDeviceMemory( sendbuf_yz ); - ScaLBL_FreeDeviceMemory( sendbuf_yZ ); - ScaLBL_FreeDeviceMemory( sendbuf_Yz ); - ScaLBL_FreeDeviceMemory( sendbuf_YZ ); - ScaLBL_FreeDeviceMemory( recvbuf_x ); - ScaLBL_FreeDeviceMemory( recvbuf_X ); - ScaLBL_FreeDeviceMemory( recvbuf_y ); - 
ScaLBL_FreeDeviceMemory( recvbuf_Y ); - ScaLBL_FreeDeviceMemory( recvbuf_z ); - ScaLBL_FreeDeviceMemory( recvbuf_Z ); - ScaLBL_FreeDeviceMemory( recvbuf_xy ); - ScaLBL_FreeDeviceMemory( recvbuf_xY ); - ScaLBL_FreeDeviceMemory( recvbuf_Xy ); - ScaLBL_FreeDeviceMemory( recvbuf_XY ); - ScaLBL_FreeDeviceMemory( recvbuf_xz ); - ScaLBL_FreeDeviceMemory( recvbuf_xZ ); - ScaLBL_FreeDeviceMemory( recvbuf_Xz ); - ScaLBL_FreeDeviceMemory( recvbuf_XZ ); - ScaLBL_FreeDeviceMemory( recvbuf_yz ); - ScaLBL_FreeDeviceMemory( recvbuf_yZ ); - ScaLBL_FreeDeviceMemory( recvbuf_Yz ); - ScaLBL_FreeDeviceMemory( recvbuf_YZ ); - ScaLBL_FreeDeviceMemory( dvcSendList_x ); - ScaLBL_FreeDeviceMemory( dvcSendList_X ); - ScaLBL_FreeDeviceMemory( dvcSendList_y ); - ScaLBL_FreeDeviceMemory( dvcSendList_Y ); - ScaLBL_FreeDeviceMemory( dvcSendList_z ); - ScaLBL_FreeDeviceMemory( dvcSendList_Z ); - ScaLBL_FreeDeviceMemory( dvcSendList_xy ); - ScaLBL_FreeDeviceMemory( dvcSendList_xY ); - ScaLBL_FreeDeviceMemory( dvcSendList_Xy ); - ScaLBL_FreeDeviceMemory( dvcSendList_XY ); - ScaLBL_FreeDeviceMemory( dvcSendList_xz ); - ScaLBL_FreeDeviceMemory( dvcSendList_xZ ); - ScaLBL_FreeDeviceMemory( dvcSendList_Xz ); - ScaLBL_FreeDeviceMemory( dvcSendList_XZ ); - ScaLBL_FreeDeviceMemory( dvcSendList_yz ); - ScaLBL_FreeDeviceMemory( dvcSendList_yZ ); - ScaLBL_FreeDeviceMemory( dvcSendList_Yz ); - ScaLBL_FreeDeviceMemory( dvcSendList_YZ ); - ScaLBL_FreeDeviceMemory( dvcRecvList_x ); - ScaLBL_FreeDeviceMemory( dvcRecvList_X ); - ScaLBL_FreeDeviceMemory( dvcRecvList_y ); - ScaLBL_FreeDeviceMemory( dvcRecvList_Y ); - ScaLBL_FreeDeviceMemory( dvcRecvList_z ); - ScaLBL_FreeDeviceMemory( dvcRecvList_Z ); - ScaLBL_FreeDeviceMemory( dvcRecvList_xy ); - ScaLBL_FreeDeviceMemory( dvcRecvList_xY ); - ScaLBL_FreeDeviceMemory( dvcRecvList_Xy ); - ScaLBL_FreeDeviceMemory( dvcRecvList_XY ); - ScaLBL_FreeDeviceMemory( dvcRecvList_xz ); - ScaLBL_FreeDeviceMemory( dvcRecvList_xZ ); - ScaLBL_FreeDeviceMemory( dvcRecvList_Xz ); - 
ScaLBL_FreeDeviceMemory( dvcRecvList_XZ ); - ScaLBL_FreeDeviceMemory( dvcRecvList_yz ); - ScaLBL_FreeDeviceMemory( dvcRecvList_yZ ); - ScaLBL_FreeDeviceMemory( dvcRecvList_Yz ); - ScaLBL_FreeDeviceMemory( dvcRecvList_YZ ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_x ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_X ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_y ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_Y ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_z ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_Z ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_xy ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_xY ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_Xy ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_XY ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_xz ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_xZ ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_Xz ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_XZ ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_yz ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_yZ ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_Yz ); - ScaLBL_FreeDeviceMemory( dvcRecvDist_YZ ); + ScaLBL_FreeDeviceMemory(sendbuf_x); + ScaLBL_FreeDeviceMemory(sendbuf_X); + ScaLBL_FreeDeviceMemory(sendbuf_y); + ScaLBL_FreeDeviceMemory(sendbuf_Y); + ScaLBL_FreeDeviceMemory(sendbuf_z); + ScaLBL_FreeDeviceMemory(sendbuf_Z); + ScaLBL_FreeDeviceMemory(sendbuf_xy); + ScaLBL_FreeDeviceMemory(sendbuf_xY); + ScaLBL_FreeDeviceMemory(sendbuf_Xy); + ScaLBL_FreeDeviceMemory(sendbuf_XY); + ScaLBL_FreeDeviceMemory(sendbuf_xz); + ScaLBL_FreeDeviceMemory(sendbuf_xZ); + ScaLBL_FreeDeviceMemory(sendbuf_Xz); + ScaLBL_FreeDeviceMemory(sendbuf_XZ); + ScaLBL_FreeDeviceMemory(sendbuf_yz); + ScaLBL_FreeDeviceMemory(sendbuf_yZ); + ScaLBL_FreeDeviceMemory(sendbuf_Yz); + ScaLBL_FreeDeviceMemory(sendbuf_YZ); + ScaLBL_FreeDeviceMemory(recvbuf_x); + ScaLBL_FreeDeviceMemory(recvbuf_X); + ScaLBL_FreeDeviceMemory(recvbuf_y); + ScaLBL_FreeDeviceMemory(recvbuf_Y); + ScaLBL_FreeDeviceMemory(recvbuf_z); + ScaLBL_FreeDeviceMemory(recvbuf_Z); + ScaLBL_FreeDeviceMemory(recvbuf_xy); + 
ScaLBL_FreeDeviceMemory(recvbuf_xY); + ScaLBL_FreeDeviceMemory(recvbuf_Xy); + ScaLBL_FreeDeviceMemory(recvbuf_XY); + ScaLBL_FreeDeviceMemory(recvbuf_xz); + ScaLBL_FreeDeviceMemory(recvbuf_xZ); + ScaLBL_FreeDeviceMemory(recvbuf_Xz); + ScaLBL_FreeDeviceMemory(recvbuf_XZ); + ScaLBL_FreeDeviceMemory(recvbuf_yz); + ScaLBL_FreeDeviceMemory(recvbuf_yZ); + ScaLBL_FreeDeviceMemory(recvbuf_Yz); + ScaLBL_FreeDeviceMemory(recvbuf_YZ); + ScaLBL_FreeDeviceMemory(dvcSendList_x); + ScaLBL_FreeDeviceMemory(dvcSendList_X); + ScaLBL_FreeDeviceMemory(dvcSendList_y); + ScaLBL_FreeDeviceMemory(dvcSendList_Y); + ScaLBL_FreeDeviceMemory(dvcSendList_z); + ScaLBL_FreeDeviceMemory(dvcSendList_Z); + ScaLBL_FreeDeviceMemory(dvcSendList_xy); + ScaLBL_FreeDeviceMemory(dvcSendList_xY); + ScaLBL_FreeDeviceMemory(dvcSendList_Xy); + ScaLBL_FreeDeviceMemory(dvcSendList_XY); + ScaLBL_FreeDeviceMemory(dvcSendList_xz); + ScaLBL_FreeDeviceMemory(dvcSendList_xZ); + ScaLBL_FreeDeviceMemory(dvcSendList_Xz); + ScaLBL_FreeDeviceMemory(dvcSendList_XZ); + ScaLBL_FreeDeviceMemory(dvcSendList_yz); + ScaLBL_FreeDeviceMemory(dvcSendList_yZ); + ScaLBL_FreeDeviceMemory(dvcSendList_Yz); + ScaLBL_FreeDeviceMemory(dvcSendList_YZ); + ScaLBL_FreeDeviceMemory(dvcRecvList_x); + ScaLBL_FreeDeviceMemory(dvcRecvList_X); + ScaLBL_FreeDeviceMemory(dvcRecvList_y); + ScaLBL_FreeDeviceMemory(dvcRecvList_Y); + ScaLBL_FreeDeviceMemory(dvcRecvList_z); + ScaLBL_FreeDeviceMemory(dvcRecvList_Z); + ScaLBL_FreeDeviceMemory(dvcRecvList_xy); + ScaLBL_FreeDeviceMemory(dvcRecvList_xY); + ScaLBL_FreeDeviceMemory(dvcRecvList_Xy); + ScaLBL_FreeDeviceMemory(dvcRecvList_XY); + ScaLBL_FreeDeviceMemory(dvcRecvList_xz); + ScaLBL_FreeDeviceMemory(dvcRecvList_xZ); + ScaLBL_FreeDeviceMemory(dvcRecvList_Xz); + ScaLBL_FreeDeviceMemory(dvcRecvList_XZ); + ScaLBL_FreeDeviceMemory(dvcRecvList_yz); + ScaLBL_FreeDeviceMemory(dvcRecvList_yZ); + ScaLBL_FreeDeviceMemory(dvcRecvList_Yz); + ScaLBL_FreeDeviceMemory(dvcRecvList_YZ); + 
ScaLBL_FreeDeviceMemory(dvcRecvDist_x); + ScaLBL_FreeDeviceMemory(dvcRecvDist_X); + ScaLBL_FreeDeviceMemory(dvcRecvDist_y); + ScaLBL_FreeDeviceMemory(dvcRecvDist_Y); + ScaLBL_FreeDeviceMemory(dvcRecvDist_z); + ScaLBL_FreeDeviceMemory(dvcRecvDist_Z); + ScaLBL_FreeDeviceMemory(dvcRecvDist_xy); + ScaLBL_FreeDeviceMemory(dvcRecvDist_xY); + ScaLBL_FreeDeviceMemory(dvcRecvDist_Xy); + ScaLBL_FreeDeviceMemory(dvcRecvDist_XY); + ScaLBL_FreeDeviceMemory(dvcRecvDist_xz); + ScaLBL_FreeDeviceMemory(dvcRecvDist_xZ); + ScaLBL_FreeDeviceMemory(dvcRecvDist_Xz); + ScaLBL_FreeDeviceMemory(dvcRecvDist_XZ); + ScaLBL_FreeDeviceMemory(dvcRecvDist_yz); + ScaLBL_FreeDeviceMemory(dvcRecvDist_yZ); + ScaLBL_FreeDeviceMemory(dvcRecvDist_Yz); + ScaLBL_FreeDeviceMemory(dvcRecvDist_YZ); } - -void ScaLBL_Communicator::start( std::vector>& requests ) -{ - for ( auto& req : requests ) - MPI_COMM_SCALBL.Start( *req ); +void ScaLBL_Communicator::start( + std::vector> &requests) { + for (auto &req : requests) + MPI_COMM_SCALBL.Start(*req); } -void ScaLBL_Communicator::wait( std::vector>& requests ) -{ +void ScaLBL_Communicator::wait( + std::vector> &requests) { std::vector request2; - for ( auto& req : requests ) - request2.push_back( *req ); - MPI_COMM_SCALBL.waitAll( request2.size(), request2.data() ); + for (auto &req : requests) + request2.push_back(*req); + MPI_COMM_SCALBL.waitAll(request2.size(), request2.data()); } - /******************************************************** * Get send/recv lists * ********************************************************/ int ScaLBL_Communicator::copyRecvList(const char *dir, int *buffer) { if (dir[0] == 'x') { - if (dir[1] == 0){ - int *TempBuffer = new int [recvCount_x]; - ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_x,recvCount_x*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,recvCount_x*sizeof(int)); - delete [] TempBuffer; + if (dir[1] == 0) { + int *TempBuffer = new int[recvCount_x]; + ScaLBL_CopyToHost(TempBuffer, dvcRecvDist_x, + recvCount_x * 
sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + recvCount_x * sizeof(int)); + delete[] TempBuffer; return recvCount_x; - } - else if (dir[1] == 'y') + } else if (dir[1] == 'y') return recvCount_xy; else if (dir[1] == 'Y') return recvCount_xY; @@ -482,34 +782,38 @@ int ScaLBL_Communicator::copyRecvList(const char *dir, int *buffer) { else if (dir[1] == 'Z') return recvCount_xZ; } else if (dir[0] == 'y') { - if (dir[1] == 0){ - int *TempBuffer = new int [recvCount_y]; - ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_y,recvCount_y*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,recvCount_y*sizeof(int)); - delete [] TempBuffer; + if (dir[1] == 0) { + int *TempBuffer = new int[recvCount_y]; + ScaLBL_CopyToHost(TempBuffer, dvcRecvDist_y, + recvCount_y * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + recvCount_y * sizeof(int)); + delete[] TempBuffer; return recvCount_y; - } - else if (dir[1] == 'z') + } else if (dir[1] == 'z') return recvCount_yz; else if (dir[1] == 'Z') return recvCount_yZ; } else if (dir[0] == 'z') { - if (dir[1] == 0){ - int *TempBuffer = new int [recvCount_z]; - ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_z,recvCount_z*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,recvCount_z*sizeof(int)); - delete [] TempBuffer; - return recvCount_z; + if (dir[1] == 0) { + int *TempBuffer = new int[recvCount_z]; + ScaLBL_CopyToHost(TempBuffer, dvcRecvDist_z, + recvCount_z * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + recvCount_z * sizeof(int)); + delete[] TempBuffer; + return recvCount_z; } } else if (dir[0] == 'X') { - if (dir[1] == 0){ - int *TempBuffer = new int [recvCount_X]; - ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_X,recvCount_X*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,recvCount_X*sizeof(int)); - delete [] TempBuffer; - return recvCount_X; - } - else if (dir[1] == 'y') + if (dir[1] == 0) { + int *TempBuffer = new int[recvCount_X]; + ScaLBL_CopyToHost(TempBuffer, dvcRecvDist_X, + recvCount_X * 
sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + recvCount_X * sizeof(int)); + delete[] TempBuffer; + return recvCount_X; + } else if (dir[1] == 'y') return recvCount_Xy; else if (dir[1] == 'Y') return recvCount_XY; @@ -518,39 +822,43 @@ int ScaLBL_Communicator::copyRecvList(const char *dir, int *buffer) { else if (dir[1] == 'Z') return recvCount_XZ; } else if (dir[0] == 'Y') { - if (dir[1] == 0){ - int *TempBuffer = new int [recvCount_Y]; - ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_Y,recvCount_Y*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,recvCount_Y*sizeof(int)); - delete [] TempBuffer; + if (dir[1] == 0) { + int *TempBuffer = new int[recvCount_Y]; + ScaLBL_CopyToHost(TempBuffer, dvcRecvDist_Y, + recvCount_Y * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + recvCount_Y * sizeof(int)); + delete[] TempBuffer; return recvCount_Y; - } - else if (dir[1] == 'z') + } else if (dir[1] == 'z') return recvCount_Yz; else if (dir[1] == 'Z') return recvCount_YZ; } else if (dir[0] == 'Z') { - if (dir[1] == 0){ - int *TempBuffer = new int [recvCount_Z]; - ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_Z,recvCount_Z*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,recvCount_Z*sizeof(int)); - delete [] TempBuffer; + if (dir[1] == 0) { + int *TempBuffer = new int[recvCount_Z]; + ScaLBL_CopyToHost(TempBuffer, dvcRecvDist_Z, + recvCount_Z * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + recvCount_Z * sizeof(int)); + delete[] TempBuffer; return recvCount_Z; } } throw std::logic_error("Internal error"); } -int ScaLBL_Communicator::copySendList(const char *dir, int *buffer) { +int ScaLBL_Communicator::copySendList(const char *dir, int *buffer) { if (dir[0] == 'x') { - if (dir[1] == 0){ - int *TempBuffer = new int [sendCount_x]; - ScaLBL_CopyToHost(TempBuffer,dvcSendList_x,sendCount_x*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,sendCount_x*sizeof(int)); - delete [] TempBuffer; + if (dir[1] == 0) { + int *TempBuffer = new 
int[sendCount_x]; + ScaLBL_CopyToHost(TempBuffer, dvcSendList_x, + sendCount_x * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + sendCount_x * sizeof(int)); + delete[] TempBuffer; return sendCount_x; - } - else if (dir[1] == 'y') + } else if (dir[1] == 'y') return sendCount_xy; else if (dir[1] == 'Y') return sendCount_xY; @@ -559,34 +867,38 @@ int ScaLBL_Communicator::copySendList(const char *dir, int *buffer) { else if (dir[1] == 'Z') return sendCount_xZ; } else if (dir[0] == 'y') { - if (dir[1] == 0){ - int *TempBuffer = new int [sendCount_y]; - ScaLBL_CopyToHost(TempBuffer,dvcSendList_y,sendCount_y*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,sendCount_y*sizeof(int)); - delete [] TempBuffer; + if (dir[1] == 0) { + int *TempBuffer = new int[sendCount_y]; + ScaLBL_CopyToHost(TempBuffer, dvcSendList_y, + sendCount_y * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + sendCount_y * sizeof(int)); + delete[] TempBuffer; return sendCount_y; - } - else if (dir[1] == 'z') + } else if (dir[1] == 'z') return sendCount_yz; else if (dir[1] == 'Z') return sendCount_yZ; } else if (dir[0] == 'z') { - if (dir[1] == 0){ - int *TempBuffer = new int [sendCount_z]; - ScaLBL_CopyToHost(TempBuffer,dvcSendList_z,sendCount_z*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,sendCount_z*sizeof(int)); - delete [] TempBuffer; - return sendCount_z; + if (dir[1] == 0) { + int *TempBuffer = new int[sendCount_z]; + ScaLBL_CopyToHost(TempBuffer, dvcSendList_z, + sendCount_z * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + sendCount_z * sizeof(int)); + delete[] TempBuffer; + return sendCount_z; } } else if (dir[0] == 'X') { - if (dir[1] == 0){ - int *TempBuffer = new int [sendCount_X]; - ScaLBL_CopyToHost(TempBuffer,dvcSendList_X,sendCount_X*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,sendCount_X*sizeof(int)); - delete [] TempBuffer; - return sendCount_X; - } - else if (dir[1] == 'y') + if (dir[1] == 0) { + int *TempBuffer = new 
int[sendCount_X]; + ScaLBL_CopyToHost(TempBuffer, dvcSendList_X, + sendCount_X * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + sendCount_X * sizeof(int)); + delete[] TempBuffer; + return sendCount_X; + } else if (dir[1] == 'y') return sendCount_Xy; else if (dir[1] == 'Y') return sendCount_XY; @@ -595,102 +907,112 @@ int ScaLBL_Communicator::copySendList(const char *dir, int *buffer) { else if (dir[1] == 'Z') return sendCount_XZ; } else if (dir[0] == 'Y') { - if (dir[1] == 0){ - int *TempBuffer = new int [sendCount_Y]; - ScaLBL_CopyToHost(TempBuffer,dvcSendList_Y,sendCount_Y*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,sendCount_Y*sizeof(int)); - delete [] TempBuffer; + if (dir[1] == 0) { + int *TempBuffer = new int[sendCount_Y]; + ScaLBL_CopyToHost(TempBuffer, dvcSendList_Y, + sendCount_Y * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + sendCount_Y * sizeof(int)); + delete[] TempBuffer; return sendCount_Y; - } - else if (dir[1] == 'z') + } else if (dir[1] == 'z') return sendCount_Yz; else if (dir[1] == 'Z') return sendCount_YZ; } else if (dir[0] == 'Z') { - if (dir[1] == 0){ - int *TempBuffer = new int [sendCount_Z]; - ScaLBL_CopyToHost(TempBuffer,dvcSendList_Z,sendCount_Z*sizeof(int)); - ScaLBL_CopyToZeroCopy(buffer,TempBuffer,sendCount_Z*sizeof(int)); - delete [] TempBuffer; + if (dir[1] == 0) { + int *TempBuffer = new int[sendCount_Z]; + ScaLBL_CopyToHost(TempBuffer, dvcSendList_Z, + sendCount_Z * sizeof(int)); + ScaLBL_CopyToZeroCopy(buffer, TempBuffer, + sendCount_Z * sizeof(int)); + delete[] TempBuffer; return sendCount_Z; } } throw std::logic_error("Internal error"); } - -double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np){ - /* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/ - /* use MRT kernels to check performance without communication / synchronization */ - int TIMESTEPS=500; - double RLX_SETA=1.0; - double RLX_SETB = 8.f*(2.f-RLX_SETA)/(8.f-RLX_SETA); - double FX = 0.0; - double FY = 0.0; - 
double FZ = 0.0; +double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, + int Np) { + /* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/ + /* use MRT kernels to check performance without communication / synchronization */ + int TIMESTEPS = 500; + double RLX_SETA = 1.0; + double RLX_SETB = 8.f * (2.f - RLX_SETA) / (8.f - RLX_SETA); + double FX = 0.0; + double FY = 0.0; + double FZ = 0.0; ScaLBL_D3Q19_Init(fq, Np); - //.......create and start timer............ - Barrier(); + //.......create and start timer............ + Barrier(); auto t1 = std::chrono::system_clock::now(); - for (int t=0; t( t2 - t1 ).count(); - double cputime = 0.5*diff/TIMESTEPS; - // Performance obtained from each node - double MLUPS = double(Np)/cputime/1000000; - return MLUPS; + Barrier(); + // Compute the walltime per timestep + double diff = std::chrono::duration(t2 - t1).count(); + double cputime = 0.5 * diff / TIMESTEPS; + // Performance obtained from each node + double MLUPS = double(Np) / cputime / 1000000; + return MLUPS; +} +int ScaLBL_Communicator::LastExterior() { return next; } +int ScaLBL_Communicator::FirstInterior() { return first_interior; } +int ScaLBL_Communicator::LastInterior() { return last_interior; } -} -int ScaLBL_Communicator::LastExterior(){ - return next; -} -int ScaLBL_Communicator::FirstInterior(){ - return first_interior; -} -int ScaLBL_Communicator::LastInterior(){ - return last_interior; +void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, + const int *list, int start, int count, + int *d3q19_recvlist) { + int i, j, k, n, nn, idx; + int *ReturnDist; + ReturnDist = new int[count]; + + for (idx = 0; idx < count; idx++) { + + // Get the value from the list -- note that n is the index is from the send (non-local) process + n = list[idx]; // if (rank == 0) printf("@ rank:%d n=%d\n",rank,n); + // Get the 3-D indices from the send process + k = n / (Nx * Ny); + j = (n - Nx * Ny * k) / Nx; + i = n - Nx * Ny * k - Nx * j; + // if (rank ==0) 
printf("@ Get 3D indices from the send process: i=%d, j=%d, k=%d\n",i,j,k); + + // Streaming for the non-local distribution + i += Cqx; + j += Cqy; + k += Cqz; + // if (rank == 0) printf("@ Streaming for the non-local distribution: i=%d, j=%d, k=%d\n",i,j,k); + + // Compute 1D index for the neighbor and save + nn = k * Nx * Ny + j * Nx + i; + // if (rank == 0) printf("@ rank:%d: neighbor=%d\n",rank,nn); + ReturnDist[idx] = nn; + } + + // Return updated version to the device + ScaLBL_CopyToDevice(&d3q19_recvlist[start], ReturnDist, + count * sizeof(int)); + + // clean up the work arrays + delete[] ReturnDist; } -void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count, - int *d3q19_recvlist){ - int i,j,k,n,nn,idx; - int * ReturnDist; - ReturnDist=new int [count]; - - for (idx=0; idx 0) - Map(i,j,k) = -2; // this label is for parallel communication sites - else - Map(i,j,k) = -1; // this label is for solid bounce-back sites - } - } - } - - //printf("Exterior... 
\n"); - - // ********* Exterior ********** - // Step 1/2: Index the outer walls of the grid only - idx=0; next=0; - for (k=1; k 0){ - // Counts for the six faces - if (i>0 && i<=width) Map(n)=idx++; - else if (j>0 && j<=width) Map(n)=idx++; - else if (k>0 && k<=width) Map(n)=idx++; - else if (i>Nx-width-2 && iNy-width-2 && jNz-width-2 && k 0 ){ - Map(n) = idx++; - //neighborList[idx++] = n; // index of self in regular layout - } - } - } - } - last_interior=idx; - - Np = (last_interior/16 + 1)*16; - //printf(" Np=%i \n",Np); - - // Now use Map to determine the neighbors for each lattice direction - for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); - else if (!(idx<0)){ - // store the idx associated with each neighbor - // store idx for self if neighbor is in solid or out of domain - //D3Q19 = {{1,0,0},{-1,0,0} - // {0,1,0},{0,-1,0} - // {0,0,1},{0,0,-1}, - // {1,1,0},{-1,-1,0}, - // {1,-1,0},{-1,1,0}, - // {1,0,1},{-1,0,-1}, - // {1,0,-1},{-1,0,1}, - // {0,1,1},{0,-1,-1}, - // {0,1,-1},{0,-1,1}}; - int neighbor; // cycle through the neighbors of lattice site idx - neighbor=Map(i-1,j,k); - if (neighbor<0) neighborList[idx]=idx + 2*Np; - else neighborList[idx]=neighbor + 1*Np; - - neighbor=Map(i+1,j,k); - if (neighbor<0) neighborList[Np+idx] = idx + 1*Np; - else neighborList[Np+idx]= neighbor + 2*Np; - - neighbor=Map(i,j-1,k); - if (neighbor<0) neighborList[2*Np+idx]=idx + 4*Np; - else neighborList[2*Np+idx]=neighbor + 3*Np; - - neighbor=Map(i,j+1,k); - if (neighbor<0) neighborList[3*Np+idx]=idx + 3*Np; - else neighborList[3*Np+idx]=neighbor + 4*Np; - - neighbor=Map(i,j,k-1); - if (neighbor<0) neighborList[4*Np+idx]=idx + 6*Np; - else neighborList[4*Np+idx]=neighbor + 5*Np; - - neighbor=Map(i,j,k+1); - if (neighbor<0) neighborList[5*Np+idx]=idx + 5*Np; - else neighborList[5*Np+idx]=neighbor + 6*Np; - - neighbor=Map(i-1,j-1,k); - if (neighbor<0) neighborList[6*Np+idx]=idx + 8*Np; - else 
neighborList[6*Np+idx]=neighbor + 7*Np; - - neighbor=Map(i+1,j+1,k); - if (neighbor<0) neighborList[7*Np+idx]=idx + 7*Np; - else neighborList[7*Np+idx]=neighbor+8*Np; - - neighbor=Map(i-1,j+1,k); - if (neighbor<0) neighborList[8*Np+idx]=idx + 10*Np; - else neighborList[8*Np+idx]=neighbor + 9*Np; - - neighbor=Map(i+1,j-1,k); - if (neighbor<0) neighborList[9*Np+idx]=idx + 9*Np; - else neighborList[9*Np+idx]=neighbor + 10*Np; - - neighbor=Map(i-1,j,k-1); - if (neighbor<0) neighborList[10*Np+idx]=idx + 12*Np; - else neighborList[10*Np+idx]=neighbor + 11*Np; - - neighbor=Map(i+1,j,k+1); - if (neighbor<0) neighborList[11*Np+idx]=idx + 11*Np; - else neighborList[11*Np+idx]=neighbor + 12*Np; - - neighbor=Map(i-1,j,k+1); - if (neighbor<0) neighborList[12*Np+idx]=idx + 14*Np; - else neighborList[12*Np+idx]=neighbor + 13*Np; - - neighbor=Map(i+1,j,k-1); - if (neighbor<0) neighborList[13*Np+idx]=idx + 13*Np; - else neighborList[13*Np+idx]=neighbor + 14*Np; - - neighbor=Map(i,j-1,k-1); - if (neighbor<0) neighborList[14*Np+idx]=idx + 16*Np; - else neighborList[14*Np+idx]=neighbor + 15*Np; - - neighbor=Map(i,j+1,k+1); - if (neighbor<0) neighborList[15*Np+idx]=idx + 15*Np; - else neighborList[15*Np+idx]=neighbor + 16*Np; - - neighbor=Map(i,j-1,k+1); - if (neighbor<0) neighborList[16*Np+idx]=idx + 18*Np; - else neighborList[16*Np+idx]=neighbor + 17*Np; - - neighbor=Map(i,j+1,k-1); - if (neighbor<0) neighborList[17*Np+idx]=idx + 17*Np; - else neighborList[17*Np+idx]=neighbor + 18*Np; - } - } - } - } - - //for (idx=0; idx 0) + Map(i, j, k) = + -2; // this label is for parallel communication sites + else + Map(i, j, k) = + -1; // this label is for solid bounce-back sites + } + } + } + + //printf("Exterior... 
\n"); + + // ********* Exterior ********** + // Step 1/2: Index the outer walls of the grid only + idx = 0; + next = 0; + for (k = 1; k < Nz - 1; k++) { + for (j = 1; j < Ny - 1; j++) { + for (i = 1; i < Nx - 1; i++) { + // domain interior + Map(i, j, k) = -1; + // Local index + n = k * Nx * Ny + j * Nx + i; + if (id[n] > 0) { + // Counts for the six faces + if (i > 0 && i <= width) + Map(n) = idx++; + else if (j > 0 && j <= width) + Map(n) = idx++; + else if (k > 0 && k <= width) + Map(n) = idx++; + else if (i > Nx - width - 2 && i < Nx - 1) + Map(n) = idx++; + else if (j > Ny - width - 2 && j < Ny - 1) + Map(n) = idx++; + else if (k > Nz - width - 2 && k < Nz - 1) + Map(n) = idx++; + } + } + } + } + next = idx; + + // ********* Interior ********** + // align the next read + first_interior = (next / 16 + 1) * 16; + idx = first_interior; + // Step 2/2: Next loop over the domain interior in block-cyclic fashion + for (k = width + 1; k < Nz - width - 1; k++) { + for (j = width + 1; j < Ny - width - 1; j++) { + for (i = width + 1; i < Nx - width - 1; i++) { + // Local index (regular layout) + n = k * Nx * Ny + j * Nx + i; + if (id[n] > 0) { + Map(n) = idx++; + //neighborList[idx++] = n; // index of self in regular layout + } + } + } + } + last_interior = idx; + + Np = (last_interior / 16 + 1) * 16; + //printf(" Np=%i \n",Np); + + // Now use Map to determine the neighbors for each lattice direction + for (k = 1; k < Nz - 1; k++) { + for (j = 1; j < Ny - 1; j++) { + for (i = 1; i < Nx - 1; i++) { + n = k * Nx * Ny + j * Nx + i; + idx = Map(i, j, k); + if (idx > Np) + printf("ScaLBL_Communicator::MemoryOptimizedLayout: " + "Map(%i,%i,%i) = %i > %i \n", + i, j, k, Map(i, j, k), Np); + else if (!(idx < 0)) { + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // 
{1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + int neighbor; // cycle through the neighbors of lattice site idx + neighbor = Map(i - 1, j, k); + if (neighbor < 0) + neighborList[idx] = idx + 2 * Np; + else + neighborList[idx] = neighbor + 1 * Np; + + neighbor = Map(i + 1, j, k); + if (neighbor < 0) + neighborList[Np + idx] = idx + 1 * Np; + else + neighborList[Np + idx] = neighbor + 2 * Np; + + neighbor = Map(i, j - 1, k); + if (neighbor < 0) + neighborList[2 * Np + idx] = idx + 4 * Np; + else + neighborList[2 * Np + idx] = neighbor + 3 * Np; + + neighbor = Map(i, j + 1, k); + if (neighbor < 0) + neighborList[3 * Np + idx] = idx + 3 * Np; + else + neighborList[3 * Np + idx] = neighbor + 4 * Np; + + neighbor = Map(i, j, k - 1); + if (neighbor < 0) + neighborList[4 * Np + idx] = idx + 6 * Np; + else + neighborList[4 * Np + idx] = neighbor + 5 * Np; + + neighbor = Map(i, j, k + 1); + if (neighbor < 0) + neighborList[5 * Np + idx] = idx + 5 * Np; + else + neighborList[5 * Np + idx] = neighbor + 6 * Np; + + neighbor = Map(i - 1, j - 1, k); + if (neighbor < 0) + neighborList[6 * Np + idx] = idx + 8 * Np; + else + neighborList[6 * Np + idx] = neighbor + 7 * Np; + + neighbor = Map(i + 1, j + 1, k); + if (neighbor < 0) + neighborList[7 * Np + idx] = idx + 7 * Np; + else + neighborList[7 * Np + idx] = neighbor + 8 * Np; + + neighbor = Map(i - 1, j + 1, k); + if (neighbor < 0) + neighborList[8 * Np + idx] = idx + 10 * Np; + else + neighborList[8 * Np + idx] = neighbor + 9 * Np; + + neighbor = Map(i + 1, j - 1, k); + if (neighbor < 0) + neighborList[9 * Np + idx] = idx + 9 * Np; + else + neighborList[9 * Np + idx] = neighbor + 10 * Np; + + neighbor = Map(i - 1, j, k - 1); + if (neighbor < 0) + neighborList[10 * Np + idx] = idx + 12 * Np; + else + neighborList[10 * Np + idx] = neighbor + 11 * Np; + + neighbor = Map(i + 1, j, k + 1); + if (neighbor < 0) + neighborList[11 * Np + idx] = idx + 11 * Np; + else + neighborList[11 * Np + idx] = neighbor + 12 * 
Np; + + neighbor = Map(i - 1, j, k + 1); + if (neighbor < 0) + neighborList[12 * Np + idx] = idx + 14 * Np; + else + neighborList[12 * Np + idx] = neighbor + 13 * Np; + + neighbor = Map(i + 1, j, k - 1); + if (neighbor < 0) + neighborList[13 * Np + idx] = idx + 13 * Np; + else + neighborList[13 * Np + idx] = neighbor + 14 * Np; + + neighbor = Map(i, j - 1, k - 1); + if (neighbor < 0) + neighborList[14 * Np + idx] = idx + 16 * Np; + else + neighborList[14 * Np + idx] = neighbor + 15 * Np; + + neighbor = Map(i, j + 1, k + 1); + if (neighbor < 0) + neighborList[15 * Np + idx] = idx + 15 * Np; + else + neighborList[15 * Np + idx] = neighbor + 16 * Np; + + neighbor = Map(i, j - 1, k + 1); + if (neighbor < 0) + neighborList[16 * Np + idx] = idx + 18 * Np; + else + neighborList[16 * Np + idx] = neighbor + 17 * Np; + + neighbor = Map(i, j + 1, k - 1); + if (neighbor < 0) + neighborList[17 * Np + idx] = idx + 17 * Np; + else + neighborList[17 * Np + idx] = neighbor + 18 * Np; + } + } + } + } + + //for (idx=0; idx 0){ + neighbor = Map(i, j - 1, k + 1); + if (neighbor == -1) + local_count++; - int *bb_dist_tmp = new int [local_count]; - int *bb_interactions_tmp = new int [local_count]; - ScaLBL_AllocateDeviceMemory((void **) &bb_dist, sizeof(int)*local_count); - ScaLBL_AllocateDeviceMemory((void **) &bb_interactions, sizeof(int)*local_count); - int *fluid_boundary_tmp; - double *lattice_weight_tmp; - float *lattice_cx_tmp; - float *lattice_cy_tmp; - float *lattice_cz_tmp; - /* allocate memory for bounce-back sites */ - fluid_boundary_tmp = new int [local_count]; - lattice_weight_tmp = new double [local_count]; - lattice_cx_tmp = new float [local_count]; - lattice_cy_tmp = new float [local_count]; - lattice_cz_tmp = new float [local_count]; - ScaLBL_AllocateDeviceMemory((void **) &fluid_boundary, sizeof(int)*local_count); - ScaLBL_AllocateDeviceMemory((void **) &lattice_weight, sizeof(double)*local_count); - ScaLBL_AllocateDeviceMemory((void **) &lattice_cx, 
sizeof(float)*local_count); - ScaLBL_AllocateDeviceMemory((void **) &lattice_cy, sizeof(float)*local_count); - ScaLBL_AllocateDeviceMemory((void **) &lattice_cz, sizeof(float)*local_count); + neighbor = Map(i, j + 1, k - 1); + if (neighbor == -1) + local_count++; + } + } + } + } + if (local_count > 0) { - local_count=0; - for (k=1;k Dirichlet BC; 2-> Neumann BC. - ScaLBL_Solid_DirichletAndNeumann_D3Q7(fq,BoundaryValue,BoundaryLabel,bb_dist,bb_interactions,n_bb_d3q7); + ScaLBL_Solid_DirichletAndNeumann_D3Q7(fq, BoundaryValue, BoundaryLabel, + bb_dist, bb_interactions, n_bb_d3q7); } -void ScaLBL_Communicator::SolidSlippingVelocityBCD3Q19(double *fq, double *zeta_potential, double *ElectricField, double *SolidGrad, - double epsilon_LB, double tau, double rho0, double den_scale,double h, double time_conv){ - // fq is a D3Q19 distribution - // BoundaryValues is a list of values to assign at bounce-back solid sites - ScaLBL_Solid_SlippingVelocityBC_D3Q19(fq,zeta_potential,ElectricField,SolidGrad,epsilon_LB,tau,rho0,den_scale,h,time_conv, - bb_dist,bb_interactions,fluid_boundary,lattice_weight,lattice_cx,lattice_cy,lattice_cz,n_bb_d3q19,N); +void ScaLBL_Communicator::SolidSlippingVelocityBCD3Q19( + double *fq, double *zeta_potential, double *ElectricField, + double *SolidGrad, double epsilon_LB, double tau, double rho0, + double den_scale, double h, double time_conv) { + // fq is a D3Q19 distribution + // BoundaryValues is a list of values to assign at bounce-back solid sites + ScaLBL_Solid_SlippingVelocityBC_D3Q19( + fq, zeta_potential, ElectricField, SolidGrad, epsilon_LB, tau, rho0, + den_scale, h, time_conv, bb_dist, bb_interactions, fluid_boundary, + lattice_weight, lattice_cx, lattice_cy, lattice_cz, n_bb_d3q19, N); } -void ScaLBL_Communicator::SendD3Q19AA(double *dist){ +void ScaLBL_Communicator::SendD3Q19AA(double *dist) { - if (Lock==true){ - ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); - } - else{ 
- Lock=true; - } - ScaLBL_DeviceBarrier(); - // Pack the distributions - //...Packing for x face(2,8,10,12,14)................................ - ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,dist,N); - ScaLBL_D3Q19_Pack(8,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,dist,N); - ScaLBL_D3Q19_Pack(10,dvcSendList_x,2*sendCount_x,sendCount_x,sendbuf_x,dist,N); - ScaLBL_D3Q19_Pack(12,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,dist,N); - ScaLBL_D3Q19_Pack(14,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,dist,N); - - //...Packing for X face(1,7,9,11,13)................................ - ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,N); - ScaLBL_D3Q19_Pack(7,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,dist,N); - ScaLBL_D3Q19_Pack(9,dvcSendList_X,2*sendCount_X,sendCount_X,sendbuf_X,dist,N); - ScaLBL_D3Q19_Pack(11,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,dist,N); - ScaLBL_D3Q19_Pack(13,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,dist,N); - - //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,dist,N); - ScaLBL_D3Q19_Pack(8,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,dist,N); - ScaLBL_D3Q19_Pack(9,dvcSendList_y,2*sendCount_y,sendCount_y,sendbuf_y,dist,N); - ScaLBL_D3Q19_Pack(16,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,dist,N); - ScaLBL_D3Q19_Pack(18,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,dist,N); - - //...Packing for Y face(3,7,10,15,17)................................. 
- ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,dist,N); - ScaLBL_D3Q19_Pack(7,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); - ScaLBL_D3Q19_Pack(10,dvcSendList_Y,2*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); - ScaLBL_D3Q19_Pack(15,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); - ScaLBL_D3Q19_Pack(17,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N); - - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,dist,N); - ScaLBL_D3Q19_Pack(12,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,dist,N); - ScaLBL_D3Q19_Pack(13,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,dist,N); - ScaLBL_D3Q19_Pack(16,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,dist,N); - ScaLBL_D3Q19_Pack(17,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,dist,N); - - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,dist,N); - ScaLBL_D3Q19_Pack(11,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); - ScaLBL_D3Q19_Pack(14,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); - ScaLBL_D3Q19_Pack(15,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); - ScaLBL_D3Q19_Pack(18,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N); + if (Lock == true) { + ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did " + "you forget to match Send/Recv calls?"); + } else { + Lock = true; + } + ScaLBL_DeviceBarrier(); + // Pack the distributions + //...Packing for x face(2,8,10,12,14)................................ 
+ ScaLBL_D3Q19_Pack(2, dvcSendList_x, 0, sendCount_x, sendbuf_x, dist, N); + ScaLBL_D3Q19_Pack(8, dvcSendList_x, sendCount_x, sendCount_x, sendbuf_x, + dist, N); + ScaLBL_D3Q19_Pack(10, dvcSendList_x, 2 * sendCount_x, sendCount_x, + sendbuf_x, dist, N); + ScaLBL_D3Q19_Pack(12, dvcSendList_x, 3 * sendCount_x, sendCount_x, + sendbuf_x, dist, N); + ScaLBL_D3Q19_Pack(14, dvcSendList_x, 4 * sendCount_x, sendCount_x, + sendbuf_x, dist, N); - //...Pack the xy edge (8)................................ - ScaLBL_D3Q19_Pack(8,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,dist,N); - //...Pack the Xy edge (9)................................ - ScaLBL_D3Q19_Pack(9,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,dist,N); - //...Pack the xY edge (10)................................ - ScaLBL_D3Q19_Pack(10,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,dist,N); - //...Pack the XY edge (7)................................ - ScaLBL_D3Q19_Pack(7,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,dist,N); - //...Pack the xz edge (12)................................ - ScaLBL_D3Q19_Pack(12,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,dist,N); - - //...Pack the xZ edge (14)................................ - ScaLBL_D3Q19_Pack(14,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,dist,N); - //...Pack the Xz edge (13)................................ - ScaLBL_D3Q19_Pack(13,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,dist,N); + //...Packing for X face(1,7,9,11,13)................................ + ScaLBL_D3Q19_Pack(1, dvcSendList_X, 0, sendCount_X, sendbuf_X, dist, N); + ScaLBL_D3Q19_Pack(7, dvcSendList_X, sendCount_X, sendCount_X, sendbuf_X, + dist, N); + ScaLBL_D3Q19_Pack(9, dvcSendList_X, 2 * sendCount_X, sendCount_X, sendbuf_X, + dist, N); + ScaLBL_D3Q19_Pack(11, dvcSendList_X, 3 * sendCount_X, sendCount_X, + sendbuf_X, dist, N); + ScaLBL_D3Q19_Pack(13, dvcSendList_X, 4 * sendCount_X, sendCount_X, + sendbuf_X, dist, N); - //...Pack the XZ edge (11)................................ 
- ScaLBL_D3Q19_Pack(11,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,dist,N); - //...Pack the yz edge (16)................................ - ScaLBL_D3Q19_Pack(16,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,dist,N); - //...Pack the yZ edge (18)................................ - ScaLBL_D3Q19_Pack(18,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,dist,N); - //...Pack the Yz edge (17)................................ - ScaLBL_D3Q19_Pack(17,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,dist,N); - //...Pack the YZ edge (15)................................ - ScaLBL_D3Q19_Pack(15,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,dist,N); + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_D3Q19_Pack(4, dvcSendList_y, 0, sendCount_y, sendbuf_y, dist, N); + ScaLBL_D3Q19_Pack(8, dvcSendList_y, sendCount_y, sendCount_y, sendbuf_y, + dist, N); + ScaLBL_D3Q19_Pack(9, dvcSendList_y, 2 * sendCount_y, sendCount_y, sendbuf_y, + dist, N); + ScaLBL_D3Q19_Pack(16, dvcSendList_y, 3 * sendCount_y, sendCount_y, + sendbuf_y, dist, N); + ScaLBL_D3Q19_Pack(18, dvcSendList_y, 4 * sendCount_y, sendCount_y, + sendbuf_y, dist, N); - //................................................................................... + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q19_Pack(3, dvcSendList_Y, 0, sendCount_Y, sendbuf_Y, dist, N); + ScaLBL_D3Q19_Pack(7, dvcSendList_Y, sendCount_Y, sendCount_Y, sendbuf_Y, + dist, N); + ScaLBL_D3Q19_Pack(10, dvcSendList_Y, 2 * sendCount_Y, sendCount_Y, + sendbuf_Y, dist, N); + ScaLBL_D3Q19_Pack(15, dvcSendList_Y, 3 * sendCount_Y, sendCount_Y, + sendbuf_Y, dist, N); + ScaLBL_D3Q19_Pack(17, dvcSendList_Y, 4 * sendCount_Y, sendCount_Y, + sendbuf_Y, dist, N); - ScaLBL_DeviceBarrier(); - start( req_D3Q19AA ); + //...Packing for z face(6,12,13,16,17)................................ 
+ ScaLBL_D3Q19_Pack(6, dvcSendList_z, 0, sendCount_z, sendbuf_z, dist, N); + ScaLBL_D3Q19_Pack(12, dvcSendList_z, sendCount_z, sendCount_z, sendbuf_z, + dist, N); + ScaLBL_D3Q19_Pack(13, dvcSendList_z, 2 * sendCount_z, sendCount_z, + sendbuf_z, dist, N); + ScaLBL_D3Q19_Pack(16, dvcSendList_z, 3 * sendCount_z, sendCount_z, + sendbuf_z, dist, N); + ScaLBL_D3Q19_Pack(17, dvcSendList_z, 4 * sendCount_z, sendCount_z, + sendbuf_z, dist, N); + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q19_Pack(5, dvcSendList_Z, 0, sendCount_Z, sendbuf_Z, dist, N); + ScaLBL_D3Q19_Pack(11, dvcSendList_Z, sendCount_Z, sendCount_Z, sendbuf_Z, + dist, N); + ScaLBL_D3Q19_Pack(14, dvcSendList_Z, 2 * sendCount_Z, sendCount_Z, + sendbuf_Z, dist, N); + ScaLBL_D3Q19_Pack(15, dvcSendList_Z, 3 * sendCount_Z, sendCount_Z, + sendbuf_Z, dist, N); + ScaLBL_D3Q19_Pack(18, dvcSendList_Z, 4 * sendCount_Z, sendCount_Z, + sendbuf_Z, dist, N); + + //...Pack the xy edge (8)................................ + ScaLBL_D3Q19_Pack(8, dvcSendList_xy, 0, sendCount_xy, sendbuf_xy, dist, N); + //...Pack the Xy edge (9)................................ + ScaLBL_D3Q19_Pack(9, dvcSendList_Xy, 0, sendCount_Xy, sendbuf_Xy, dist, N); + //...Pack the xY edge (10)................................ + ScaLBL_D3Q19_Pack(10, dvcSendList_xY, 0, sendCount_xY, sendbuf_xY, dist, N); + //...Pack the XY edge (7)................................ + ScaLBL_D3Q19_Pack(7, dvcSendList_XY, 0, sendCount_XY, sendbuf_XY, dist, N); + //...Pack the xz edge (12)................................ + ScaLBL_D3Q19_Pack(12, dvcSendList_xz, 0, sendCount_xz, sendbuf_xz, dist, N); + + //...Pack the xZ edge (14)................................ + ScaLBL_D3Q19_Pack(14, dvcSendList_xZ, 0, sendCount_xZ, sendbuf_xZ, dist, N); + //...Pack the Xz edge (13)................................ + ScaLBL_D3Q19_Pack(13, dvcSendList_Xz, 0, sendCount_Xz, sendbuf_Xz, dist, N); + + //...Pack the XZ edge (11)................................ 
+ ScaLBL_D3Q19_Pack(11, dvcSendList_XZ, 0, sendCount_XZ, sendbuf_XZ, dist, N); + //...Pack the yz edge (16)................................ + ScaLBL_D3Q19_Pack(16, dvcSendList_yz, 0, sendCount_yz, sendbuf_yz, dist, N); + //...Pack the yZ edge (18)................................ + ScaLBL_D3Q19_Pack(18, dvcSendList_yZ, 0, sendCount_yZ, sendbuf_yZ, dist, N); + //...Pack the Yz edge (17)................................ + ScaLBL_D3Q19_Pack(17, dvcSendList_Yz, 0, sendCount_Yz, sendbuf_Yz, dist, N); + //...Pack the YZ edge (15)................................ + ScaLBL_D3Q19_Pack(15, dvcSendList_YZ, 0, sendCount_YZ, sendbuf_YZ, dist, N); + + //................................................................................... + + ScaLBL_DeviceBarrier(); + start(req_D3Q19AA); } -void ScaLBL_Communicator::RecvD3Q19AA(double *dist){ +void ScaLBL_Communicator::RecvD3Q19AA(double *dist) { - // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 - //................................................................................... - // Wait for completion of D3Q19 communication - wait( req_D3Q19AA ); - ScaLBL_DeviceBarrier(); + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + //................................................................................... + // Wait for completion of D3Q19 communication + wait(req_D3Q19AA); + ScaLBL_DeviceBarrier(); - //................................................................................... - // NOTE: AA Routine writes to opposite - // Unpack the distributions on the device - //................................................................................... - //...Unpacking for x face(2,8,10,12,14)................................ 
- ScaLBL_D3Q19_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,dist,N); - ScaLBL_D3Q19_Unpack(8,dvcRecvDist_x,recvCount_x,recvCount_x,recvbuf_x,dist,N); - ScaLBL_D3Q19_Unpack(10,dvcRecvDist_x,2*recvCount_x,recvCount_x,recvbuf_x,dist,N); - ScaLBL_D3Q19_Unpack(12,dvcRecvDist_x,3*recvCount_x,recvCount_x,recvbuf_x,dist,N); - ScaLBL_D3Q19_Unpack(14,dvcRecvDist_x,4*recvCount_x,recvCount_x,recvbuf_x,dist,N); - //................................................................................... - //...Packing for X face(1,7,9,11,13)................................ - ScaLBL_D3Q19_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,dist,N); - ScaLBL_D3Q19_Unpack(7,dvcRecvDist_X,recvCount_X,recvCount_X,recvbuf_X,dist,N); - ScaLBL_D3Q19_Unpack(9,dvcRecvDist_X,2*recvCount_X,recvCount_X,recvbuf_X,dist,N); - ScaLBL_D3Q19_Unpack(11,dvcRecvDist_X,3*recvCount_X,recvCount_X,recvbuf_X,dist,N); - ScaLBL_D3Q19_Unpack(13,dvcRecvDist_X,4*recvCount_X,recvCount_X,recvbuf_X,dist,N); - //................................................................................... - //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q19_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,dist,N); - ScaLBL_D3Q19_Unpack(8,dvcRecvDist_y,recvCount_y,recvCount_y,recvbuf_y,dist,N); - ScaLBL_D3Q19_Unpack(9,dvcRecvDist_y,2*recvCount_y,recvCount_y,recvbuf_y,dist,N); - ScaLBL_D3Q19_Unpack(16,dvcRecvDist_y,3*recvCount_y,recvCount_y,recvbuf_y,dist,N); - ScaLBL_D3Q19_Unpack(18,dvcRecvDist_y,4*recvCount_y,recvCount_y,recvbuf_y,dist,N); - //................................................................................... - //...Packing for Y face(3,7,10,15,17)................................. 
- ScaLBL_D3Q19_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,dist,N); - ScaLBL_D3Q19_Unpack(7,dvcRecvDist_Y,recvCount_Y,recvCount_Y,recvbuf_Y,dist,N); - ScaLBL_D3Q19_Unpack(10,dvcRecvDist_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,dist,N); - ScaLBL_D3Q19_Unpack(15,dvcRecvDist_Y,3*recvCount_Y,recvCount_Y,recvbuf_Y,dist,N); - ScaLBL_D3Q19_Unpack(17,dvcRecvDist_Y,4*recvCount_Y,recvCount_Y,recvbuf_Y,dist,N); - //................................................................................... - - //.................................................................................. - //...Pack the xy edge (8)................................ - ScaLBL_D3Q19_Unpack(8,dvcRecvDist_xy,0,recvCount_xy,recvbuf_xy,dist,N); - //...Pack the Xy edge (9)................................ - ScaLBL_D3Q19_Unpack(9,dvcRecvDist_Xy,0,recvCount_Xy,recvbuf_Xy,dist,N); - //...Pack the xY edge (10)................................ - ScaLBL_D3Q19_Unpack(10,dvcRecvDist_xY,0,recvCount_xY,recvbuf_xY,dist,N); - //...Pack the XY edge (7)................................ - ScaLBL_D3Q19_Unpack(7,dvcRecvDist_XY,0,recvCount_XY,recvbuf_XY,dist,N); - - //if (BoundaryCondition == 0 || kproc != 0 ){ - ScaLBL_D3Q19_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,dist,N); - ScaLBL_D3Q19_Unpack(12,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,dist,N); - ScaLBL_D3Q19_Unpack(13,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,dist,N); - ScaLBL_D3Q19_Unpack(16,dvcRecvDist_z,3*recvCount_z,recvCount_z,recvbuf_z,dist,N); - ScaLBL_D3Q19_Unpack(17,dvcRecvDist_z,4*recvCount_z,recvCount_z,recvbuf_z,dist,N); - - //...Pack the xz edge (12)................................ - ScaLBL_D3Q19_Unpack(12,dvcRecvDist_xz,0,recvCount_xz,recvbuf_xz,dist,N); - //...Pack the Xz edge (13)................................ - ScaLBL_D3Q19_Unpack(13,dvcRecvDist_Xz,0,recvCount_Xz,recvbuf_Xz,dist,N); - //...Pack the yz edge (16)................................ 
- ScaLBL_D3Q19_Unpack(16,dvcRecvDist_yz,0,recvCount_yz,recvbuf_yz,dist,N); - //...Pack the Yz edge (17)................................ - ScaLBL_D3Q19_Unpack(17,dvcRecvDist_Yz,0,recvCount_Yz,recvbuf_Yz,dist,N); - - //} - //if (BoundaryCondition == 0 || kproc != nprocz-1){ - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q19_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,dist,N); - ScaLBL_D3Q19_Unpack(11,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); - ScaLBL_D3Q19_Unpack(14,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); - ScaLBL_D3Q19_Unpack(15,dvcRecvDist_Z,3*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); - ScaLBL_D3Q19_Unpack(18,dvcRecvDist_Z,4*recvCount_Z,recvCount_Z,recvbuf_Z,dist,N); + //................................................................................... + // NOTE: AA Routine writes to opposite + // Unpack the distributions on the device + //................................................................................... + //...Unpacking for x face(2,8,10,12,14)................................ + ScaLBL_D3Q19_Unpack(2, dvcRecvDist_x, 0, recvCount_x, recvbuf_x, dist, N); + ScaLBL_D3Q19_Unpack(8, dvcRecvDist_x, recvCount_x, recvCount_x, recvbuf_x, + dist, N); + ScaLBL_D3Q19_Unpack(10, dvcRecvDist_x, 2 * recvCount_x, recvCount_x, + recvbuf_x, dist, N); + ScaLBL_D3Q19_Unpack(12, dvcRecvDist_x, 3 * recvCount_x, recvCount_x, + recvbuf_x, dist, N); + ScaLBL_D3Q19_Unpack(14, dvcRecvDist_x, 4 * recvCount_x, recvCount_x, + recvbuf_x, dist, N); + //................................................................................... + //...Packing for X face(1,7,9,11,13)................................ 
+ ScaLBL_D3Q19_Unpack(1, dvcRecvDist_X, 0, recvCount_X, recvbuf_X, dist, N); + ScaLBL_D3Q19_Unpack(7, dvcRecvDist_X, recvCount_X, recvCount_X, recvbuf_X, + dist, N); + ScaLBL_D3Q19_Unpack(9, dvcRecvDist_X, 2 * recvCount_X, recvCount_X, + recvbuf_X, dist, N); + ScaLBL_D3Q19_Unpack(11, dvcRecvDist_X, 3 * recvCount_X, recvCount_X, + recvbuf_X, dist, N); + ScaLBL_D3Q19_Unpack(13, dvcRecvDist_X, 4 * recvCount_X, recvCount_X, + recvbuf_X, dist, N); + //................................................................................... + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_D3Q19_Unpack(4, dvcRecvDist_y, 0, recvCount_y, recvbuf_y, dist, N); + ScaLBL_D3Q19_Unpack(8, dvcRecvDist_y, recvCount_y, recvCount_y, recvbuf_y, + dist, N); + ScaLBL_D3Q19_Unpack(9, dvcRecvDist_y, 2 * recvCount_y, recvCount_y, + recvbuf_y, dist, N); + ScaLBL_D3Q19_Unpack(16, dvcRecvDist_y, 3 * recvCount_y, recvCount_y, + recvbuf_y, dist, N); + ScaLBL_D3Q19_Unpack(18, dvcRecvDist_y, 4 * recvCount_y, recvCount_y, + recvbuf_y, dist, N); + //................................................................................... + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q19_Unpack(3, dvcRecvDist_Y, 0, recvCount_Y, recvbuf_Y, dist, N); + ScaLBL_D3Q19_Unpack(7, dvcRecvDist_Y, recvCount_Y, recvCount_Y, recvbuf_Y, + dist, N); + ScaLBL_D3Q19_Unpack(10, dvcRecvDist_Y, 2 * recvCount_Y, recvCount_Y, + recvbuf_Y, dist, N); + ScaLBL_D3Q19_Unpack(15, dvcRecvDist_Y, 3 * recvCount_Y, recvCount_Y, + recvbuf_Y, dist, N); + ScaLBL_D3Q19_Unpack(17, dvcRecvDist_Y, 4 * recvCount_Y, recvCount_Y, + recvbuf_Y, dist, N); + //................................................................................... - //...Pack the xZ edge (14)................................ - ScaLBL_D3Q19_Unpack(14,dvcRecvDist_xZ,0,recvCount_xZ,recvbuf_xZ,dist,N); - //...Pack the XZ edge (11)................................ 
- ScaLBL_D3Q19_Unpack(11,dvcRecvDist_XZ,0,recvCount_XZ,recvbuf_XZ,dist,N); - //...Pack the yZ edge (18)................................ - ScaLBL_D3Q19_Unpack(18,dvcRecvDist_yZ,0,recvCount_yZ,recvbuf_yZ,dist,N); - //...Pack the YZ edge (15)................................ - ScaLBL_D3Q19_Unpack(15,dvcRecvDist_YZ,0,recvCount_YZ,recvbuf_YZ,dist,N); - //} + //.................................................................................. + //...Pack the xy edge (8)................................ + ScaLBL_D3Q19_Unpack(8, dvcRecvDist_xy, 0, recvCount_xy, recvbuf_xy, dist, + N); + //...Pack the Xy edge (9)................................ + ScaLBL_D3Q19_Unpack(9, dvcRecvDist_Xy, 0, recvCount_Xy, recvbuf_Xy, dist, + N); + //...Pack the xY edge (10)................................ + ScaLBL_D3Q19_Unpack(10, dvcRecvDist_xY, 0, recvCount_xY, recvbuf_xY, dist, + N); + //...Pack the XY edge (7)................................ + ScaLBL_D3Q19_Unpack(7, dvcRecvDist_XY, 0, recvCount_XY, recvbuf_XY, dist, + N); - //................................................................................... - Lock=false; // unlock the communicator after communications complete - //................................................................................... + //if (BoundaryCondition == 0 || kproc != 0 ){ + ScaLBL_D3Q19_Unpack(6, dvcRecvDist_z, 0, recvCount_z, recvbuf_z, dist, N); + ScaLBL_D3Q19_Unpack(12, dvcRecvDist_z, recvCount_z, recvCount_z, recvbuf_z, + dist, N); + ScaLBL_D3Q19_Unpack(13, dvcRecvDist_z, 2 * recvCount_z, recvCount_z, + recvbuf_z, dist, N); + ScaLBL_D3Q19_Unpack(16, dvcRecvDist_z, 3 * recvCount_z, recvCount_z, + recvbuf_z, dist, N); + ScaLBL_D3Q19_Unpack(17, dvcRecvDist_z, 4 * recvCount_z, recvCount_z, + recvbuf_z, dist, N); + //...Pack the xz edge (12)................................ + ScaLBL_D3Q19_Unpack(12, dvcRecvDist_xz, 0, recvCount_xz, recvbuf_xz, dist, + N); + //...Pack the Xz edge (13)................................ 
+ ScaLBL_D3Q19_Unpack(13, dvcRecvDist_Xz, 0, recvCount_Xz, recvbuf_Xz, dist, + N); + //...Pack the yz edge (16)................................ + ScaLBL_D3Q19_Unpack(16, dvcRecvDist_yz, 0, recvCount_yz, recvbuf_yz, dist, + N); + //...Pack the Yz edge (17)................................ + ScaLBL_D3Q19_Unpack(17, dvcRecvDist_Yz, 0, recvCount_Yz, recvbuf_Yz, dist, + N); + + //} + //if (BoundaryCondition == 0 || kproc != nprocz-1){ + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q19_Unpack(5, dvcRecvDist_Z, 0, recvCount_Z, recvbuf_Z, dist, N); + ScaLBL_D3Q19_Unpack(11, dvcRecvDist_Z, recvCount_Z, recvCount_Z, recvbuf_Z, + dist, N); + ScaLBL_D3Q19_Unpack(14, dvcRecvDist_Z, 2 * recvCount_Z, recvCount_Z, + recvbuf_Z, dist, N); + ScaLBL_D3Q19_Unpack(15, dvcRecvDist_Z, 3 * recvCount_Z, recvCount_Z, + recvbuf_Z, dist, N); + ScaLBL_D3Q19_Unpack(18, dvcRecvDist_Z, 4 * recvCount_Z, recvCount_Z, + recvbuf_Z, dist, N); + + //...Pack the xZ edge (14)................................ + ScaLBL_D3Q19_Unpack(14, dvcRecvDist_xZ, 0, recvCount_xZ, recvbuf_xZ, dist, + N); + //...Pack the XZ edge (11)................................ + ScaLBL_D3Q19_Unpack(11, dvcRecvDist_XZ, 0, recvCount_XZ, recvbuf_XZ, dist, + N); + //...Pack the yZ edge (18)................................ + ScaLBL_D3Q19_Unpack(18, dvcRecvDist_yZ, 0, recvCount_yZ, recvbuf_yZ, dist, + N); + //...Pack the YZ edge (15)................................ + ScaLBL_D3Q19_Unpack(15, dvcRecvDist_YZ, 0, recvCount_YZ, recvbuf_YZ, dist, + N); + //} + + //................................................................................... + Lock = false; // unlock the communicator after communications complete + //................................................................................... 
} -void ScaLBL_Communicator::RecvGrad(double *phi, double *grad){ +void ScaLBL_Communicator::RecvGrad(double *phi, double *grad) { - // Recieves halo and incorporates into D3Q19 based stencil gradient computation - //................................................................................... - // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); - ScaLBL_DeviceBarrier(); - - //................................................................................... - // Unpack the gradributions on the device - //................................................................................... - //...Unpacking for x face(2,8,10,12,14)................................ - ScaLBL_Gradient_Unpack(1.0,-1,0,0,dvcRecvDist_x,0,recvCount_x,recvbuf_x,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,-1,-1,0,dvcRecvDist_x,recvCount_x,recvCount_x,recvbuf_x,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,-1,1,0,dvcRecvDist_x,2*recvCount_x,recvCount_x,recvbuf_x,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,-1,0,1,dvcRecvDist_x,4*recvCount_x,recvCount_x,recvbuf_x,phi,grad,N); - //................................................................................... - //...Packing for X face(1,7,9,11,13)................................ - ScaLBL_Gradient_Unpack(1.0,1,0,0,dvcRecvDist_X,0,recvCount_X,recvbuf_X,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,1,1,0,dvcRecvDist_X,recvCount_X,recvCount_X,recvbuf_X,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,1,-1,0,dvcRecvDist_X,2*recvCount_X,recvCount_X,recvbuf_X,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,1,0,1,dvcRecvDist_X,3*recvCount_X,recvCount_X,recvbuf_X,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,1,0,-1,dvcRecvDist_X,4*recvCount_X,recvCount_X,recvbuf_X,phi,grad,N); - //................................................................................... - //...Packing for y face(4,8,9,16,18)................................. 
- ScaLBL_Gradient_Unpack(1.0,0,-1,0,dvcRecvDist_y,0,recvCount_y,recvbuf_y,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,-1,-1,0,dvcRecvDist_y,recvCount_y,recvCount_y,recvbuf_y,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,1,-1,0,dvcRecvDist_y,2*recvCount_y,recvCount_y,recvbuf_y,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,0,-1,-1,dvcRecvDist_y,3*recvCount_y,recvCount_y,recvbuf_y,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,0,-1,1,dvcRecvDist_y,4*recvCount_y,recvCount_y,recvbuf_y,phi,grad,N); - //................................................................................... - //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_Gradient_Unpack(1.0,0,1,0,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,1,1,0,dvcRecvDist_Y,recvCount_Y,recvCount_Y,recvbuf_Y,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,-1,1,0,dvcRecvDist_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,0,1,1,dvcRecvDist_Y,3*recvCount_Y,recvCount_Y,recvbuf_Y,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,0,1,-1,dvcRecvDist_Y,4*recvCount_Y,recvCount_Y,recvbuf_Y,phi,grad,N); - //................................................................................... - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_Gradient_Unpack(1.0,0,0,-1,dvcRecvDist_z,0,recvCount_z,recvbuf_z,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,-1,0,-1,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,1,0,-1,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,0,-1,-1,dvcRecvDist_z,3*recvCount_z,recvCount_z,recvbuf_z,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,0,1,-1,dvcRecvDist_z,4*recvCount_z,recvCount_z,recvbuf_z,phi,grad,N); - //...Packing for Z face(5,11,14,15,18)................................ 
- ScaLBL_Gradient_Unpack(1.0,0,0,1,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,1,0,1,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,-1,0,1,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,0,1,1,dvcRecvDist_Z,3*recvCount_Z,recvCount_Z,recvbuf_Z,phi,grad,N); - ScaLBL_Gradient_Unpack(0.5,0,-1,1,dvcRecvDist_Z,4*recvCount_Z,recvCount_Z,recvbuf_Z,phi,grad,N); - //.................................................................................. - //...Pack the xy edge (8)................................ - ScaLBL_Gradient_Unpack(0.5,-1,-1,0,dvcRecvDist_xy,0,recvCount_xy,recvbuf_xy,phi,grad,N); - //...Pack the Xy edge (9)................................ - ScaLBL_Gradient_Unpack(0.5,1,-1,0,dvcRecvDist_Xy,0,recvCount_Xy,recvbuf_Xy,phi,grad,N); - //...Pack the xY edge (10)................................ - ScaLBL_Gradient_Unpack(0.5,-1,1,0,dvcRecvDist_xY,0,recvCount_xY,recvbuf_xY,phi,grad,N); - //...Pack the XY edge (7)................................ - ScaLBL_Gradient_Unpack(0.5,1,1,0,dvcRecvDist_XY,0,recvCount_XY,recvbuf_XY,phi,grad,N); - //...Pack the xz edge (12)................................ - ScaLBL_Gradient_Unpack(0.5,-1,0,-1,dvcRecvDist_xz,0,recvCount_xz,recvbuf_xz,phi,grad,N); - //...Pack the xZ edge (14)................................ - ScaLBL_Gradient_Unpack(0.5,-1,0,1,dvcRecvDist_xZ,0,recvCount_xZ,recvbuf_xZ,phi,grad,N); - //...Pack the Xz edge (13)................................ - ScaLBL_Gradient_Unpack(0.5,1,0,-1,dvcRecvDist_Xz,0,recvCount_Xz,recvbuf_Xz,phi,grad,N); - //...Pack the XZ edge (11)................................ - ScaLBL_Gradient_Unpack(0.5,1,0,1,dvcRecvDist_XZ,0,recvCount_XZ,recvbuf_XZ,phi,grad,N); - //...Pack the yz edge (16)................................ - ScaLBL_Gradient_Unpack(0.5,0,-1,-1,dvcRecvDist_yz,0,recvCount_yz,recvbuf_yz,phi,grad,N); - //...Pack the yZ edge (18)................................ 
- ScaLBL_Gradient_Unpack(0.5,0,-1,1,dvcRecvDist_yZ,0,recvCount_yZ,recvbuf_yZ,phi,grad,N); - //...Pack the Yz edge (17)................................ - ScaLBL_Gradient_Unpack(0.5,0,1,-1,dvcRecvDist_Yz,0,recvCount_Yz,recvbuf_Yz,phi,grad,N); - //...Pack the YZ edge (15)................................ - ScaLBL_Gradient_Unpack(0.5,0,1,1,dvcRecvDist_YZ,0,recvCount_YZ,recvbuf_YZ,phi,grad,N); - //................................................................................... - Lock=false; // unlock the communicator after communications complete - //................................................................................... + // Recieves halo and incorporates into D3Q19 based stencil gradient computation + //................................................................................... + // Wait for completion of D3Q19 communication + MPI_COMM_SCALBL.waitAll(18, req1); + MPI_COMM_SCALBL.waitAll(18, req2); + ScaLBL_DeviceBarrier(); + //................................................................................... + // Unpack the gradributions on the device + //................................................................................... + //...Unpacking for x face(2,8,10,12,14)................................ + ScaLBL_Gradient_Unpack(1.0, -1, 0, 0, dvcRecvDist_x, 0, recvCount_x, + recvbuf_x, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, -1, -1, 0, dvcRecvDist_x, recvCount_x, + recvCount_x, recvbuf_x, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, -1, 1, 0, dvcRecvDist_x, 2 * recvCount_x, + recvCount_x, recvbuf_x, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, -1, 0, 1, dvcRecvDist_x, 4 * recvCount_x, + recvCount_x, recvbuf_x, phi, grad, N); + //................................................................................... + //...Packing for X face(1,7,9,11,13)................................ 
+ ScaLBL_Gradient_Unpack(1.0, 1, 0, 0, dvcRecvDist_X, 0, recvCount_X, + recvbuf_X, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 1, 1, 0, dvcRecvDist_X, recvCount_X, + recvCount_X, recvbuf_X, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 1, -1, 0, dvcRecvDist_X, 2 * recvCount_X, + recvCount_X, recvbuf_X, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 1, 0, 1, dvcRecvDist_X, 3 * recvCount_X, + recvCount_X, recvbuf_X, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 1, 0, -1, dvcRecvDist_X, 4 * recvCount_X, + recvCount_X, recvbuf_X, phi, grad, N); + //................................................................................... + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_Gradient_Unpack(1.0, 0, -1, 0, dvcRecvDist_y, 0, recvCount_y, + recvbuf_y, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, -1, -1, 0, dvcRecvDist_y, recvCount_y, + recvCount_y, recvbuf_y, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 1, -1, 0, dvcRecvDist_y, 2 * recvCount_y, + recvCount_y, recvbuf_y, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 0, -1, -1, dvcRecvDist_y, 3 * recvCount_y, + recvCount_y, recvbuf_y, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 0, -1, 1, dvcRecvDist_y, 4 * recvCount_y, + recvCount_y, recvbuf_y, phi, grad, N); + //................................................................................... + //...Packing for Y face(3,7,10,15,17)................................. 
+ ScaLBL_Gradient_Unpack(1.0, 0, 1, 0, dvcRecvDist_Y, 0, recvCount_Y, + recvbuf_Y, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 1, 1, 0, dvcRecvDist_Y, recvCount_Y, + recvCount_Y, recvbuf_Y, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, -1, 1, 0, dvcRecvDist_Y, 2 * recvCount_Y, + recvCount_Y, recvbuf_Y, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 0, 1, 1, dvcRecvDist_Y, 3 * recvCount_Y, + recvCount_Y, recvbuf_Y, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 0, 1, -1, dvcRecvDist_Y, 4 * recvCount_Y, + recvCount_Y, recvbuf_Y, phi, grad, N); + //................................................................................... + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_Gradient_Unpack(1.0, 0, 0, -1, dvcRecvDist_z, 0, recvCount_z, + recvbuf_z, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, -1, 0, -1, dvcRecvDist_z, recvCount_z, + recvCount_z, recvbuf_z, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 1, 0, -1, dvcRecvDist_z, 2 * recvCount_z, + recvCount_z, recvbuf_z, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 0, -1, -1, dvcRecvDist_z, 3 * recvCount_z, + recvCount_z, recvbuf_z, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 0, 1, -1, dvcRecvDist_z, 4 * recvCount_z, + recvCount_z, recvbuf_z, phi, grad, N); + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_Gradient_Unpack(1.0, 0, 0, 1, dvcRecvDist_Z, 0, recvCount_Z, + recvbuf_Z, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 1, 0, 1, dvcRecvDist_Z, recvCount_Z, + recvCount_Z, recvbuf_Z, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, -1, 0, 1, dvcRecvDist_Z, 2 * recvCount_Z, + recvCount_Z, recvbuf_Z, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 0, 1, 1, dvcRecvDist_Z, 3 * recvCount_Z, + recvCount_Z, recvbuf_Z, phi, grad, N); + ScaLBL_Gradient_Unpack(0.5, 0, -1, 1, dvcRecvDist_Z, 4 * recvCount_Z, + recvCount_Z, recvbuf_Z, phi, grad, N); + //.................................................................................. 
+ //...Pack the xy edge (8)................................ + ScaLBL_Gradient_Unpack(0.5, -1, -1, 0, dvcRecvDist_xy, 0, recvCount_xy, + recvbuf_xy, phi, grad, N); + //...Pack the Xy edge (9)................................ + ScaLBL_Gradient_Unpack(0.5, 1, -1, 0, dvcRecvDist_Xy, 0, recvCount_Xy, + recvbuf_Xy, phi, grad, N); + //...Pack the xY edge (10)................................ + ScaLBL_Gradient_Unpack(0.5, -1, 1, 0, dvcRecvDist_xY, 0, recvCount_xY, + recvbuf_xY, phi, grad, N); + //...Pack the XY edge (7)................................ + ScaLBL_Gradient_Unpack(0.5, 1, 1, 0, dvcRecvDist_XY, 0, recvCount_XY, + recvbuf_XY, phi, grad, N); + //...Pack the xz edge (12)................................ + ScaLBL_Gradient_Unpack(0.5, -1, 0, -1, dvcRecvDist_xz, 0, recvCount_xz, + recvbuf_xz, phi, grad, N); + //...Pack the xZ edge (14)................................ + ScaLBL_Gradient_Unpack(0.5, -1, 0, 1, dvcRecvDist_xZ, 0, recvCount_xZ, + recvbuf_xZ, phi, grad, N); + //...Pack the Xz edge (13)................................ + ScaLBL_Gradient_Unpack(0.5, 1, 0, -1, dvcRecvDist_Xz, 0, recvCount_Xz, + recvbuf_Xz, phi, grad, N); + //...Pack the XZ edge (11)................................ + ScaLBL_Gradient_Unpack(0.5, 1, 0, 1, dvcRecvDist_XZ, 0, recvCount_XZ, + recvbuf_XZ, phi, grad, N); + //...Pack the yz edge (16)................................ + ScaLBL_Gradient_Unpack(0.5, 0, -1, -1, dvcRecvDist_yz, 0, recvCount_yz, + recvbuf_yz, phi, grad, N); + //...Pack the yZ edge (18)................................ + ScaLBL_Gradient_Unpack(0.5, 0, -1, 1, dvcRecvDist_yZ, 0, recvCount_yZ, + recvbuf_yZ, phi, grad, N); + //...Pack the Yz edge (17)................................ + ScaLBL_Gradient_Unpack(0.5, 0, 1, -1, dvcRecvDist_Yz, 0, recvCount_Yz, + recvbuf_Yz, phi, grad, N); + //...Pack the YZ edge (15)................................ 
+ ScaLBL_Gradient_Unpack(0.5, 0, 1, 1, dvcRecvDist_YZ, 0, recvCount_YZ, + recvbuf_YZ, phi, grad, N); + //................................................................................... + Lock = false; // unlock the communicator after communications complete + //................................................................................... } -void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ +void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq) { - // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 - if (Lock==true){ - ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); - } - else{ - Lock=true; - } - // assign tag of 19 to D3Q19 communication - sendtag = recvtag = 148; - ScaLBL_DeviceBarrier(); - // Pack the distributions - //...Packing for x face(2,8,10,12,14)................................ - ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N); - ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N); + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + if (Lock == true) { + ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did " + "you forget to match Send/Recv calls?"); + } else { + Lock = true; + } + // assign tag of 19 to D3Q19 communication + sendtag = recvtag = 148; + ScaLBL_DeviceBarrier(); + // Pack the distributions + //...Packing for x face(2,8,10,12,14)................................ + ScaLBL_D3Q19_Pack(2, dvcSendList_x, 0, sendCount_x, sendbuf_x, Aq, N); + ScaLBL_D3Q19_Pack(2, dvcSendList_x, sendCount_x, sendCount_x, sendbuf_x, Bq, + N); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x, rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X, rank_X,recvtag+0); - - //...Packing for X face(1,7,9,11,13)................................ 
- ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N); - ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N); - - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X, rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x, rank_x,recvtag+1); + req1[0] = + MPI_COMM_SCALBL.Isend(sendbuf_x, 2 * sendCount_x, rank_x, sendtag + 0); + req2[0] = + MPI_COMM_SCALBL.Irecv(recvbuf_X, 2 * recvCount_X, rank_X, recvtag + 0); - //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N); - ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N); + //...Packing for X face(1,7,9,11,13)................................ + ScaLBL_D3Q19_Pack(1, dvcSendList_X, 0, sendCount_X, sendbuf_X, Aq, N); + ScaLBL_D3Q19_Pack(1, dvcSendList_X, sendCount_X, sendCount_X, sendbuf_X, Bq, + N); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y, rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y, rank_Y,recvtag+2); - - //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N); - ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); + req1[1] = + MPI_COMM_SCALBL.Isend(sendbuf_X, 2 * sendCount_X, rank_X, sendtag + 1); + req2[1] = + MPI_COMM_SCALBL.Irecv(recvbuf_x, 2 * recvCount_x, rank_x, recvtag + 1); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y, rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y, rank_y,recvtag+3); - - //...Packing for z face(6,12,13,16,17)................................ 
- ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N); - ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N); - - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z, rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z, rank_Z,recvtag+4); - - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N); - ScaLBL_D3Q19_Pack(5,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,Bq,N); + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_D3Q19_Pack(4, dvcSendList_y, 0, sendCount_y, sendbuf_y, Aq, N); + ScaLBL_D3Q19_Pack(4, dvcSendList_y, sendCount_y, sendCount_y, sendbuf_y, Bq, + N); - //................................................................................... - // Send all the distributions - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z, rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z, rank_z,recvtag+5); + req1[2] = + MPI_COMM_SCALBL.Isend(sendbuf_y, 2 * sendCount_y, rank_y, sendtag + 2); + req2[2] = + MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2 * recvCount_Y, rank_Y, recvtag + 2); + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q19_Pack(3, dvcSendList_Y, 0, sendCount_Y, sendbuf_Y, Aq, N); + ScaLBL_D3Q19_Pack(3, dvcSendList_Y, sendCount_Y, sendCount_Y, sendbuf_Y, Bq, + N); + + req1[3] = + MPI_COMM_SCALBL.Isend(sendbuf_Y, 2 * sendCount_Y, rank_Y, sendtag + 3); + req2[3] = + MPI_COMM_SCALBL.Irecv(recvbuf_y, 2 * recvCount_y, rank_y, recvtag + 3); + + //...Packing for z face(6,12,13,16,17)................................ 
+ ScaLBL_D3Q19_Pack(6, dvcSendList_z, 0, sendCount_z, sendbuf_z, Aq, N); + ScaLBL_D3Q19_Pack(6, dvcSendList_z, sendCount_z, sendCount_z, sendbuf_z, Bq, + N); + + req1[4] = + MPI_COMM_SCALBL.Isend(sendbuf_z, 2 * sendCount_z, rank_z, sendtag + 4); + req2[4] = + MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2 * recvCount_Z, rank_Z, recvtag + 4); + + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q19_Pack(5, dvcSendList_Z, 0, sendCount_Z, sendbuf_Z, Aq, N); + ScaLBL_D3Q19_Pack(5, dvcSendList_Z, sendCount_Z, sendCount_Z, sendbuf_Z, Bq, + N); + + //................................................................................... + // Send all the distributions + req1[5] = + MPI_COMM_SCALBL.Isend(sendbuf_Z, 2 * sendCount_Z, rank_Z, sendtag + 5); + req2[5] = + MPI_COMM_SCALBL.Irecv(recvbuf_z, 2 * recvCount_z, rank_z, recvtag + 5); } +void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq) { -void ScaLBL_Communicator::BiRecvD3Q7AA(double *Aq, double *Bq){ + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + //................................................................................... + // Wait for completion of D3Q19 communication + MPI_COMM_SCALBL.waitAll(6, req1); + MPI_COMM_SCALBL.waitAll(6, req2); + ScaLBL_DeviceBarrier(); - // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 - //................................................................................... - // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(6,req1); - MPI_COMM_SCALBL.waitAll(6,req2); - ScaLBL_DeviceBarrier(); + //................................................................................... + // NOTE: AA Routine writes to opposite + // Unpack the distributions on the device + //................................................................................... + //...Unpacking for x face(2,8,10,12,14)................................ 
+ ScaLBL_D3Q7_Unpack(2, dvcRecvDist_x, 0, recvCount_x, recvbuf_x, Aq, N); + ScaLBL_D3Q7_Unpack(2, dvcRecvDist_x, recvCount_x, recvCount_x, recvbuf_x, + Bq, N); + //................................................................................... + //...Packing for X face(1,7,9,11,13)................................ + ScaLBL_D3Q7_Unpack(1, dvcRecvDist_X, 0, recvCount_X, recvbuf_X, Aq, N); + ScaLBL_D3Q7_Unpack(1, dvcRecvDist_X, recvCount_X, recvCount_X, recvbuf_X, + Bq, N); + //................................................................................... + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_D3Q7_Unpack(4, dvcRecvDist_y, 0, recvCount_y, recvbuf_y, Aq, N); + ScaLBL_D3Q7_Unpack(4, dvcRecvDist_y, recvCount_y, recvCount_y, recvbuf_y, + Bq, N); + //................................................................................... + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q7_Unpack(3, dvcRecvDist_Y, 0, recvCount_Y, recvbuf_Y, Aq, N); + ScaLBL_D3Q7_Unpack(3, dvcRecvDist_Y, recvCount_Y, recvCount_Y, recvbuf_Y, + Bq, N); + //................................................................................... - //................................................................................... - // NOTE: AA Routine writes to opposite - // Unpack the distributions on the device - //................................................................................... - //...Unpacking for x face(2,8,10,12,14)................................ - ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,Aq,N); - ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,recvCount_x,recvCount_x,recvbuf_x,Bq,N); - //................................................................................... - //...Packing for X face(1,7,9,11,13)................................ 
- ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,Aq,N); - ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,recvCount_X,recvCount_X,recvbuf_X,Bq,N); - //................................................................................... - //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,Aq,N); - ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,recvCount_y,recvCount_y,recvbuf_y,Bq,N); - //................................................................................... - //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,Aq,N); - ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,recvCount_Y,recvCount_Y,recvbuf_Y,Bq,N); - //................................................................................... - - if (BoundaryCondition > 0 && kproc == 0){ - // don't unpack little z - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); - } - else if (BoundaryCondition > 0 && kproc == nprocz-1){ - // don't unpack big z - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); - } - else { - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); - } - - //................................................................................... 
- Lock=false; // unlock the communicator after communications complete - //................................................................................... + if (BoundaryCondition > 0 && kproc == 0) { + // don't unpack little z + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, 0, recvCount_Z, recvbuf_Z, Aq, N); + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, recvCount_Z, recvCount_Z, + recvbuf_Z, Bq, N); + } else if (BoundaryCondition > 0 && kproc == nprocz - 1) { + // don't unpack big z + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, 0, recvCount_z, recvbuf_z, Aq, N); + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, recvCount_z, recvCount_z, + recvbuf_z, Bq, N); + } else { + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, 0, recvCount_z, recvbuf_z, Aq, N); + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, recvCount_z, recvCount_z, + recvbuf_z, Bq, N); + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, 0, recvCount_Z, recvbuf_Z, Aq, N); + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, recvCount_Z, recvCount_Z, + recvbuf_Z, Bq, N); + } + //................................................................................... + Lock = false; // unlock the communicator after communications complete + //................................................................................... 
} -void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){ +void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component) { - // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 - if (Lock==true){ - ERROR("ScaLBL Error (SendD3Q7): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); - } - else{ - Lock=true; - } - // assign tag of 154 to D3Q19 communication - sendtag = recvtag = 154; - ScaLBL_DeviceBarrier(); - // Pack the distributions - //...Packing for x face(2,8,10,12,14)................................ - ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*7*N],N); - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0); - - //...Packing for X face(1,7,9,11,13)................................ - ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*7*N],N); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1); - - //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*7*N],N); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2); - - //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*7*N],N); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3); - - //...Packing for z face(6,12,13,16,17)................................ 
- ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*7*N],N); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4); - - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*7*N],N); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5); + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + if (Lock == true) { + ERROR("ScaLBL Error (SendD3Q7): ScaLBL_Communicator is locked -- did " + "you forget to match Send/Recv calls?"); + } else { + Lock = true; + } + // assign tag of 154 to D3Q19 communication + sendtag = recvtag = 154; + ScaLBL_DeviceBarrier(); + // Pack the distributions + //...Packing for x face(2,8,10,12,14)................................ + ScaLBL_D3Q19_Pack(2, dvcSendList_x, 0, sendCount_x, sendbuf_x, + &Aq[Component * 7 * N], N); + req1[0] = + MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x, sendtag + 0); + req2[0] = + MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X, recvtag + 0); + + //...Packing for X face(1,7,9,11,13)................................ + ScaLBL_D3Q19_Pack(1, dvcSendList_X, 0, sendCount_X, sendbuf_X, + &Aq[Component * 7 * N], N); + req1[1] = + MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X, sendtag + 1); + req2[1] = + MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x, recvtag + 1); + + //...Packing for y face(4,8,9,16,18)................................. 
+ ScaLBL_D3Q19_Pack(4, dvcSendList_y, 0, sendCount_y, sendbuf_y, + &Aq[Component * 7 * N], N); + req1[2] = + MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y, sendtag + 2); + req2[2] = + MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y, recvtag + 2); + + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q19_Pack(3, dvcSendList_Y, 0, sendCount_Y, sendbuf_Y, + &Aq[Component * 7 * N], N); + req1[3] = + MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y, sendtag + 3); + req2[3] = + MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y, recvtag + 3); + + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q19_Pack(6, dvcSendList_z, 0, sendCount_z, sendbuf_z, + &Aq[Component * 7 * N], N); + req1[4] = + MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z, sendtag + 4); + req2[4] = + MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z, recvtag + 4); + + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q19_Pack(5, dvcSendList_Z, 0, sendCount_Z, sendbuf_Z, + &Aq[Component * 7 * N], N); + req1[5] = + MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z, sendtag + 5); + req2[5] = + MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z, recvtag + 5); } +void ScaLBL_Communicator::RecvD3Q7AA(double *Aq, int Component) { -void ScaLBL_Communicator::RecvD3Q7AA(double *Aq, int Component){ + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + //................................................................................... + // Wait for completion of D3Q19 communication + MPI_COMM_SCALBL.waitAll(6, req1); + MPI_COMM_SCALBL.waitAll(6, req2); + ScaLBL_DeviceBarrier(); - // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 - //................................................................................... 
- // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(6,req1); - MPI_COMM_SCALBL.waitAll(6,req2); - ScaLBL_DeviceBarrier(); + //................................................................................... + // NOTE: AA Routine writes to opposite + // Unpack the distributions on the device + //................................................................................... + //...Unpacking for x face(2,8,10,12,14)................................ + ScaLBL_D3Q7_Unpack(2, dvcRecvDist_x, 0, recvCount_x, recvbuf_x, + &Aq[Component * 7 * N], N); + //................................................................................... + //...Packing for X face(1,7,9,11,13)................................ + ScaLBL_D3Q7_Unpack(1, dvcRecvDist_X, 0, recvCount_X, recvbuf_X, + &Aq[Component * 7 * N], N); + //................................................................................... + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_D3Q7_Unpack(4, dvcRecvDist_y, 0, recvCount_y, recvbuf_y, + &Aq[Component * 7 * N], N); + //................................................................................... + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q7_Unpack(3, dvcRecvDist_Y, 0, recvCount_Y, recvbuf_Y, + &Aq[Component * 7 * N], N); + //................................................................................... - //................................................................................... - // NOTE: AA Routine writes to opposite - // Unpack the distributions on the device - //................................................................................... - //...Unpacking for x face(2,8,10,12,14)................................ - ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,&Aq[Component*7*N],N); - //................................................................................... 
- //...Packing for X face(1,7,9,11,13)................................ - ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,&Aq[Component*7*N],N); - //................................................................................... - //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,&Aq[Component*7*N],N); - //................................................................................... - //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,&Aq[Component*7*N],N); - //................................................................................... - - if (BoundaryCondition > 0){ - if (kproc != 0){ - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*7*N],N); - } - if (kproc != nprocz-1){ - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*7*N],N); - } - } - else { - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*7*N],N); - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*7*N],N); - } - - //................................................................................... - Lock=false; // unlock the communicator after communications complete - //................................................................................... + if (BoundaryCondition > 0) { + if (kproc != 0) { + //...Packing for z face(6,12,13,16,17)................................ 
+ ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, 0, recvCount_z, recvbuf_z, + &Aq[Component * 7 * N], N); + } + if (kproc != nprocz - 1) { + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, 0, recvCount_Z, recvbuf_Z, + &Aq[Component * 7 * N], N); + } + } else { + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, 0, recvCount_z, recvbuf_z, + &Aq[Component * 7 * N], N); + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, 0, recvCount_Z, recvbuf_Z, + &Aq[Component * 7 * N], N); + } + //................................................................................... + Lock = false; // unlock the communicator after communications complete + //................................................................................... } -void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){ +void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq) { - // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 - if (Lock==true){ - ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); - } - else{ - Lock=true; - } - // assign tag of 19 to D3Q19 communication - sendtag = recvtag = 162; - ScaLBL_DeviceBarrier(); - // Pack the distributions - //...Packing for x face(2,8,10,12,14)................................ - ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N); - ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N); - ScaLBL_D3Q19_Pack(2,dvcSendList_x,2*sendCount_x,sendCount_x,sendbuf_x,Cq,N); - //...Packing for X face(1,7,9,11,13)................................ 
- ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N); - ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N); - ScaLBL_D3Q19_Pack(1,dvcSendList_X,2*sendCount_X,sendCount_X,sendbuf_X,Cq,N); - //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N); - ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N); - ScaLBL_D3Q19_Pack(4,dvcSendList_y,2*sendCount_y,sendCount_y,sendbuf_y,Cq,N); - //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N); - ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); - ScaLBL_D3Q19_Pack(3,dvcSendList_Y,2*sendCount_Y,sendCount_Y,sendbuf_Y,Cq,N); - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N); - ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N); - ScaLBL_D3Q19_Pack(6,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,Cq,N); - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N); - ScaLBL_D3Q19_Pack(5,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,Bq,N); - ScaLBL_D3Q19_Pack(5,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,Cq,N); - - //................................................................................... 
- // Send all the distributions - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x, rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X, rank_X,recvtag+0); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X, rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x, rank_x,recvtag+1); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y, rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y, rank_Y,recvtag+2); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y, rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y, rank_y,recvtag+3); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z, rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z, rank_Z,recvtag+4); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z, rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z, rank_z,recvtag+5); + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + if (Lock == true) { + ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did " + "you forget to match Send/Recv calls?"); + } else { + Lock = true; + } + // assign tag of 19 to D3Q19 communication + sendtag = recvtag = 162; + ScaLBL_DeviceBarrier(); + // Pack the distributions + //...Packing for x face(2,8,10,12,14)................................ + ScaLBL_D3Q19_Pack(2, dvcSendList_x, 0, sendCount_x, sendbuf_x, Aq, N); + ScaLBL_D3Q19_Pack(2, dvcSendList_x, sendCount_x, sendCount_x, sendbuf_x, Bq, + N); + ScaLBL_D3Q19_Pack(2, dvcSendList_x, 2 * sendCount_x, sendCount_x, sendbuf_x, + Cq, N); + //...Packing for X face(1,7,9,11,13)................................ 
+ ScaLBL_D3Q19_Pack(1, dvcSendList_X, 0, sendCount_X, sendbuf_X, Aq, N); + ScaLBL_D3Q19_Pack(1, dvcSendList_X, sendCount_X, sendCount_X, sendbuf_X, Bq, + N); + ScaLBL_D3Q19_Pack(1, dvcSendList_X, 2 * sendCount_X, sendCount_X, sendbuf_X, + Cq, N); + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_D3Q19_Pack(4, dvcSendList_y, 0, sendCount_y, sendbuf_y, Aq, N); + ScaLBL_D3Q19_Pack(4, dvcSendList_y, sendCount_y, sendCount_y, sendbuf_y, Bq, + N); + ScaLBL_D3Q19_Pack(4, dvcSendList_y, 2 * sendCount_y, sendCount_y, sendbuf_y, + Cq, N); + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q19_Pack(3, dvcSendList_Y, 0, sendCount_Y, sendbuf_Y, Aq, N); + ScaLBL_D3Q19_Pack(3, dvcSendList_Y, sendCount_Y, sendCount_Y, sendbuf_Y, Bq, + N); + ScaLBL_D3Q19_Pack(3, dvcSendList_Y, 2 * sendCount_Y, sendCount_Y, sendbuf_Y, + Cq, N); + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q19_Pack(6, dvcSendList_z, 0, sendCount_z, sendbuf_z, Aq, N); + ScaLBL_D3Q19_Pack(6, dvcSendList_z, sendCount_z, sendCount_z, sendbuf_z, Bq, + N); + ScaLBL_D3Q19_Pack(6, dvcSendList_z, 2 * sendCount_z, sendCount_z, sendbuf_z, + Cq, N); + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q19_Pack(5, dvcSendList_Z, 0, sendCount_Z, sendbuf_Z, Aq, N); + ScaLBL_D3Q19_Pack(5, dvcSendList_Z, sendCount_Z, sendCount_Z, sendbuf_Z, Bq, + N); + ScaLBL_D3Q19_Pack(5, dvcSendList_Z, 2 * sendCount_Z, sendCount_Z, sendbuf_Z, + Cq, N); + //................................................................................... 
+ // Send all the distributions + req1[0] = + MPI_COMM_SCALBL.Isend(sendbuf_x, 3 * sendCount_x, rank_x, sendtag + 0); + req2[0] = + MPI_COMM_SCALBL.Irecv(recvbuf_X, 3 * recvCount_X, rank_X, recvtag + 0); + req1[1] = + MPI_COMM_SCALBL.Isend(sendbuf_X, 3 * sendCount_X, rank_X, sendtag + 1); + req2[1] = + MPI_COMM_SCALBL.Irecv(recvbuf_x, 3 * recvCount_x, rank_x, recvtag + 1); + req1[2] = + MPI_COMM_SCALBL.Isend(sendbuf_y, 3 * sendCount_y, rank_y, sendtag + 2); + req2[2] = + MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3 * recvCount_Y, rank_Y, recvtag + 2); + req1[3] = + MPI_COMM_SCALBL.Isend(sendbuf_Y, 3 * sendCount_Y, rank_Y, sendtag + 3); + req2[3] = + MPI_COMM_SCALBL.Irecv(recvbuf_y, 3 * recvCount_y, rank_y, recvtag + 3); + req1[4] = + MPI_COMM_SCALBL.Isend(sendbuf_z, 3 * sendCount_z, rank_z, sendtag + 4); + req2[4] = + MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3 * recvCount_Z, rank_Z, recvtag + 4); + req1[5] = + MPI_COMM_SCALBL.Isend(sendbuf_Z, 3 * sendCount_Z, rank_Z, sendtag + 5); + req2[5] = + MPI_COMM_SCALBL.Irecv(recvbuf_z, 3 * recvCount_z, rank_z, recvtag + 5); } +void ScaLBL_Communicator::TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq) { -void ScaLBL_Communicator::TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq){ + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + //................................................................................... + // Wait for completion of D3Q19 communication + MPI_COMM_SCALBL.waitAll(6, req1); + MPI_COMM_SCALBL.waitAll(6, req2); + ScaLBL_DeviceBarrier(); - // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 - //................................................................................... - // Wait for completion of D3Q19 communication - MPI_COMM_SCALBL.waitAll(6,req1); - MPI_COMM_SCALBL.waitAll(6,req2); - ScaLBL_DeviceBarrier(); + //................................................................................... 
+ // NOTE: AA Routine writes to opposite + // Unpack the distributions on the device + //................................................................................... + //...Unpacking for x face(2,8,10,12,14)................................ + ScaLBL_D3Q7_Unpack(2, dvcRecvDist_x, 0, recvCount_x, recvbuf_x, Aq, N); + ScaLBL_D3Q7_Unpack(2, dvcRecvDist_x, recvCount_x, recvCount_x, recvbuf_x, + Bq, N); + ScaLBL_D3Q7_Unpack(2, dvcRecvDist_x, 2 * recvCount_x, recvCount_x, + recvbuf_x, Cq, N); + //................................................................................... + //...Packing for X face(1,7,9,11,13)................................ + ScaLBL_D3Q7_Unpack(1, dvcRecvDist_X, 0, recvCount_X, recvbuf_X, Aq, N); + ScaLBL_D3Q7_Unpack(1, dvcRecvDist_X, recvCount_X, recvCount_X, recvbuf_X, + Bq, N); + ScaLBL_D3Q7_Unpack(1, dvcRecvDist_X, 2 * recvCount_X, recvCount_X, + recvbuf_X, Cq, N); + //................................................................................... + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_D3Q7_Unpack(4, dvcRecvDist_y, 0, recvCount_y, recvbuf_y, Aq, N); + ScaLBL_D3Q7_Unpack(4, dvcRecvDist_y, recvCount_y, recvCount_y, recvbuf_y, + Bq, N); + ScaLBL_D3Q7_Unpack(4, dvcRecvDist_y, 2 * recvCount_y, recvCount_y, + recvbuf_y, Cq, N); + //................................................................................... + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q7_Unpack(3, dvcRecvDist_Y, 0, recvCount_Y, recvbuf_Y, Aq, N); + ScaLBL_D3Q7_Unpack(3, dvcRecvDist_Y, recvCount_Y, recvCount_Y, recvbuf_Y, + Bq, N); + ScaLBL_D3Q7_Unpack(3, dvcRecvDist_Y, 2 * recvCount_Y, recvCount_Y, + recvbuf_Y, Cq, N); + //................................................................................... - //................................................................................... 
- // NOTE: AA Routine writes to opposite - // Unpack the distributions on the device - //................................................................................... - //...Unpacking for x face(2,8,10,12,14)................................ - ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,Aq,N); - ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,recvCount_x,recvCount_x,recvbuf_x,Bq,N); - ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,2*recvCount_x,recvCount_x,recvbuf_x,Cq,N); - //................................................................................... - //...Packing for X face(1,7,9,11,13)................................ - ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,Aq,N); - ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,recvCount_X,recvCount_X,recvbuf_X,Bq,N); - ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,2*recvCount_X,recvCount_X,recvbuf_X,Cq,N); - //................................................................................... - //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,Aq,N); - ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,recvCount_y,recvCount_y,recvbuf_y,Bq,N); - ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,2*recvCount_y,recvCount_y,recvbuf_y,Cq,N); - //................................................................................... - //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,Aq,N); - ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,recvCount_Y,recvCount_Y,recvbuf_Y,Bq,N); - ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,Cq,N); - //................................................................................... - - if (BoundaryCondition > 0 && kproc == 0){ - // don't unpack little z - //...Packing for Z face(5,11,14,15,18)................................ 
- ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,Cq,N); - } - else if (BoundaryCondition > 0 && kproc == nprocz-1){ - // don't unpack big z - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,Cq,N); - } - else { - //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,Aq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,Bq,N); - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,Cq,N); - //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,Aq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,Bq,N); - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,Cq,N); - } - - //................................................................................... - Lock=false; // unlock the communicator after communications complete - //................................................................................... + if (BoundaryCondition > 0 && kproc == 0) { + // don't unpack little z + //...Packing for Z face(5,11,14,15,18)................................ 
+ ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, 0, recvCount_Z, recvbuf_Z, Aq, N); + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, recvCount_Z, recvCount_Z, + recvbuf_Z, Bq, N); + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, 2 * recvCount_Z, recvCount_Z, + recvbuf_Z, Cq, N); + } else if (BoundaryCondition > 0 && kproc == nprocz - 1) { + // don't unpack big z + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, 0, recvCount_z, recvbuf_z, Aq, N); + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, recvCount_z, recvCount_z, + recvbuf_z, Bq, N); + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, 2 * recvCount_z, recvCount_z, + recvbuf_z, Cq, N); + } else { + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, 0, recvCount_z, recvbuf_z, Aq, N); + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, recvCount_z, recvCount_z, + recvbuf_z, Bq, N); + ScaLBL_D3Q7_Unpack(6, dvcRecvDist_z, 2 * recvCount_z, recvCount_z, + recvbuf_z, Cq, N); + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, 0, recvCount_Z, recvbuf_Z, Aq, N); + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, recvCount_Z, recvCount_Z, + recvbuf_Z, Bq, N); + ScaLBL_D3Q7_Unpack(5, dvcRecvDist_Z, 2 * recvCount_Z, recvCount_Z, + recvbuf_Z, Cq, N); + } + //................................................................................... + Lock = false; // unlock the communicator after communications complete + //................................................................................... } +void ScaLBL_Communicator::SendHalo(double *data) { + //................................................................................... + if (Lock == true) { + ERROR("ScaLBL Error (SendHalo): ScaLBL_Communicator is locked -- did " + "you forget to match Send/Recv calls?"); + } else { + Lock = true; + } + ScaLBL_DeviceBarrier(); + //................................................................................... 
+ sendtag = recvtag = 168; + //................................................................................... + ScaLBL_Scalar_Pack(dvcSendList_x, sendCount_x, sendbuf_x, data, N); + ScaLBL_Scalar_Pack(dvcSendList_y, sendCount_y, sendbuf_y, data, N); + ScaLBL_Scalar_Pack(dvcSendList_z, sendCount_z, sendbuf_z, data, N); + ScaLBL_Scalar_Pack(dvcSendList_X, sendCount_X, sendbuf_X, data, N); + ScaLBL_Scalar_Pack(dvcSendList_Y, sendCount_Y, sendbuf_Y, data, N); + ScaLBL_Scalar_Pack(dvcSendList_Z, sendCount_Z, sendbuf_Z, data, N); + ScaLBL_Scalar_Pack(dvcSendList_xy, sendCount_xy, sendbuf_xy, data, N); + ScaLBL_Scalar_Pack(dvcSendList_xY, sendCount_xY, sendbuf_xY, data, N); + ScaLBL_Scalar_Pack(dvcSendList_Xy, sendCount_Xy, sendbuf_Xy, data, N); + ScaLBL_Scalar_Pack(dvcSendList_XY, sendCount_XY, sendbuf_XY, data, N); + ScaLBL_Scalar_Pack(dvcSendList_xz, sendCount_xz, sendbuf_xz, data, N); + ScaLBL_Scalar_Pack(dvcSendList_xZ, sendCount_xZ, sendbuf_xZ, data, N); + ScaLBL_Scalar_Pack(dvcSendList_Xz, sendCount_Xz, sendbuf_Xz, data, N); + ScaLBL_Scalar_Pack(dvcSendList_XZ, sendCount_XZ, sendbuf_XZ, data, N); + ScaLBL_Scalar_Pack(dvcSendList_yz, sendCount_yz, sendbuf_yz, data, N); + ScaLBL_Scalar_Pack(dvcSendList_yZ, sendCount_yZ, sendbuf_yZ, data, N); + ScaLBL_Scalar_Pack(dvcSendList_Yz, sendCount_Yz, sendbuf_Yz, data, N); + ScaLBL_Scalar_Pack(dvcSendList_YZ, sendCount_YZ, sendbuf_YZ, data, N); + //................................................................................... + // Send / Recv all the phase indcator field values + //................................................................................... -void ScaLBL_Communicator::SendHalo(double *data){ - //................................................................................... 
- if (Lock==true){ - ERROR("ScaLBL Error (SendHalo): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); - } - else{ - Lock=true; - } - ScaLBL_DeviceBarrier(); - //................................................................................... - sendtag = recvtag = 168; - //................................................................................... - ScaLBL_Scalar_Pack(dvcSendList_x, sendCount_x,sendbuf_x, data, N); - ScaLBL_Scalar_Pack(dvcSendList_y, sendCount_y,sendbuf_y, data, N); - ScaLBL_Scalar_Pack(dvcSendList_z, sendCount_z,sendbuf_z, data, N); - ScaLBL_Scalar_Pack(dvcSendList_X, sendCount_X,sendbuf_X, data, N); - ScaLBL_Scalar_Pack(dvcSendList_Y, sendCount_Y,sendbuf_Y, data, N); - ScaLBL_Scalar_Pack(dvcSendList_Z, sendCount_Z,sendbuf_Z, data, N); - ScaLBL_Scalar_Pack(dvcSendList_xy, sendCount_xy,sendbuf_xy, data, N); - ScaLBL_Scalar_Pack(dvcSendList_xY, sendCount_xY,sendbuf_xY, data, N); - ScaLBL_Scalar_Pack(dvcSendList_Xy, sendCount_Xy,sendbuf_Xy, data, N); - ScaLBL_Scalar_Pack(dvcSendList_XY, sendCount_XY,sendbuf_XY, data, N); - ScaLBL_Scalar_Pack(dvcSendList_xz, sendCount_xz,sendbuf_xz, data, N); - ScaLBL_Scalar_Pack(dvcSendList_xZ, sendCount_xZ,sendbuf_xZ, data, N); - ScaLBL_Scalar_Pack(dvcSendList_Xz, sendCount_Xz,sendbuf_Xz, data, N); - ScaLBL_Scalar_Pack(dvcSendList_XZ, sendCount_XZ,sendbuf_XZ, data, N); - ScaLBL_Scalar_Pack(dvcSendList_yz, sendCount_yz,sendbuf_yz, data, N); - ScaLBL_Scalar_Pack(dvcSendList_yZ, sendCount_yZ,sendbuf_yZ, data, N); - ScaLBL_Scalar_Pack(dvcSendList_Yz, sendCount_Yz,sendbuf_Yz, data, N); - ScaLBL_Scalar_Pack(dvcSendList_YZ, sendCount_YZ,sendbuf_YZ, data, N); - //................................................................................... - // Send / Recv all the phase indcator field values - //................................................................................... 
- - req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0); - req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1); - req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2); - req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3); - req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4); - req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5); - req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy, rank_xy,sendtag+6); - req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY, rank_XY,recvtag+6); - req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY, rank_XY,sendtag+7); - req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy, rank_xy,recvtag+7); - req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy, rank_Xy,sendtag+8); - req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY, rank_xY,recvtag+8); - req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY, rank_xY,sendtag+9); - req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy, rank_Xy,recvtag+9); - req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz, rank_xz,sendtag+10); - req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ, rank_XZ,recvtag+10); - req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ, rank_XZ,sendtag+11); - req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz, rank_xz,recvtag+11); - req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz, rank_Xz,sendtag+12); - req2[12] = 
MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ, rank_xZ,recvtag+12); - req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ, rank_xZ,sendtag+13); - req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz, rank_Xz,recvtag+13); - req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz, rank_yz,sendtag+14); - req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ, rank_YZ,recvtag+14); - req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ, rank_YZ,sendtag+15); - req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz, rank_yz,recvtag+15); - req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz, rank_Yz,sendtag+16); - req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ, rank_yZ,recvtag+16); - req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ, rank_yZ,sendtag+17); - req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz, rank_Yz,recvtag+17); - //................................................................................... + req1[0] = + MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x, sendtag + 0); + req2[0] = + MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X, recvtag + 0); + req1[1] = + MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X, sendtag + 1); + req2[1] = + MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x, recvtag + 1); + req1[2] = + MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y, sendtag + 2); + req2[2] = + MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y, recvtag + 2); + req1[3] = + MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y, sendtag + 3); + req2[3] = + MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y, recvtag + 3); + req1[4] = + MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z, sendtag + 4); + req2[4] = + MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z, recvtag + 4); + req1[5] = + MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z, sendtag + 5); + req2[5] = + MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z, recvtag + 5); + req1[6] = + 
MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy, rank_xy, sendtag + 6); + req2[6] = + MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY, rank_XY, recvtag + 6); + req1[7] = + MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY, rank_XY, sendtag + 7); + req2[7] = + MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy, rank_xy, recvtag + 7); + req1[8] = + MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy, rank_Xy, sendtag + 8); + req2[8] = + MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY, rank_xY, recvtag + 8); + req1[9] = + MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY, rank_xY, sendtag + 9); + req2[9] = + MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy, rank_Xy, recvtag + 9); + req1[10] = + MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz, rank_xz, sendtag + 10); + req2[10] = + MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ, rank_XZ, recvtag + 10); + req1[11] = + MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ, rank_XZ, sendtag + 11); + req2[11] = + MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz, rank_xz, recvtag + 11); + req1[12] = + MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz, rank_Xz, sendtag + 12); + req2[12] = + MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ, rank_xZ, recvtag + 12); + req1[13] = + MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ, rank_xZ, sendtag + 13); + req2[13] = + MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz, rank_Xz, recvtag + 13); + req1[14] = + MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz, rank_yz, sendtag + 14); + req2[14] = + MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ, rank_YZ, recvtag + 14); + req1[15] = + MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ, rank_YZ, sendtag + 15); + req2[15] = + MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz, rank_yz, recvtag + 15); + req1[16] = + MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz, rank_Yz, sendtag + 16); + req2[16] = + MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ, rank_yZ, recvtag + 16); + req1[17] = + MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ, rank_yZ, sendtag + 17); + req2[17] = + 
MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz, rank_Yz, recvtag + 17); + //................................................................................... } -void ScaLBL_Communicator::RecvHalo(double *data){ +void ScaLBL_Communicator::RecvHalo(double *data) { - //................................................................................... - MPI_COMM_SCALBL.waitAll(18,req1); - MPI_COMM_SCALBL.waitAll(18,req2); - ScaLBL_DeviceBarrier(); - //................................................................................... - //................................................................................... - ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z,recvbuf_z, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_Y, recvCount_Y,recvbuf_Y, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z,recvbuf_Z, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_xy, recvCount_xy,recvbuf_xy, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_xY, recvCount_xY,recvbuf_xY, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_Xy, recvCount_Xy,recvbuf_Xy, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_XY, recvCount_XY,recvbuf_XY, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz,recvbuf_xz, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz,recvbuf_yz, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, data, N); - ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, data, N); - 
//................................................................................... - Lock=false; // unlock the communicator after communications complete - //................................................................................... + //................................................................................... + MPI_COMM_SCALBL.waitAll(18, req1); + MPI_COMM_SCALBL.waitAll(18, req2); + ScaLBL_DeviceBarrier(); + //................................................................................... + //................................................................................... + ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x, recvbuf_x, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y, recvbuf_y, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_z, recvCount_z, recvbuf_z, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X, recvbuf_X, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Y, recvCount_Y, recvbuf_Y, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Z, recvCount_Z, recvbuf_Z, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_xy, recvCount_xy, recvbuf_xy, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_xY, recvCount_xY, recvbuf_xY, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Xy, recvCount_Xy, recvbuf_Xy, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_XY, recvCount_XY, recvbuf_XY, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_xz, recvCount_xz, recvbuf_xz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_xZ, recvCount_xZ, recvbuf_xZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Xz, recvCount_Xz, recvbuf_Xz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_XZ, recvCount_XZ, recvbuf_XZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_yz, recvCount_yz, recvbuf_yz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_yZ, recvCount_yZ, recvbuf_yZ, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_Yz, recvCount_Yz, recvbuf_Yz, data, N); + ScaLBL_Scalar_Unpack(dvcRecvList_YZ, recvCount_YZ, recvbuf_YZ, data, N); + 
//................................................................................... + Lock = false; // unlock the communicator after communications complete + //................................................................................... } -void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, DoubleArray ®data){ - // Gets data from the device and stores in regular layout - int i,j,k,idx; - int Nx = map.size(0); - int Ny = map.size(1); - int Nz = map.size(2); +void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, + DoubleArray ®data) { + // Gets data from the device and stores in regular layout + int i, j, k, idx; + int Nx = map.size(0); + int Ny = map.size(1); + int Nz = map.size(2); - // initialize the array - regdata.fill(0.f); - - double *TmpDat; - double value; - TmpDat = new double [N]; - ScaLBL_CopyToHost(&TmpDat[0],&data[0], N*sizeof(double)); - for (k=0; k. +*/ /** @file ScaLBL.h */ /* \details Header file for Scalable Lattice Boltzmann Library * Separate implementations for GPU and CPU must both follow the conventions defined in this header @@ -22,13 +38,13 @@ extern "C" int ScaLBL_SetDevice(int rank); * @param address memory address * @param size size in bytes */ -extern "C" void ScaLBL_AllocateDeviceMemory(void** address, size_t size); +extern "C" void ScaLBL_AllocateDeviceMemory(void **address, size_t size); /** * \brief Free memory * @param pointer pointer to memory to free */ -extern "C" void ScaLBL_FreeDeviceMemory(void* pointer); +extern "C" void ScaLBL_FreeDeviceMemory(void *pointer); /** * \brief Copy memory from host to device @@ -39,8 +55,8 @@ extern "C" void ScaLBL_FreeDeviceMemory(void* pointer); * @param source memory region to copy from * @param size size of the region to copy in bytes */ -extern "C" void ScaLBL_CopyToDevice(void* dest, const void* source, size_t size); - +extern "C" void ScaLBL_CopyToDevice(void *dest, const void *source, + size_t size); /** * \brief Copy memory from device 
to host @@ -51,14 +67,14 @@ extern "C" void ScaLBL_CopyToDevice(void* dest, const void* source, size_t size) * @param source memory region to copy from * @param size size of the region to copy in bytes */ -extern "C" void ScaLBL_CopyToHost(void* dest, const void* source, size_t size); +extern "C" void ScaLBL_CopyToHost(void *dest, const void *source, size_t size); /** =* \brief Allocate zero copy memory buffer (i.e. shared memory) * @param address memory address * @param size size in bytes */ -extern "C" void ScaLBL_AllocateZeroCopy(void** address, size_t size); +extern "C" void ScaLBL_AllocateZeroCopy(void **address, size_t size); /** * \brief Copy memory from host to zero copy buffer @@ -69,7 +85,8 @@ extern "C" void ScaLBL_AllocateZeroCopy(void** address, size_t size); * @param source memory region to copy from * @param size size of the region to copy in bytes */ -extern "C" void ScaLBL_CopyToZeroCopy(void* dest, const void* source, size_t size); +extern "C" void ScaLBL_CopyToZeroCopy(void *dest, const void *source, + size_t size); /** * \brief Device barrier routine @@ -86,7 +103,8 @@ extern "C" void ScaLBL_DeviceBarrier(); * @param dist - memory buffer to hold the distributions * @param N - size of the distributions (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N); +extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, + double *sendbuf, double *dist, int N); /** * \brief Unpack D3Q19 distributions after communication @@ -98,7 +116,8 @@ extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double * @param dist - memory buffer to hold the distributions * @param N - size of the distributions (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N); +extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, + double 
*recvbuf, double *dist, int N); /** * \brief Unpack D3Q7 distributions after communication @@ -110,7 +129,8 @@ extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, doub * @param dist - memory buffer to hold the distributions * @param N - size of the distributions (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N); +extern "C" void ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count, + double *recvbuf, double *dist, int N); /** * \brief Pack halo for scalar field to be prepare for communication @@ -120,7 +140,8 @@ extern "C" void ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count, doub * @param Data - scalar field * @param N - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, double *Data, int N); +extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, + double *Data, int N); /** * \brief Pack halo for scalar field to be prepare for communication @@ -130,7 +151,8 @@ extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, double * @param Data - scalar field * @param N - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_Scalar_Unpack(int *list, int count, double *recvbuf, double *Data, int N); +extern "C" void ScaLBL_Scalar_Unpack(int *list, int count, double *recvbuf, + double *Data, int N); /** * \brief Unpack values and compute Shan-Chen type of gradient @@ -146,9 +168,10 @@ extern "C" void ScaLBL_Scalar_Unpack(int *list, int count, double *recvbuf, doub * @param grad - gradient * @param N - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_Gradient_Unpack(double weight, double Cqx, double Cqy, double Cqz, - int *list, int start, int count, double *recvbuf, double *phi, double *grad, int N); - +extern "C" void ScaLBL_Gradient_Unpack(double weight, 
double Cqx, double Cqy, + double Cqz, int *list, int start, + int count, double *recvbuf, double *phi, + double *grad, int N); /** * \brief Initialize D3Q19 distributions @@ -185,7 +208,9 @@ extern "C" void ScaLBL_D3Q19_Pressure(double *dist, double *press, int Np); * @param Fy - force in y direction * @param Fz - force in z direction */ -extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); +extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, + int Np, double rlx, double Fx, + double Fy, double Fz); /** * \brief BGK collision based on AA odd access pattern for D3Q19 @@ -199,7 +224,10 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int * @param Fy - force in y direction * @param Fz - force in z direction */ -extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); +extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, + int start, int finish, int Np, + double rlx, double Fx, double Fy, + double Fz); /** * \brief BGK collision based on AA even access pattern for D3Q19 @@ -212,7 +240,8 @@ extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int star * @param Fy - force in y direction * @param Fz - force in z direction */ -extern "C" void ScaLBL_D3Q19_AAeven_Kubo(double *dist, double *Integral, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Kubo(double *dist, double *Integral, + int start, int finish, int Np); /** * \brief Kubo integral function * @param neighborList - neighbors based on D3Q19 lattice structure @@ -223,7 +252,9 @@ extern "C" void ScaLBL_D3Q19_AAeven_Kubo(double *dist, double *Integral, int sta * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q19_AAodd_Kubo(int *neighborList, double *dist, double *Integral, int start, int 
finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_Kubo(int *neighborList, double *dist, + double *Integral, int start, int finish, + int Np); /** * \brief Kubo integral function * @param neighborList - neighbors based on D3Q19 lattice structure @@ -236,104 +267,166 @@ extern "C" void ScaLBL_D3Q19_AAodd_Kubo(int *neighborList, double *dist, double // MEMBRANE MODEL -extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef, double *dist, double *Den, int memLinks, int Np); +extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef, + double *dist, double *Den, + int memLinks, int Np); -extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef, - double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut, - int memLinks, int Nx, int Ny, int Nz, int Np); +extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef( + int *membrane, int *Map, double *Distance, double *Psi, double *coef, + double Threshold, double MassFractionIn, double MassFractionOut, + double ThresholdMassFractionIn, double ThresholdMassFractionOut, + int memLinks, int Nx, int Ny, int Nz, int Np); extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo( - const int Cqx, const int Cqy, int const Cqz, - int *Map, double *Distance, double *Psi, double Threshold, - double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut, - int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count, - const int N, const int Nx, const int Ny, const int Nz); + const int Cqx, const int Cqy, int const Cqz, int *Map, double *Distance, + double *Psi, double Threshold, double MassFractionIn, + double MassFractionOut, double ThresholdMassFractionIn, + double ThresholdMassFractionOut, int *d3q7_recvlist, int *d3q7_linkList, + double *coef, int start, int nlinks, int count, const int 
N, const int Nx, + const int Ny, const int Nz); -extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, - int *d3q7_recvlist, double *recvbuf, int count, - double *dist, int N, double *coef); +extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, int *d3q7_recvlist, + double *recvbuf, int count, + double *dist, int N, double *coef); // GREYSCALE MODEL (Single-component) extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *Dist, int Np, double Den); -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity,double *Pressure); +extern "C" void +ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, + double rlx, double rlx_eff, double Fx, double Fy, + double Fz, double *Poros, double *Perm, + double *Velocity, double *Pressure); -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity,double *Pressure); +extern "C" void +ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, + int finish, int Np, double rlx, double rlx_eff, + double Fx, double Fy, double Fz, double *Poros, + double *Perm, double *Velocity, double *Pressure); -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT( + double *dist, int start, int finish, int Np, double rlx, double rlx_eff, + double Fx, double Fy, double Fz, double *Poros, double *Perm, + double *Velocity, double Den, double *Pressure); -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double 
Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT( + int *neighborList, double *dist, int start, int finish, int Np, double rlx, + double rlx_eff, double Fx, double Fy, double Fz, double *Poros, + double *Perm, double *Velocity, double Den, double *Pressure); -extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT( + double *dist, int start, int finish, int Np, double rlx, double rlx_eff, + double Fx, double Fy, double Fz, double *Poros, double *Perm, + double *Velocity, double Den, double *Pressure); -extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, - double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_MRT( + int *neighborList, double *dist, int start, int finish, int Np, double rlx, + double rlx_eff, double Fx, double Fy, double Fz, double *Poros, + double *Perm, double *Velocity, double Den, double *Pressure); -extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, - double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel,double *Pressure, - double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, - double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor( + int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, + double *GreySolidGrad, double *Poros, double *Perm, double *Vel, + double *Pressure, double rhoA, 
double rhoB, double tauA, double tauB, + double tauA_eff, double tauB_eff, double alpha, double beta, double Fx, + double Fy, double Fz, int strideY, int strideZ, int start, int finish, + int Np); -extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, - double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel,double *Pressure, - double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, - double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor( + int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, + double *Den, double *Phi, double *GreySolidGrad, double *Poros, + double *Perm, double *Vel, double *Pressure, double rhoA, double rhoB, + double tauA, double tauB, double tauA_eff, double tauB_eff, double alpha, + double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, + int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor_CP(int *Map, double *dist, double *Aq, double *Bq, double *Den, - double *Phi, double *GreySolidW, double *GreySn, double *GreySw, double *GreyKn, double *GreyKw, double *Poros,double *Perm,double *Vel, double *MobilityRatio, double *Pressure, - double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, - double Fx, double Fy, double Fz, bool RecoloringOff, int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor_CP( + int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, + double *GreySolidW, double *GreySn, double *GreySw, double *GreyKn, + double *GreyKw, double *Poros, double *Perm, double *Vel, + double *MobilityRatio, double *Pressure, double rhoA, double rhoB, + double tauA, double tauB, double tauA_eff, double tauB_eff, double 
alpha, + double beta, double Fx, double Fy, double Fz, bool RecoloringOff, + int strideY, int strideZ, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor_CP(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, - double *Phi, double *GreySolidW, double *GreySn, double *GreySw, double *GreyKn, double *GreyKw, double *Poros, double *Perm,double *Vel, double *MobilityRatio, double *Pressure, - double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, - double Fx, double Fy, double Fz, bool RecoloringOff, int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor_CP( + int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, + double *Den, double *Phi, double *GreySolidW, double *GreySn, + double *GreySw, double *GreyKn, double *GreyKw, double *Poros, double *Perm, + double *Vel, double *MobilityRatio, double *Pressure, double rhoA, + double rhoB, double tauA, double tauB, double tauA_eff, double tauB_eff, + double alpha, double beta, double Fx, double Fy, double Fz, + bool RecoloringOff, int strideY, int strideZ, int start, int finish, + int Np); -//extern "C" void ScaLBL_Update_GreyscalePotential(int *Map, double *Phi, double *Psi, double *Poro, double *Perm, double alpha, double W, +//extern "C" void ScaLBL_Update_GreyscalePotential(int *Map, double *Phi, double *Psi, double *Poro, double *Perm, double alpha, double W, // int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Compact(double *d_dist, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_Compact(int *d_neighborList, double *d_dist, + int Np); // ION TRANSPORT MODEL -extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int 
start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, + double *dist, double *Den, + int start, int finish, + int Np); -extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, + int start, int finish, + int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_v0(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField, - double Di, int zi, double rlx, double Vt, int start, int finish, int Np); +extern "C" void +ScaLBL_D3Q7_AAodd_Ion_v0(int *neighborList, double *dist, double *Den, + double *FluxDiffusive, double *FluxAdvective, + double *FluxElectrical, double *Velocity, + double *ElectricField, double Di, int zi, double rlx, + double Vt, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_v0(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField, - double Di, int zi, double rlx, double Vt, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_v0( + double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, + double *FluxElectrical, double *Velocity, double *ElectricField, double Di, + int zi, double rlx, double Vt, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField, - double Di, int zi, double rlx, double Vt, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, + double *Den, double *FluxDiffusive, + double *FluxAdvective, + double *FluxElectrical, double *Velocity, + double *ElectricField, double Di, int zi, + double rlx, double Vt, int start, 
+ int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField, - double Di, int zi, double rlx, double Vt, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion( + double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, + double *FluxElectrical, double *Velocity, double *ElectricField, double Di, + int zi, double rlx, double Vt, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np); -extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np); +extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, + int Np); +extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, + int Np); -extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, + double *ChargeDensity, + double IonValence, + int ion_component, int start, + int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_pH_ionization( double *dist, - double *Den, double *ElectricField, double * Velocity, - double Di, double Vt, - int pH_ion, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_pH_ionization(double *dist, double *Den, + double *ElectricField, + double *Velocity, double Di, + double Vt, int pH_ion, + int start, int finish, int Np); extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, - double *Den, double *ElectricField, double *Velocity, - double Di, double Vt, - int pH_ion, int start, int finish, int Np); + double *Den, + double *ElectricField, + double *Velocity, double Di, + double Vt, int pH_ion, + int start, int finish, int Np); // LBM Poisson solver @@ -351,8 +444,12 @@ extern "C" void 
ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList,int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB, bool UseSlippingVelBC, - int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, + double *dist, double *Den_charge, + double *Psi, double *ElectricField, + double tau, double epsilon_LB, + bool UseSlippingVelBC, int start, + int finish, int Np); /** * \brief Poisson-Boltzmann collision based on AA even access pattern for D3Q7 @@ -368,8 +465,12 @@ extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList,int *Map, double *di * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, - double epsilon_LB, bool UseSlippingVelBC, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, + double *Den_charge, double *Psi, + double *ElectricField, double tau, + double epsilon_LB, + bool UseSlippingVelBC, int start, + int finish, int Np); /** * \brief Poisson-Boltzmann solver / solve electric potential based on AA odd access pattern for D3Q7 * @param neighborList - neighbors based on D3Q19 lattice structure @@ -382,7 +483,10 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_c * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np); +extern "C" void +ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int 
*neighborList, int *Map, + double *dist, double *Psi, + int start, int finish, int Np); /** * \brief Poisson-Boltzmann solver / solve electric potential based on AA odd access pattern for D3Q7 @@ -393,7 +497,8 @@ extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,in * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential( + int *Map, double *dist, double *Psi, int start, int finish, int Np); /** * \brief Initialize Poisson-Boltzmann solver @@ -404,39 +509,60 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *d * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, + int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi, + int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_Poisson( + int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, + double *ElectricField, double tau, double Vt, double Cp, double epsilon_LB, + bool UseSlippingVelBC, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map, - double *dist, double *Den_charge, - double *Psi, double *ElectricField, - double tau, double Vt, double Cp, - double epsilon_LB, bool UseSlippingVelBC, - int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Poisson( + int *Map, double *dist, double 
*Den_charge, double *Psi, + double *ElectricField, double *Error, double tau, double Vt, double Cp, + double epsilon_LB, bool UseSlippingVelBC, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist, - double *Den_charge, double *Psi, double *ElectricField, double *Error, - double tau, double Vt, double Cp, - double epsilon_LB, bool UseSlippingVelBC, - int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, + double *ElectricField, + double tau, int Np); -extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np); - - -extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vout, int count, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, + int *list, + double *dist, + double Vin, int count, + int Np); +extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_z(int *d_neighborList, + int *list, + double *dist, + double Vin, int count, + int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_Z(int *list, + double *dist, + double Vout, + int count, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_z(int *list, + double *dist, + double Vout, + int count, int Np); // LBM Stokes Model (adapted from MRT model) -extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double 
time_conv, bool UseSlippingVelBC, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT( + double *dist, double *Velocity, double *ChargeDensity, + double *ElectricField, double rlx_setA, double rlx_setB, double Gx, + double Gy, double Gz, double rho0, double den_scale, double h, + double time_conv, bool UseSlippingVelBC, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv, bool UseSlippingVelBC, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT( + int *neighborList, double *dist, double *Velocity, double *ChargeDensity, + double *ElectricField, double rlx_setA, double rlx_setB, double Gx, + double Gy, double Gz, double rho0, double den_scale, double h, + double time_conv, bool UseSlippingVelBC, int start, int finish, int Np); -extern "C" void ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np); +extern "C" void ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, + double *Bq, int start, + int finish, int Np); // MRT MODEL /** @@ -451,8 +577,10 @@ extern "C" void ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, doubl * @param Fy - force in y direction * @param Fz - force in z direction */ -extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, - double Fy, double Fz); +extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, + int Np, double rlx_setA, + double rlx_setB, double Fx, double Fy, + double Fz); /** * \brief MRT collision based on AA even access pattern for D3Q19 @@ -467,8 +595,10 @@ extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int * @param Fy - force in y direction * 
@param Fz - force in z direction */ -extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborList, double *dist, int start, int finish, int Np, - double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz); +extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborList, double *dist, + int start, int finish, int Np, + double rlx_setA, double rlx_setB, + double Fx, double Fy, double Fz); // COLOR MODEL /** @@ -495,9 +625,11 @@ extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborList, double *dist, int star * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, - double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, - double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Color( + int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, + double *Vel, double rhoA, double rhoB, double tauA, double tauB, + double alpha, double beta, double Fx, double Fy, double Fz, int strideY, + int strideZ, int start, int finish, int Np); /** * \brief Color model collision based on AA even access pattern for D3Q19 @@ -524,9 +656,11 @@ extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, do * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q19_AAodd_Color(int *NeighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, - double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, - double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_Color( + int *NeighborList, int *Map, double *dist, double *Aq, double *Bq, + double *Den, double 
*Phi, double *Vel, double rhoA, double rhoB, + double tauA, double tauB, double alpha, double beta, double Fx, double Fy, + double Fz, int strideY, int strideZ, int start, int finish, int Np); /** * \brief Compute phase field based on AA odd access pattern for D3Q19 @@ -540,8 +674,10 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *NeighborList, int *Map, double *di * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *NeighborList, int *Map, double *Aq, double *Bq, - double *Den, double *Phi, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *NeighborList, int *Map, + double *Aq, double *Bq, + double *Den, double *Phi, + int start, int finish, int Np); /** * \brief Compute phase field based on AA even access pattern for D3Q19 @@ -554,8 +690,9 @@ extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *NeighborList, int *Map, double * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi, - int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, + double *Den, double *Phi, + int start, int finish, int Np); /** * \brief Initialize phase field for color model @@ -568,170 +705,324 @@ extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, * @param finish - lattice node to finish loop * @param Np - size of local sub-domain (derived from Domain structure) */ -extern "C" void ScaLBL_PhaseField_Init(int *Map, double *Phi, double *Den, double *Aq, double *Bq, int start, int finish, int Np); +extern "C" void ScaLBL_PhaseField_Init(int *Map, double *Phi, double *Den, + double *Aq, double *Bq, int start, + int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Color(int *neighborList, int 
*Map, double *Aq, + double *Bq, double *Den, double *Phi, + double *ColorGrad, double *Vel, + double rhoA, double rhoB, double beta, + int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Color(int *neighborList, int *Map, double *Aq, double *Bq, double *Den, - double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double beta, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Color(int *Map, double *Aq, double *Bq, + double *Den, double *Phi, + double *ColorGrad, double *Vel, + double rhoA, double rhoB, double beta, + int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Color(int *Map, double *Aq, double *Bq, double *Den, - double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double beta, int start, int finish, int Np); - -extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *Phi, double *ColorGrad, int start, int finish, int Np, int Nx, int Ny, int Nz); - -extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz); +extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *Phi, double *ColorGrad, + int start, int finish, int Np, int Nx, + int Ny, int Nz); +extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, + double *Gradient, int start, + int finish, int Np, int Nx, int Ny, + int Nz); // Density functional hydrodynamics LBM -extern "C" void ScaLBL_DFH_Init(double *Phi, double *Den, double *Aq, double *Bq, int start, int finish, int Np); +extern "C" void ScaLBL_DFH_Init(double *Phi, double *Den, double *Aq, + double *Bq, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_DFH(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Phi, - double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, - double Fx, double Fy, double Fz, int start, int finish, int Np); +extern "C" void 
ScaLBL_D3Q19_AAeven_DFH( + int *neighborList, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *Gradient, double *SolidForce, double rhoA, double rhoB, + double tauA, double tauB, double alpha, double beta, double Fx, double Fy, + double Fz, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_DFH(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, - double *Phi, double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, - double Fx, double Fy, double Fz, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_DFH( + int *neighborList, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *Gradient, double *SolidForce, double rhoA, double rhoB, + double tauA, double tauB, double alpha, double beta, double Fx, double Fy, + double Fz, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_DFH(int *NeighborList, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_DFH(int *NeighborList, double *Aq, double *Bq, + double *Den, double *Phi, int start, + int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_DFH(double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_DFH(double *Aq, double *Bq, double *Den, + double *Phi, int start, int finish, + int Np); -extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double *ColorGrad, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, + double *ColorGrad, int start, + int finish, int Np); // FREE ENERGY LEE MODEL -extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np); +extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, + double *mu_phi, + double *ColorGrad, + 
double Fx, double Fy, + double Fz, int Np); -extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np); +extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, + double Fx, double Fy, + double Fz, int Np); -extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, - double rhonA, double rhoB, double tauM, double W, int start, int finish, int Np); +extern "C" void +ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, + double *hq, double *ColorGrad, double rhonA, + double rhoB, double tauM, double W, + int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, - double rhoA, double rhoB, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField( + int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, - double rhoA, double rhoB, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField( + int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, + int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, - double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField( + int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double *ColorGrad, double *Vel, double rhoA, double rhoB, double tauM, + double W, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double 
*ColorGrad, double *Vel, - double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( + int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, + double *Vel, double rhoA, double rhoB, double tauM, double W, int start, + int finish, int Np); -extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, + double *Phi, double rhoA, + double rhoB, int start, + int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, - int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel( + int *neighborList, int *Map, double *dist, double *Den, double *Phi, + double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, + double beta, double W, double Fx, double Fy, double Fz, int strideY, + int strideZ, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, - int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel( + int *Map, double *dist, double *Den, double *Phi, double *mu_phi, + double *Vel, double *Pressure, double *ColorGrad, double rhoA, double rhoB, + double tauA, double tauB, double kappa, double beta, double W, 
double Fx, + double Fy, double Fz, int strideY, int strideZ, int start, int finish, + int Np); -extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, - int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined( + int *neighborList, int *Map, double *dist, double *hq, double *Den, + double *Phi, double *mu_phi, double *Vel, double *Pressure, + double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, + double tauM, double kappa, double beta, double W, double Fx, double Fy, + double Fz, int strideY, int strideZ, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, - int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined( + int *Map, double *dist, double *hq, double *Den, double *Phi, + double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, + double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined_HigherOrder(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double 
W, double Fx, double Fy, double Fz, - int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined_HigherOrder( + int *neighborList, int *Map, double *dist, double *hq, double *Den, + double *Phi, double *mu_phi, double *Vel, double *Pressure, + double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, + double tauM, double kappa, double beta, double W, double Fx, double Fy, + double Fz, int strideY, int strideZ, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined_HigherOrder(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, - int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined_HigherOrder( + int *Map, double *dist, double *hq, double *Den, double *Phi, + double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, + double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure, - double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK( + int *neighborList, double *dist, double *Vel, double *Pressure, double tau, + double rho0, double Fx, double Fy, double Fz, int start, int finish, + int Np); -extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, - double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np); +extern "C" void 
ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK( + double *dist, double *Vel, double *Pressure, double tau, double rho0, + double Fx, double Fy, double Fz, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad,int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi, double *ColorGrad, + int strideY, int strideZ, int start, + int finish, int Np); // BOUNDARY CONDITION ROUTINES -extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, + double *dist, double din, + int count, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *neighborList, int *list, double *dist, double dout, int count, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *neighborList, int *list, + double *dist, double dout, + int count, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_z(int *list, double *dist, double din, int count, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_z(int *list, double *dist, + double din, int count, + int Np); -extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, double dout, int count, int Np); +extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, + double dout, int count, + int Np); -extern "C" double ScaLBL_D3Q19_AAodd_Flux_BC_z(int *neighborList, int *list, double *dist, double flux, - double area, int count, int N); +extern "C" double ScaLBL_D3Q19_AAodd_Flux_BC_z(int *neighborList, int *list, + double *dist, double flux, + double area, int count, int N); -extern "C" double ScaLBL_D3Q19_AAeven_Flux_BC_z(int *list, double *dist, double flux, double area, - int count, int N); +extern "C" double ScaLBL_D3Q19_AAeven_Flux_BC_z(int *list, double *dist, + double flux, double area, + int count, int N); -extern "C" void 
ScaLBL_Color_BC_z(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np); +extern "C" void ScaLBL_Color_BC_z(int *list, int *Map, double *Phi, double *Den, + double vA, double vB, int count, int Np); -extern "C" void ScaLBL_Color_BC_Z(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np); +extern "C" void ScaLBL_Color_BC_Z(int *list, int *Map, double *Phi, double *Den, + double vA, double vB, int count, int Np); -extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np); +extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, + int Np); -extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np); +extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, + int Np); -extern "C" void ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count, int Np); +extern "C" void ScaLBL_D3Q7_Reflection_BC_z(int *list, double *dist, int count, + int Np); -extern "C" void ScaLBL_D3Q7_Reflection_BC_Z(int *list, double *dist, int count, int Np); +extern "C" void ScaLBL_D3Q7_Reflection_BC_Z(int *list, double *dist, int count, + int Np); -extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice); +extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, + int Nz, int Slice); -extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Destination); +extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, + int Source, int Destination); -extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist,double *BoundaryValue,int *BounceBackDist_list,int *BounceBackSolid_list,int N); +extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, + int *BounceBackDist_list, + int *BounceBackSolid_list, int N); -extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist,double *BoundaryValue,int 
*BounceBackDist_list,int *BounceBackSolid_list,int N); +extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, + int *BounceBackDist_list, + int *BounceBackSolid_list, int N); -extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist,double *BoundaryValue,int *BoundaryLabel,int *BounceBackDist_list,int *BounceBackSolid_list,int N); +extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7( + double *dist, double *BoundaryValue, int *BoundaryLabel, + int *BounceBackDist_list, int *BounceBackSolid_list, int N); -extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad, - double epsilon_LB, double tau, double rho0,double den_scale, double h, double time_conv, - int *BounceBackDist_list, int *BounceBackSolid_list, int *FluidBoundary_list, - double *lattice_weight, float *lattice_cx, float *lattice_cy, float *lattice_cz, - int count, int Np); +extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19( + double *dist, double *zeta_potential, double *ElectricField, + double *SolidGrad, double epsilon_LB, double tau, double rho0, + double den_scale, double h, double time_conv, int *BounceBackDist_list, + int *BounceBackSolid_list, int *FluidBoundary_list, double *lattice_weight, + float *lattice_cx, float *lattice_cy, float *lattice_cz, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, + double *dist, + double Vin, int count, + int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, + double *dist, + double Vout, + int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np); +extern "C" 
void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, + int *list, + double *dist, + double Vin, int count, + int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, + int *list, + double *dist, + double Vout, int count, + int Np); -extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count); +extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, + double Vin, int count); -extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count); +extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, + double Vout, int count); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, + double *dist, + double Cin, int count, + int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, + double *dist, + double Cout, + int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, + int *list, + double *dist, + double Cin, int count, + int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, + int *list, + double *dist, + double Cout, int count, + int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double Cin, double tau, double *VelocityZ, int count, int 
Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double Cout, double tau, double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double Cin, double tau, double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, double tau, double *VelocityZ, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, + double Cin, double tau, + double *VelocityZ, + int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, + double Cout, double tau, + double *VelocityZ, + int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, + int *list, double *dist, + double Cin, double tau, + double *VelocityZ, + int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, + int *list, double *dist, + double Cout, double tau, + double *VelocityZ, + int count, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double Cin, double tau, double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double Cout, double tau, double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double Cin, double tau, double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, double tau, double *VelocityZ, int count, int Np); +extern "C" void +ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double Cin, + double tau, double *VelocityZ, + int count, int Np); +extern "C" void +ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double Cout, + 
double tau, double *VelocityZ, + int count, int Np); +extern "C" void +ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, + double *dist, double Cin, double tau, + double *VelocityZ, int count, int Np); +extern "C" void +ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, + double *dist, double Cout, double tau, + double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double Cin, double tau, double *VelocityZ,double *ElectricField,double Di,double zi,double Vt,int count,int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double Cout, double tau, double *VelocityZ,double *ElectricField,double Di,double zi,double Vt,int count,int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double Cin, double tau, double *VelocityZ,double *ElectricField,double Di,double zi,double Vt, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, double tau, double *VelocityZ,double *ElectricField,double Di,double zi,double Vt, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z( + int *list, double *dist, double Cin, double tau, double *VelocityZ, + double *ElectricField, double Di, double zi, double Vt, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z( + int *list, double *dist, double Cout, double tau, double *VelocityZ, + double *ElectricField, double Di, double zi, double Vt, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z( + int *d_neighborList, int *list, double *dist, double Cin, double tau, + double *VelocityZ, double *ElectricField, double Di, double zi, double Vt, + int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z( + int *d_neighborList, int 
*list, double *dist, double Cout, double tau, + double *VelocityZ, double *ElectricField, double Di, double zi, double Vt, + int count, int Np); /** * \class ScaLBL_Communicator @@ -739,67 +1030,80 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList * @brief Generalized communication routines for lattice Boltzmann methods. * */ -class ScaLBL_Communicator{ +class ScaLBL_Communicator { public: - //...................................................................................... - /** + //...................................................................................... + /** *\brief Constructor * @param Dm - Domain information */ - ScaLBL_Communicator(std::shared_ptr<Domain> Dm); + ScaLBL_Communicator(std::shared_ptr<Domain> Dm); - /** + /** *\brief Destructor */ - ~ScaLBL_Communicator(); - //...................................................................................... - Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain - int rank; - int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z; - int rank_xy,rank_XY,rank_xY,rank_Xy; - int rank_xz,rank_XZ,rank_xZ,rank_Xz; - int rank_yz,rank_YZ,rank_yZ,rank_Yz; - //...................................................................................... - unsigned long int CommunicationCount,SendCount,RecvCount; - int Nx,Ny,Nz,N; - int iproc,jproc,kproc; - int nprocx,nprocy,nprocz; - int n_bb_d3q7, n_bb_d3q19; - int BoundaryCondition; - //...................................................................................... - int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; - int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; - int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; - //......................................................................................
- int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; - int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; - int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; - //...................................................................................... - - int next; - int first_interior,last_interior; - //...................................................................................... - // Set up for D319 distributions - // - determines how much memory is allocated - // - buffers are reused to send D3Q7 distributions and halo exchange as needed - //...................................................................................... - // Buffers to store data sent and recieved by this MPI process - double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z; - double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ; - double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, *sendbuf_XZ; - double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z; - double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, *recvbuf_xZ; - double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ; - //...................................................................................... + ~ScaLBL_Communicator(); + //...................................................................................... + Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain + int rank; + int rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z; + int rank_xy, rank_XY, rank_xY, rank_Xy; + int rank_xz, rank_XZ, rank_xZ, rank_Xz; + int rank_yz, rank_YZ, rank_yZ, rank_Yz; + //...................................................................................... 
+ unsigned long int CommunicationCount, SendCount, RecvCount; + int Nx, Ny, Nz, N; + int iproc, jproc, kproc; + int nprocx, nprocy, nprocz; + int n_bb_d3q7, n_bb_d3q19; + int BoundaryCondition; + //...................................................................................... + int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, + sendCount_Z; + int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, + sendCount_xZ; + int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, + sendCount_XZ; + //...................................................................................... + int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, + recvCount_Z; + int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, + recvCount_xZ; + int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, + recvCount_XZ; + //...................................................................................... - int LastExterior(); - int FirstInterior(); - int LastInterior(); - int copySendList(const char *dir, int *buffer); - int copyRecvList(const char *dir, int *buffer); - - double GetPerformance(int *NeighborList, double *fq, int Np); - int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np, int width); + int next; + int first_interior, last_interior; + //...................................................................................... + // Set up for D319 distributions + // - determines how much memory is allocated + // - buffers are reused to send D3Q7 distributions and halo exchange as needed + //...................................................................................... 
+ // Buffers to store data sent and recieved by this MPI process + double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, + *sendbuf_Z; + double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, + *sendbuf_xZ; + double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, + *sendbuf_XZ; + double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, + *recvbuf_Z; + double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, + *recvbuf_xZ; + double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, + *recvbuf_XZ; + //...................................................................................... + + int LastExterior(); + int FirstInterior(); + int LastInterior(); + int copySendList(const char *dir, int *buffer); + int copyRecvList(const char *dir, int *buffer); + + double GetPerformance(int *NeighborList, double *fq, int Np); + int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, + signed char *id, int Np, int width); /** * \brief Create membrane data structure * - cut lattice links based on distance map @@ -808,101 +1112,141 @@ public: * @param Np - number of lattice sites * @param width - halo width for the model */ - void Barrier(){ - ScaLBL_DeviceBarrier(); - MPI_COMM_SCALBL.barrier(); - }; - void SendD3Q19AA(double *dist); - void RecvD3Q19AA(double *dist); - void SendD3Q7AA(double *fq, int Component); - void RecvD3Q7AA(double *fq, int Component); - void BiSendD3Q7AA(double *Aq, double *Bq); - void BiRecvD3Q7AA(double *Aq, double *Bq); - void TriSendD3Q7AA(double *Aq, double *Bq, double *Cq); - void TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq); - void SendHalo(double *data); - void RecvHalo(double *data); - void RecvGrad(double *Phi, double *Gradient); - void RegularLayout(IntArray map, const double *data, DoubleArray &regdata); - void SetupBounceBackList(IntArray &Map, signed char *id, int Np, bool SlippingVelBC=false); + void Barrier() { + ScaLBL_DeviceBarrier(); +
MPI_COMM_SCALBL.barrier(); + }; + void SendD3Q19AA(double *dist); + void RecvD3Q19AA(double *dist); + void SendD3Q7AA(double *fq, int Component); + void RecvD3Q7AA(double *fq, int Component); + void BiSendD3Q7AA(double *Aq, double *Bq); + void BiRecvD3Q7AA(double *Aq, double *Bq); + void TriSendD3Q7AA(double *Aq, double *Bq, double *Cq); + void TriRecvD3Q7AA(double *Aq, double *Bq, double *Cq); + void SendHalo(double *data); + void RecvHalo(double *data); + void RecvGrad(double *Phi, double *Gradient); + void RegularLayout(IntArray map, const double *data, DoubleArray &regdata); + void SetupBounceBackList(IntArray &Map, signed char *id, int Np, + bool SlippingVelBC = false); void SolidDirichletD3Q7(double *fq, double *BoundaryValue); void SolidNeumannD3Q7(double *fq, double *BoundaryValue); - void SolidDirichletAndNeumannD3Q7(double *fq, double *BoundaryValue, int *BoundaryLabel); - void SolidSlippingVelocityBCD3Q19(double *fq, double *zeta_potential, double *ElectricField, double *SolidGrad, - double epslion_LB, double tau, double rho0, double den_scale,double h, double time_conv); + void SolidDirichletAndNeumannD3Q7(double *fq, double *BoundaryValue, + int *BoundaryLabel); + void SolidSlippingVelocityBCD3Q19(double *fq, double *zeta_potential, + double *ElectricField, double *SolidGrad, + double epslion_LB, double tau, + double rho0, double den_scale, double h, + double time_conv); // Routines to set boundary conditions void Color_BC_z(int *Map, double *Phi, double *Den, double vA, double vB); void Color_BC_Z(int *Map, double *Phi, double *Den, double vA, double vB); - void D3Q19_Pressure_BC_z(int *neighborList, double *fq, double din, int time); - void D3Q19_Pressure_BC_Z(int *neighborList, double *fq, double dout, int time); + void D3Q19_Pressure_BC_z(int *neighborList, double *fq, double din, + int time); + void D3Q19_Pressure_BC_Z(int *neighborList, double *fq, double dout, + int time); void D3Q19_Reflection_BC_z(double *fq); void D3Q19_Reflection_BC_Z(double
*fq); - double D3Q19_Flux_BC_z(int *neighborList, double *fq, double flux, int time); - void D3Q7_Poisson_Potential_BC_z(int *neighborList, double *fq, double Vin, int time); - void D3Q7_Poisson_Potential_BC_Z(int *neighborList, double *fq, double Vout, int time); - void D3Q19_Poisson_Potential_BC_z(int *neighborList, double *fq, double Vin, int time); - void D3Q19_Poisson_Potential_BC_Z(int *neighborList, double *fq, double Vout, int time); + double D3Q19_Flux_BC_z(int *neighborList, double *fq, double flux, + int time); + void D3Q7_Poisson_Potential_BC_z(int *neighborList, double *fq, double Vin, + int time); + void D3Q7_Poisson_Potential_BC_Z(int *neighborList, double *fq, double Vout, + int time); + void D3Q19_Poisson_Potential_BC_z(int *neighborList, double *fq, double Vin, + int time); + void D3Q19_Poisson_Potential_BC_Z(int *neighborList, double *fq, + double Vout, int time); void Poisson_D3Q7_BC_z(int *Map, double *Psi, double Vin); void Poisson_D3Q7_BC_Z(int *Map, double *Psi, double Vout); - void D3Q7_Ion_Concentration_BC_z(int *neighborList, double *fq, double Cin, int time); - void D3Q7_Ion_Concentration_BC_Z(int *neighborList, double *fq, double Cout, int time); - void D3Q7_Ion_Flux_Diff_BC_z(int *neighborList, double *fq, double Cin, double tau, double *VelocityZ, int time); - void D3Q7_Ion_Flux_Diff_BC_Z(int *neighborList, double *fq, double Cout, double tau, double *VelocityZ, int time); - void D3Q7_Ion_Flux_DiffAdvc_BC_z(int *neighborList, double *fq, double Cin, double tau, double *VelocityZ, int time); - void D3Q7_Ion_Flux_DiffAdvc_BC_Z(int *neighborList, double *fq, double Cout, double tau, double *VelocityZ, int time); - void D3Q7_Ion_Flux_DiffAdvcElec_BC_z(int *neighborList,double *fq,double Cin,double tau,double *VelocityZ,double *ElectricField_Z,double Di,double zi,double Vt, int time); - void D3Q7_Ion_Flux_DiffAdvcElec_BC_Z(int *neighborList,double *fq,double Cout,double tau,double *VelocityZ,double *ElectricField_Z,double Di,double 
zi,double Vt, int time); - void GreyscaleSC_BC_z(int *Map, double *DenA, double *DenB, double vA, double vB); - void GreyscaleSC_BC_Z(int *Map, double *DenA, double *DenB, double vA, double vB); - void GreyscaleSC_Pressure_BC_z(int *neighborList, double *fqA, double *fqB, double dinA, double dinB, int time); - void GreyscaleSC_Pressure_BC_Z(int *neighborList, double *fqA, double *fqB, double doutA, double doutB, int time); + void D3Q7_Ion_Concentration_BC_z(int *neighborList, double *fq, double Cin, + int time); + void D3Q7_Ion_Concentration_BC_Z(int *neighborList, double *fq, double Cout, + int time); + void D3Q7_Ion_Flux_Diff_BC_z(int *neighborList, double *fq, double Cin, + double tau, double *VelocityZ, int time); + void D3Q7_Ion_Flux_Diff_BC_Z(int *neighborList, double *fq, double Cout, + double tau, double *VelocityZ, int time); + void D3Q7_Ion_Flux_DiffAdvc_BC_z(int *neighborList, double *fq, double Cin, + double tau, double *VelocityZ, int time); + void D3Q7_Ion_Flux_DiffAdvc_BC_Z(int *neighborList, double *fq, double Cout, + double tau, double *VelocityZ, int time); + void D3Q7_Ion_Flux_DiffAdvcElec_BC_z(int *neighborList, double *fq, + double Cin, double tau, + double *VelocityZ, + double *ElectricField_Z, double Di, + double zi, double Vt, int time); + void D3Q7_Ion_Flux_DiffAdvcElec_BC_Z(int *neighborList, double *fq, + double Cout, double tau, + double *VelocityZ, + double *ElectricField_Z, double Di, + double zi, double Vt, int time); + void GreyscaleSC_BC_z(int *Map, double *DenA, double *DenB, double vA, + double vB); + void GreyscaleSC_BC_Z(int *Map, double *DenA, double *DenB, double vA, + double vB); + void GreyscaleSC_Pressure_BC_z(int *neighborList, double *fqA, double *fqB, + double dinA, double dinB, int time); + void GreyscaleSC_Pressure_BC_Z(int *neighborList, double *fqA, double *fqB, + double doutA, double doutB, int time); // Debugging and unit testing functions void PrintD3Q19(); private: - void D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, 
const int *list, int start, int count, int *d3q19_recvlist); + void D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, + int count, int *d3q19_recvlist); - bool Lock; // use Lock to make sure only one call at a time to protect data in transit - // only one set of Send requests can be active at any time (per instance) - int i,j,k,n; + bool + Lock; // use Lock to make sure only one call at a time to protect data in transit + // only one set of Send requests can be active at any time (per instance) + int i, j, k, n; - int sendtag,recvtag; - // Give the object it's own MPI communicator - RankInfoStruct rank_info; - MPI_Request req1[18],req2[18]; - //...................................................................................... - // MPI ranks for all 18 neighbors - //...................................................................................... - // These variables are all private to prevent external things from modifying them!! -//...................................................................................... 
- // Send buffers that reside on the compute device - int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z; - int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ; - int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ; - // Recieve buffers that reside on the compute device - int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z; - int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ; - int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ; - // Recieve buffers for the distributions - int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z; - int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ; - int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ; - //...................................................................................... + int sendtag, recvtag; + // Give the object it's own MPI communicator + RankInfoStruct rank_info; + MPI_Request req1[18], req2[18]; + //...................................................................................... + // MPI ranks for all 18 neighbors + //...................................................................................... + // These variables are all private to prevent external things from modifying them!! + //...................................................................................... 
+ // Send buffers that reside on the compute device + int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, + *dvcSendList_Y, *dvcSendList_Z; + int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, + *dvcSendList_Yz, *dvcSendList_xZ; + int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, + *dvcSendList_YZ, *dvcSendList_XZ; + // Recieve buffers that reside on the compute device + int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, + *dvcRecvList_Y, *dvcRecvList_Z; + int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, + *dvcRecvList_Yz, *dvcRecvList_xZ; + int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, + *dvcRecvList_YZ, *dvcRecvList_XZ; + // Recieve buffers for the distributions + int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, + *dvcRecvDist_Y, *dvcRecvDist_Z; + int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, + *dvcRecvDist_Yz, *dvcRecvDist_xZ; + int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, + *dvcRecvDist_YZ, *dvcRecvDist_XZ; + //...................................................................................... // MPI requests for persistent communications std::vector> req_D3Q19AA; std::vector> req_BiD3Q19AA; std::vector> req_TriD3Q19AA; - void start( std::vector>& requests ); - void wait( std::vector>& requests ); - //...................................................................................... - int *bb_dist; - int *bb_interactions; + void start(std::vector> &requests); + void wait(std::vector> &requests); + //...................................................................................... + int *bb_dist; + int *bb_interactions; int *fluid_boundary; double *lattice_weight; float *lattice_cx, *lattice_cy, *lattice_cz; - //...................................................................................... 
- + //...................................................................................... }; #endif diff --git a/common/SpherePack.cpp b/common/SpherePack.cpp index 53785fa9..e26a4bc9 100644 --- a/common/SpherePack.cpp +++ b/common/SpherePack.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include #include diff --git a/common/SpherePack.h b/common/SpherePack.h index 1ab7fbaa..2fbdfca3 100644 --- a/common/SpherePack.h +++ b/common/SpherePack.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #ifndef SpherePack_INC #define SpherePack_INC diff --git a/common/UnitTest.cpp b/common/UnitTest.cpp index 71e80464..9ce551e1 100644 --- a/common/UnitTest.cpp +++ b/common/UnitTest.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "common/UnitTest.h" #include "common/Utilities.h" #include diff --git a/common/UnitTest.h b/common/UnitTest.h index cb169cb2..693c9c72 100644 --- a/common/UnitTest.h +++ b/common/UnitTest.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #ifndef included_UnitTest #define included_UnitTest diff --git a/common/Units.cpp b/common/Units.cpp index d8df428c..13b2d1e3 100644 --- a/common/Units.cpp +++ b/common/Units.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "common/Units.h" #include "common/Utilities.h" diff --git a/common/Units.h b/common/Units.h index 56f587b2..cdbd25ca 100644 --- a/common/Units.h +++ b/common/Units.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #ifndef included_Units #define included_Units diff --git a/common/Utilities.cpp b/common/Utilities.cpp index 03ce113f..70ca19a4 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include "common/Utilities.h" #include "StackTrace/StackTrace.h" #include "StackTrace/ErrorHandlers.h" diff --git a/common/Utilities.h b/common/Utilities.h index b8d1b760..e9567535 100644 --- a/common/Utilities.h +++ b/common/Utilities.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #ifndef included_Utilities #define included_Utilities diff --git a/common/Utilities.hpp b/common/Utilities.hpp index e84c62c8..587ee171 100644 --- a/common/Utilities.hpp +++ b/common/Utilities.hpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #ifndef included_Utilities_hpp #define included_Utilities_hpp @@ -47,7 +63,7 @@ template void quicksort(std::vector &x) { } else { k = (l + ir) / 2; // Choose median of left, center and right elements as partitioning - // element a. Also rearrange so that a(l) < a(l+1) < a(ir). + // element a. Also rearrange so that a(l) < a(l+1) < a(ir). tmp_a = arr[k]; arr[k] = arr[l + 1]; arr[l + 1] = tmp_a; @@ -140,7 +156,7 @@ void quicksort(std::vector &x, std::vector &y) { } else { k = (l + ir) / 2; // Choose median of left, center and right elements as partitioning - // element a. Also rearrange so that a(l) ? a(l+1) ? a(ir). + // element a. Also rearrange so that a(l) ? a(l+1) ? a(ir). tmp_a = arr[k]; arr[k] = arr[l + 1]; arr[l + 1] = tmp_a; diff --git a/common/UtilityMacros.h b/common/UtilityMacros.h index bde15f23..631b0836 100644 --- a/common/UtilityMacros.h +++ b/common/UtilityMacros.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. 
McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // This file contains useful macros including ERROR, WARNING, INSIST, ASSERT, etc. #ifndef included_UtilityMacros #define included_UtilityMacros diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 0368740a..2acde32a 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ /* This class implements support for halo widths larger than 1 */ diff --git a/common/WideHalo.h b/common/WideHalo.h index 28455f10..73d68f77 100644 --- a/common/WideHalo.h +++ b/common/WideHalo.h @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ /* This class implements support for halo widths larger than 1 */ diff --git a/cpu/BGK.cpp b/cpu/BGK.cpp index 5f50519a..90e6bf1a 100644 --- a/cpu/BGK.cpp +++ b/cpu/BGK.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz) { diff --git a/cpu/Color.cpp b/cpu/Color.cpp index c55ed5ff..526bcafa 100644 --- a/cpu/Color.cpp +++ b/cpu/Color.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #define STOKES diff --git a/cpu/D3Q19.cpp b/cpu/D3Q19.cpp index 4483453e..2a85dd40 100644 --- a/cpu/D3Q19.cpp +++ b/cpu/D3Q19.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #include extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, @@ -32,7 +48,6 @@ extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, } } - extern "C" void ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np) { int n; for (n = 0; n < Np; n++) { @@ -1926,8 +1941,8 @@ extern "C" void ScaLBL_D3Q19_AAeven_Compact(double *dist, int Np) { } } -extern "C" void ScaLBL_D3Q19_AAodd_Compact(int *neighborList, - double *dist, int Np) { +extern "C" void ScaLBL_D3Q19_AAodd_Compact(int *neighborList, double *dist, + int Np) { int nread; for (int n = 0; n < Np; n++) { diff --git a/cpu/D3Q7.cpp b/cpu/D3Q7.cpp index e75abe32..2c7b909a 100644 --- a/cpu/D3Q7.cpp +++ b/cpu/D3Q7.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // CPU Functions for D3Q7 Lattice Boltzmann Methods extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, diff --git a/cpu/D3Q7BC.cpp b/cpu/D3Q7BC.cpp index 3f30fde4..6d45bb01 100644 --- a/cpu/D3Q7BC.cpp +++ b/cpu/D3Q7BC.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). 
+ OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // CPU Functions for D3Q7 Lattice Boltzmann Methods // Boundary Conditions @@ -35,31 +51,39 @@ extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, } } -extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist,double *BoundaryValue,int* BoundaryLabel,int *BounceBackDist_list,int *BounceBackSolid_list,int N){ +extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7( + double *dist, double *BoundaryValue, int *BoundaryLabel, + int *BounceBackDist_list, int *BounceBackSolid_list, int N) { int idx; - int iq,ib; - double value_b,value_b_label,value_q; - for (idx=0; idx. +*/ // Basic cuda functions callable from C/C++ code #include #include diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 3fe57030..824e3a5c 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index 779b1e45..f0d8edc5 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include extern "C" void diff --git a/cpu/GreyscaleColor.cpp b/cpu/GreyscaleColor.cpp index 345c2433..05580e6a 100644 --- a/cpu/GreyscaleColor.cpp +++ b/cpu/GreyscaleColor.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. 
If not, see . +*/ #include extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor( diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp index f1e8e689..935893c2 100644 --- a/cpu/Ion.cpp +++ b/cpu/Ion.cpp @@ -1,13 +1,31 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include /***** pH equilibrium ******/ extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, - double *Den, double *ElectricField, double *Velocity, - double Di, double Vt, - int pH_ion, int start, int finish, int Np) { + double *Den, + double *ElectricField, + double *Velocity, double Di, + double Vt, int pH_ion, + int start, int finish, int Np) { int n; - double Ex, Ey, Ez; //electrical field + double Ex, Ey, Ez; //electrical field double ux, uy, uz; double uEPx, uEPy, uEPz; //electrochemical induced velocity double Ca, Cb; @@ -16,8 +34,8 @@ extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, double f0, f1, f2, f3, f4, f5, f6; int nr1, nr2, nr3, nr4, nr5, nr6; double rhoe, tmp; - // double factor = Di / (Vt *Vt* ionizationEnergy); - + // double factor = Di / (Vt *Vt* ionizationEnergy); + for (n = start; n < finish; n++) { //Load data @@ -25,11 +43,11 @@ extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, Ex = ElectricField[n + 0 * Np]; Ey = 
ElectricField[n + 1 * Np]; Ez = ElectricField[n + 2 * Np]; - + ux = Velocity[n + 0 * Np]; uy = Velocity[n + 1 * Np]; uz = Velocity[n + 2 * Np]; - + uEPx = Di / Vt * Ex; uEPy = Di / Vt * Ey; uEPz = Di / Vt * Ez; @@ -47,15 +65,17 @@ extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, nr5 = neighborList[n + 4 * Np]; // q=6 nr6 = neighborList[n + 5 * Np]; - - A0 = dist[pH_ion*7*Np + n]; - A1 = dist[pH_ion*7*Np + nr1]; // reading the A1 data into register Aq - A2 = dist[pH_ion*7*Np + nr2]; // reading the A2 data into register Aq - A3 = dist[pH_ion*7*Np + nr3]; - A4 = dist[pH_ion*7*Np + nr4]; - A5 = dist[pH_ion*7*Np + nr5]; - A6 = dist[pH_ion*7*Np + nr6]; - + + A0 = dist[pH_ion * 7 * Np + n]; + A1 = + dist[pH_ion * 7 * Np + nr1]; // reading the A1 data into register Aq + A2 = + dist[pH_ion * 7 * Np + nr2]; // reading the A2 data into register Aq + A3 = dist[pH_ion * 7 * Np + nr3]; + A4 = dist[pH_ion * 7 * Np + nr4]; + A5 = dist[pH_ion * 7 * Np + nr5]; + A6 = dist[pH_ion * 7 * Np + nr6]; + /* B0 = dist[hydroxide*7*Np + n]; B1 = dist[hydroxide*7*Np + nr1]; // reading the B1 data into register Bq @@ -65,16 +85,16 @@ extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, B5 = dist[hydroxide*7*Np + nr5]; B6 = dist[hydroxide*7*Np + nr6]; */ - + // charge density rhoe = A0 + A1 + A2 + A3 + A4 + A5 + A6; //rhoe = Ca - Cb; // new equilibrium - tmp = sqrt(rhoe*rhoe + 4.04e-14); + tmp = sqrt(rhoe * rhoe + 4.04e-14); Ca = rhoe + tmp; Cb = Ca - rhoe; - - Den[pH_ion*Np + n] = Ca - Cb; + + Den[pH_ion * Np + n] = Ca - Cb; // proton production A1 = 0.125 * Ca * (1.0 + 4.0 * (ux + uEPx)); @@ -82,10 +102,10 @@ extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, A3 = 0.125 * Ca * (1.0 + 4.0 * (uy) + uEPy); A4 = 0.125 * Ca * (1.0 - 4.0 * (uy) + uEPy); A5 = 0.125 * Ca * (1.0 + 4.0 * (uz) + uEPz); - A6 = 0.125 * Ca * (1.0 - 4.0 * (uz) + uEPz); - - A0 = Ca - (A1+A2+A3+A4+A5+A6); - + A6 = 0.125 * Ca * (1.0 - 4.0 
* (uz) + uEPz); + + A0 = Ca - (A1 + A2 + A3 + A4 + A5 + A6); + // hydroxide ions created by water ionization (no net charge increase) //Cb += (f1 + f2 + f3 + f4 + f5 + f6); // use relative mass of hydroxide + momentum conservation @@ -95,26 +115,26 @@ extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, B4 = 0.125 * Cb * (1.0 - 4.0 * (uy - uEPy)); B5 = 0.125 * Cb * (1.0 + 4.0 * (uz - uEPz)); B6 = 0.125 * Cb * (1.0 - 4.0 * (uz - uEPz)); - + B0 = Cb - (B1 + B2 + B3 + B4 + B5 + B6); - + B0 = Cb - (B1 + B2 + B3 + B4 + B5 + B6); - - f0 = A0 - B0; + + f0 = A0 - B0; f1 = A1 - B1; f2 = A2 - B2; f3 = A3 - B3; f4 = A4 - B4; f5 = A5 - B5; - f6 = A6 - B6; + f6 = A6 - B6; - dist[pH_ion*7*Np + n] = f0; - dist[pH_ion*7*Np + nr2] = f1; - dist[pH_ion*7*Np + nr1] = f2; - dist[pH_ion*7*Np + nr4] = f3; - dist[pH_ion*7*Np + nr3] = f4; - dist[pH_ion*7*Np + nr6] = f5; - dist[pH_ion*7*Np + nr5] = f6; + dist[pH_ion * 7 * Np + n] = f0; + dist[pH_ion * 7 * Np + nr2] = f1; + dist[pH_ion * 7 * Np + nr1] = f2; + dist[pH_ion * 7 * Np + nr4] = f3; + dist[pH_ion * 7 * Np + nr3] = f4; + dist[pH_ion * 7 * Np + nr6] = f5; + dist[pH_ion * 7 * Np + nr5] = f6; /* dist[pH_ion*7*Np + n] = f0; dist[pH_ion*7*Np + nr1] = f1; @@ -124,18 +144,15 @@ extern "C" void ScaLBL_D3Q7_AAodd_pH_ionization(int *neighborList, double *dist, dist[pH_ion*7*Np + nr5] = f5; dist[pH_ion*7*Np + nr6] = f6; */ - - } } -extern "C" void ScaLBL_D3Q7_AAeven_pH_ionization( double *dist, - double *Den, double *ElectricField, double * Velocity, - double Di, double Vt, - int pH_ion, int start, int finish, int Np) { - +extern "C" void ScaLBL_D3Q7_AAeven_pH_ionization( + double *dist, double *Den, double *ElectricField, double *Velocity, + double Di, double Vt, int pH_ion, int start, int finish, int Np) { + int n; - double Ex, Ey, Ez; //electrical field + double Ex, Ey, Ez; //electrical field double ux, uy, uz; double uEPx, uEPy, uEPz; //electrochemical induced velocity double Ca, Cb; @@ -143,42 +160,46 @@ extern 
"C" void ScaLBL_D3Q7_AAeven_pH_ionization( double *dist, double B0, B1, B2, B3, B4, B5, B6; double f0, f1, f2, f3, f4, f5, f6; double rhoe, tmp; - // double factor = Di / (Vt *Vt* ionizationEnergy); - + // double factor = Di / (Vt *Vt* ionizationEnergy); + for (n = start; n < finish; n++) { //Load data //Ci = Den[n]; Ex = ElectricField[n + 0 * Np]; Ey = ElectricField[n + 1 * Np]; Ez = ElectricField[n + 2 * Np]; - + ux = Velocity[n + 0 * Np]; uy = Velocity[n + 1 * Np]; uz = Velocity[n + 2 * Np]; - + uEPx = Di / Vt * Ex; uEPy = Di / Vt * Ey; uEPz = Di / Vt * Ez; - - A0 = dist[pH_ion*7*Np + n]; - A1 = dist[pH_ion*7*Np +2 * Np + n]; - A2 = dist[pH_ion*7*Np +1 * Np + n]; - A3 = dist[pH_ion*7*Np +4 * Np + n]; - A4 = dist[pH_ion*7*Np +3 * Np + n]; - A5 = dist[pH_ion*7*Np +6 * Np + n]; - A6 = dist[pH_ion*7*Np +5 * Np + n]; + + A0 = dist[pH_ion * 7 * Np + n]; + A1 = dist[pH_ion * 7 * Np + 2 * Np + n]; + A2 = dist[pH_ion * 7 * Np + 1 * Np + n]; + A3 = dist[pH_ion * 7 * Np + 4 * Np + n]; + A4 = dist[pH_ion * 7 * Np + 3 * Np + n]; + A5 = dist[pH_ion * 7 * Np + 6 * Np + n]; + A6 = dist[pH_ion * 7 * Np + 5 * Np + n]; // charge density rhoe = A0 + A1 + A2 + A3 + A4 + A5 + A6; //rhoe = Ca - Cb; // new equilibrium - tmp = sqrt(rhoe*rhoe + 4.04e-14); + tmp = sqrt(rhoe * rhoe + 4.04e-14); Ca = rhoe + tmp; Cb = Ca - rhoe; - if (Ca < 0.0) printf("Error in hydronium concentration, %f (charge density = %f) \n", Ca, rhoe); - if (Cb < 0.0) printf("Error in hydroxide concentration, %f \n", Cb); - - Den[pH_ion*Np + n] = Ca - Cb; + if (Ca < 0.0) + printf( + "Error in hydronium concentration, %f (charge density = %f) \n", + Ca, rhoe); + if (Cb < 0.0) + printf("Error in hydroxide concentration, %f \n", Cb); + + Den[pH_ion * Np + n] = Ca - Cb; // proton production A1 = 0.125 * Ca * (1.0 + 4.0 * (ux + uEPx)); @@ -186,10 +207,10 @@ extern "C" void ScaLBL_D3Q7_AAeven_pH_ionization( double *dist, A3 = 0.125 * Ca * (1.0 + 4.0 * (uy) + uEPy); A4 = 0.125 * Ca * (1.0 - 4.0 * (uy) + uEPy); A5 = 0.125 * 
Ca * (1.0 + 4.0 * (uz) + uEPz); - A6 = 0.125 * Ca * (1.0 - 4.0 * (uz) + uEPz); - - A0 = Ca - (A1+A2+A3+A4+A5+A6); - + A6 = 0.125 * Ca * (1.0 - 4.0 * (uz) + uEPz); + + A0 = Ca - (A1 + A2 + A3 + A4 + A5 + A6); + // hydroxide ions created by water ionization (no net charge increase) //Cb += (f1 + f2 + f3 + f4 + f5 + f6); // use relative mass of hydroxide + momentum conservation @@ -199,30 +220,30 @@ extern "C" void ScaLBL_D3Q7_AAeven_pH_ionization( double *dist, B4 = 0.125 * Cb * (1.0 - 4.0 * (uy - uEPy)); B5 = 0.125 * Cb * (1.0 + 4.0 * (uz - uEPz)); B6 = 0.125 * Cb * (1.0 - 4.0 * (uz - uEPz)); - + B0 = Cb - (B1 + B2 + B3 + B4 + B5 + B6); - - f0 = A0 - B0; + + f0 = A0 - B0; f1 = A1 - B1; f2 = A2 - B2; f3 = A3 - B3; f4 = A4 - B4; f5 = A5 - B5; - f6 = A6 - B6; - - if (Ez > 0.0 && n == start){ - printf("Ca = %.5g, Cb = %.5g \n", Ca, Cb); - printf(" charge density = %.5g \n", rhoe); - printf(" Ez = %.5g, A5 = %.5g, A6 = %.5g \n", Ez, f5, f6); + f6 = A6 - B6; + + if (Ez > 0.0 && n == start) { + printf("Ca = %.5g, Cb = %.5g \n", Ca, Cb); + printf(" charge density = %.5g \n", rhoe); + printf(" Ez = %.5g, A5 = %.5g, A6 = %.5g \n", Ez, f5, f6); } - dist[pH_ion*7*Np + n] = f0; - dist[pH_ion*7*Np +1 * Np + n] = f1; - dist[pH_ion*7*Np +2 * Np + n] = f2; - dist[pH_ion*7*Np +3 * Np + n] = f3; - dist[pH_ion*7*Np +4 * Np + n] = f4; - dist[pH_ion*7*Np +5 * Np + n] = f5; - dist[pH_ion*7*Np +6 * Np + n] = f6; + dist[pH_ion * 7 * Np + n] = f0; + dist[pH_ion * 7 * Np + 1 * Np + n] = f1; + dist[pH_ion * 7 * Np + 2 * Np + n] = f2; + dist[pH_ion * 7 * Np + 3 * Np + n] = f3; + dist[pH_ion * 7 * Np + 4 * Np + n] = f4; + dist[pH_ion * 7 * Np + 5 * Np + n] = f5; + dist[pH_ion * 7 * Np + 6 * Np + n] = f6; /* dist[pH_ion*7*Np +2 * Np + n] = f1; dist[pH_ion*7*Np +1 * Np + n] = f2; @@ -231,108 +252,115 @@ extern "C" void ScaLBL_D3Q7_AAeven_pH_ionization( double *dist, dist[pH_ion*7*Np +6 * Np + n] = f5; dist[pH_ion*7*Np +5 * Np + n] = f6; */ - } } /**** end of pH equlibrium model ********/ +extern 
"C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef( + int *membrane, int *Map, double *Distance, double *Psi, double *coef, + double Threshold, double MassFractionIn, double MassFractionOut, + double ThresholdMassFractionIn, double ThresholdMassFractionOut, + int memLinks, int Nx, int Ny, int Nz, int Np) { -extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef, - double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut, - int memLinks, int Nx, int Ny, int Nz, int Np){ - - int link,iq,ip,nq,np,nqm,npm; - double aq, ap, membranePotential; - //double dq, dp, dist, orientation; - /* Interior Links */ - for (link=0; link Threshold){ - aq = ThresholdMassFractionIn; ap = ThresholdMassFractionOut; - } - - /* Save the mass transfer coefficients */ - //coef[2*link] = aq*orientation; coef[2*link+1] = ap*orientation; - coef[2*link] = aq; coef[2*link+1] = ap; - } + int link, iq, ip, nq, np, nqm, npm; + double aq, ap, membranePotential; + //double dq, dp, dist, orientation; + /* Interior Links */ + for (link = 0; link < memLinks; link++) { + + // inside //outside + aq = MassFractionIn; + ap = MassFractionOut; + iq = membrane[2 * link]; + ip = membrane[2 * link + 1]; + nq = iq % Np; + np = ip % Np; + nqm = Map[nq]; + npm = Map[np]; // strided layout + //dq = Distance[nqm]; dp = Distance[npm]; + /* orientation for link to distance gradient*/ + //orientation = 1.0/fabs(dq - dp); + + /* membrane potential for this link */ + membranePotential = Psi[nqm] - Psi[npm]; + if (membranePotential > Threshold) { + aq = ThresholdMassFractionIn; + ap = ThresholdMassFractionOut; + } + + /* Save the mass transfer coefficients */ + //coef[2*link] = aq*orientation; coef[2*link+1] = ap*orientation; + coef[2 * link] = aq; + coef[2 * link + 1] = ap; + } } extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo( - const int Cqx, const int Cqy, int const Cqz, - int *Map, 
double *Distance, double *Psi, double Threshold, - double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut, - int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count, - const int N, const int Nx, const int Ny, const int Nz) { - //.................................................................................... - // Unack distribution from the recv buffer - // Distribution q matche Cqx, Cqy, Cqz - // swap rule means that the distributions in recvbuf are OPPOSITE of q - // dist may be even or odd distributions stored by stream layout - //.................................................................................... - int n, idx, label, nqm, npm, i, j, k; - double distanceLocal;//, distanceNonlocal; - double psiLocal, psiNonlocal, membranePotential; - double ap,aq; // coefficient + const int Cqx, const int Cqy, int const Cqz, int *Map, double *Distance, + double *Psi, double Threshold, double MassFractionIn, + double MassFractionOut, double ThresholdMassFractionIn, + double ThresholdMassFractionOut, int *d3q7_recvlist, int *d3q7_linkList, + double *coef, int start, int nlinks, int count, const int N, const int Nx, + const int Ny, const int Nz) { + //.................................................................................... + // Unack distribution from the recv buffer + // Distribution q matche Cqx, Cqy, Cqz + // swap rule means that the distributions in recvbuf are OPPOSITE of q + // dist may be even or odd distributions stored by stream layout + //.................................................................................... 
+ int n, idx, label, nqm, npm, i, j, k; + double distanceLocal; //, distanceNonlocal; + double psiLocal, psiNonlocal, membranePotential; + double ap, aq; // coefficient - - for (idx = 0; idx < count; idx++) { - n = d3q7_recvlist[idx]; - label = d3q7_linkList[idx]; - ap = 1.0; // regular streaming rule - aq = 1.0; - if (label > 0 && !(n < 0)){ - nqm = Map[n]; - distanceLocal = Distance[nqm]; - psiLocal = Psi[nqm]; + for (idx = 0; idx < count; idx++) { + n = d3q7_recvlist[idx]; + label = d3q7_linkList[idx]; + ap = 1.0; // regular streaming rule + aq = 1.0; + if (label > 0 && !(n < 0)) { + nqm = Map[n]; + distanceLocal = Distance[nqm]; + psiLocal = Psi[nqm]; - // Get the 3-D indices from the send process - k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j; - // Streaming link the non-local distribution - i -= Cqx; j -= Cqy; k -= Cqz; - npm = k*Nx*Ny + j*Nx + i; - //distanceNonlocal = Distance[npm]; - psiNonlocal = Psi[npm]; + // Get the 3-D indices from the send process + k = nqm / (Nx * Ny); + j = (nqm - Nx * Ny * k) / Nx; + i = nqm - Nx * Ny * k - Nx * j; + // Streaming link the non-local distribution + i -= Cqx; + j -= Cqy; + k -= Cqz; + npm = k * Nx * Ny + j * Nx + i; + //distanceNonlocal = Distance[npm]; + psiNonlocal = Psi[npm]; - membranePotential = psiLocal - psiNonlocal; - aq = MassFractionIn; - ap = MassFractionOut; + membranePotential = psiLocal - psiNonlocal; + aq = MassFractionIn; + ap = MassFractionOut; - /* link is inside membrane */ - if (distanceLocal > 0.0){ - if (membranePotential < Threshold*(-1.0)){ - ap = MassFractionIn; - aq = MassFractionOut; - } - else { - ap = ThresholdMassFractionIn; - aq = ThresholdMassFractionOut; - } - } - else if (membranePotential > Threshold){ - aq = ThresholdMassFractionIn; - ap = ThresholdMassFractionOut; - } - } - coef[2*idx]=aq; - coef[2*idx+1]=ap; - } + /* link is inside membrane */ + if (distanceLocal > 0.0) { + if (membranePotential < Threshold * (-1.0)) { + ap = MassFractionIn; + aq = MassFractionOut; 
+ } else { + ap = ThresholdMassFractionIn; + aq = ThresholdMassFractionOut; + } + } else if (membranePotential > Threshold) { + aq = ThresholdMassFractionIn; + ap = ThresholdMassFractionOut; + } + } + coef[2 * idx] = aq; + coef[2 * idx + 1] = ap; + } } - -extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, - int *d3q7_recvlist, double *recvbuf, int count, - double *dist, int N, double *coef) { +extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, int *d3q7_recvlist, + double *recvbuf, int count, + double *dist, int N, double *coef) { //.................................................................................... // Unack distribution from the recv buffer // Distribution q matche Cqx, Cqy, Cqz @@ -340,40 +368,50 @@ extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, // dist may be even or odd distributions stored by stream layout //.................................................................................... int n, idx; - double fq,fp,fqq,ap,aq; // coefficient + double fq, fp, fqq, ap, aq; // coefficient /* First unpack the regular links */ for (idx = 0; idx < count; idx++) { - n = d3q7_recvlist[idx]; + n = d3q7_recvlist[idx]; // update link based on mass transfer coefficients - if (!(n < 0)){ - aq = coef[2*idx]; - ap = coef[2*idx+1]; - fq = dist[q * N + n]; - fp = recvbuf[idx]; - fqq = (1-aq)*fq+ap*fp; + if (!(n < 0)) { + aq = coef[2 * idx]; + ap = coef[2 * idx + 1]; + fq = dist[q * N + n]; + fp = recvbuf[idx]; + fqq = (1 - aq) * fq + ap * fp; dist[q * N + n] = fqq; } //printf(" LINK: site=%i, index=%i \n", n, idx); - } + } } -extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef, - double *dist, double *Den, int memLinks, int Np){ - - int link,iq,ip,nq,np; - double aq, ap, fq, fp, fqq, fpp, Cq, Cp; - for (link=0; link. 
+*/ extern "C" void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz) { int n, N; diff --git a/cpu/MembraneHelper.cpp b/cpu/MembraneHelper.cpp index 960e2f20..d56ea8d3 100644 --- a/cpu/MembraneHelper.cpp +++ b/cpu/MembraneHelper.cpp @@ -1,6 +1,23 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA -extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount, - double *recvbuf, double *dist, int N) { + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, + int linkCount, double *recvbuf, + double *dist, int N) { //.................................................................................... // Unack distribution from the recv buffer // Distribution q matche Cqx, Cqy, Cqz @@ -8,9 +25,9 @@ extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, i // dist may be even or odd distributions stored by stream layout //.................................................................................... int n, idx, link; - for (link=0; link. +*/ /* Implement Mixed Gradient (Lee et al. 
JCP 2016)*/ extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, diff --git a/cpu/Poisson.cpp b/cpu/Poisson.cpp index 93571445..2222d1fc 100644 --- a/cpu/Poisson.cpp +++ b/cpu/Poisson.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include @@ -97,17 +113,18 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential( extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, - double tau, double epsilon_LB, bool UseSlippingVelBC, - int start, int finish, int Np) { + double tau, double epsilon_LB, + bool UseSlippingVelBC, int start, + int finish, int Np) { int n; - double psi; //electric potential - double Ex, Ey, Ez; //electric field - double rho_e, rho_p; //local charge density + double psi; //electric potential + double Ex, Ey, Ez; //electric field + double rho_e, rho_p; //local charge density double f0, f1, f2, f3, f4, f5, f6; int nr1, nr2, nr3, nr4, nr5, nr6; double rlx = 1.0 / tau; int idx; - + // Universal constant double kb = 1.38e-23; //Boltzmann constant;unit [J/K] double electron_charge = 1.6e-19; //electron charge;unit [C] @@ -118,13 +135,13 @@ extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, //Load data //When 
Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral - //and thus the net space charge density is zero. - rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB; + //and thus the net space charge density is zero. + rho_e = (UseSlippingVelBC == 1) ? 0.0 : Den_charge[n] / epsilon_LB; idx = Map[n]; psi = Psi[idx]; - + /* Compute H30+ OH- charge density from Poisson Boltzmann statistics */ - rho_p = 1.04e-7 * (exp(psi*Vt) - exp((-1.0)*psi*Vt)); + rho_p = 1.04e-7 * (exp(psi * Vt) - exp((-1.0) * psi * Vt)); rho_e += rho_p; // q=0 @@ -188,16 +205,17 @@ extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, - double epsilon_LB, bool UseSlippingVelBC, - int start, int finish, int Np) { + double epsilon_LB, + bool UseSlippingVelBC, int start, + int finish, int Np) { int n; - double psi; //electric potential - double Ex, Ey, Ez; //electric field - double rho_e, rho_p; //local charge density + double psi; //electric potential + double Ex, Ey, Ez; //electric field + double rho_e, rho_p; //local charge density double f0, f1, f2, f3, f4, f5, f6; double rlx = 1.0 / tau; int idx; - + // Universal constant double kb = 1.38e-23; //Boltzmann constant;unit [J/K] double electron_charge = 1.6e-19; //electron charge;unit [C] @@ -208,13 +226,13 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, //Load data //When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral - //and thus the net space charge density is zero. - rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB; + //and thus the net space charge density is zero. + rho_e = (UseSlippingVelBC == 1) ? 
0.0 : Den_charge[n] / epsilon_LB; idx = Map[n]; psi = Psi[idx]; - + /* Compute H30+ OH- charge density from Poisson Boltzmann statistics */ - rho_p = 1.04e-7 * (exp(psi*Vt) - exp((-1.0)*psi*Vt)); + rho_p = 1.04e-7 * (exp(psi * Vt) - exp((-1.0) * psi * Vt)); rho_e += rho_p; f0 = dist[n]; @@ -466,17 +484,19 @@ extern "C" void ScaLBL_D3Q7_PoissonResidualError( // } //} -extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){ - int n; - double f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, - f16, f17, f18; - double Ex,Ey,Ez; - double rlx=1.0/tau; +extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, + double *ElectricField, + double tau, int Np) { + int n; + double f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, + f16, f17, f18; + double Ex, Ey, Ez; + double rlx = 1.0 / tau; - for (n=0; n 1e-8) printf("Error in sum (psi = %0.5g, Vt =%0.5g): approx = %0.5g, true value = %0.5g \n", psit, Vt, expsum, truesum); - //if (fabs(expdiff - truediff) > 1e-8) printf("Error in diff: approx = %0.5g, true value = %0.5g \n", expdiff, truediff); - /* 1/ 5040 = 0.0001984126984126984 *(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt) */ /* 1/ 362880 = 2.755731922398589e-06 *(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt) */ /* 1/ 39916800 = 2.505210838544172e-08 *(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt) */ /* compute new psi */ - psi = 2.0*f0*(1.0 - rlx) + rlx*psit; //(1.0 / Wt)*(sum_q + 0.125*rho_i); - //error = 8.0*(sum_q - f0) + rho_i*factor; - error = Cp*factor*expdiff - 8.0*f0 + G; - Error[n] = error; - - - if (error > 1e-3){ - printf(" Newton's method error (site=%i) = %0.5g \n",n,F); - } - - + psi = 2.0 * f0 * (1.0 - rlx) + + rlx * psit; //(1.0 / Wt)*(sum_q + 0.125*rho_i); + idx = Map[n]; Psi[idx] = psi; - - // q = 0 - dist[n] = 
W0*psi;// - // q = 1 - dist[1 * Np + n] = W1*psi;//f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + // q = 0 + dist[n] = W0 * psi; //f0 * (1.0 - rlx) - (1.0-0.5*rlx)*W0*rho_e; - // q = 2 - dist[2 * Np + n] = W1*psi;//f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + // q = 1 + dist[nr2] = + W1 * + psi; //f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; - // q = 3 - dist[3 * Np + n] = W1*psi;//f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + // q = 2 + dist[nr1] = + W1 * + psi; //f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; - // q = 4 - dist[4 * Np + n] = W1*psi;//f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + // q = 3 + dist[nr4] = + W1 * + psi; //f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; - // q = 5 - dist[5 * Np + n] = W1*psi;//f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + // q = 4 + dist[nr3] = + W1 * + psi; //f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; - // q = 6 - dist[6 * Np + n] = W1*psi;//f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + // q = 5 + dist[nr6] = + W1 * + psi; //f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; - dist[7 * Np + n] = W2*psi;//f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[8 * Np + n] = W2*psi;//f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[9 * Np + n] = W2*psi;//f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[10 * Np + n] = W2*psi;//f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[11 * Np + n] = W2*psi;//f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[12 * Np + n] = W2*psi;//f12 * (1.0 - rlx) +W2* (rlx * psi) - 
(1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[13 * Np + n] = W2*psi;//f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[14 * Np + n] = W2*psi;//f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[15 * Np + n] = W2*psi;//f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[16 * Np + n] = W2*psi;//f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[17 * Np + n] = W2*psi;//f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; - dist[18 * Np + n] = W2*psi;//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + // q = 6 + dist[nr5] = + W1 * + psi; //f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + //........................................................................ - //........................................................................ - } + // q = 7 + dist[nr8] = + W2 * + psi; //f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 8 + dist[nr7] = + W2 * + psi; //f8 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 9 + dist[nr10] = + W2 * + psi; //f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 10 + dist[nr9] = + W2 * + psi; //f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 11 + dist[nr12] = + W2 * + psi; //f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 12 + dist[nr11] = + W2 * + psi; //f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 13 + dist[nr14] = + W2 * + psi; //f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q= 14 + dist[nr13] = + W2 * + psi; //f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 15 + dist[nr16] = + W2 * + psi; //f15 * (1.0 
- rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 16 + dist[nr15] = + W2 * + psi; //f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 17 + dist[nr18] = + W2 * + psi; //f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + // q = 18 + dist[nr17] = + W2 * + psi; //f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + } } -extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np) { - //double W0 = 0.5; - double W1 = 1.0/24.0; - double W2 = 1.0/48.0; - int n;//nread, nr5; - - double psi = Vin; +extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Grotthus( + int *Map, double *dist, double *Den_charge, double *Psi, + double *ElectricField, double *Error, double tau, double Vt, double Cp, + double epsilon_LB, bool UseSlippingVelBC, int start, int finish, int Np) { + int n; + double psi, psit; //electric potential + double Ex, Ey, Ez; //electric field + double rho_e, rho_i, rho_p; //local charge density + double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, + f16, f17, f18; + double error, sum_q; + double rlx = 1.0 / tau; + int idx; + double W0 = 0.5; + double W1 = 1.0 / 24.0; + double W2 = 1.0 / 48.0; + + double F, G, Fprime; + double factor = 1.0 / epsilon_LB; + double inVt = 1.0 / Vt; + double expsum, expdiff, term, xv; + + /* exponential series coefficients */ + double a3 = 0.3333333333333333; + double a4 = 0.25; //0.08333333333333333; + double a5 = 0.2; // 0.01666666666666667; + double a6 = 0.1666666666666667; //0.002777777777777778; + double a7 = 0.1428571428571428; //0.0003968253968253968; + double a8 = 0.125; //4.96031746031746e-05; + double a9 = 0.1111111111111111; //5.511463844797179e-06; + double a10 = 0.1; //5.511463844797178e-07; + double a11 = 0.09090909090909091; //5.010421677088344e-08; + double a12 = 0.08333333333333333; //4.17535139757362e-09; + double a13 = 
0.07692307692307693; + + for (n = start; n < finish; n++) { + + //Load data + //When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral + //and thus the net space charge density is zero. + rho_i = (UseSlippingVelBC == 1) ? 0.0 : Den_charge[n]; + + f0 = dist[n]; + f1 = dist[2 * Np + n]; + f2 = dist[1 * Np + n]; + f3 = dist[4 * Np + n]; + f4 = dist[3 * Np + n]; + f5 = dist[6 * Np + n]; + f6 = dist[5 * Np + n]; + + f7 = dist[8 * Np + n]; + f8 = dist[7 * Np + n]; + f9 = dist[10 * Np + n]; + f10 = dist[9 * Np + n]; + f11 = dist[12 * Np + n]; + f12 = dist[11 * Np + n]; + f13 = dist[14 * Np + n]; + f14 = dist[13 * Np + n]; + f15 = dist[16 * Np + n]; + f16 = dist[15 * Np + n]; + f17 = dist[18 * Np + n]; + f18 = dist[17 * Np + n]; + + /* Ex = (f1 - f2) * rlx * + 4.0; //NOTE the unit of electric field here is V/lu + Ey = (f3 - f4) * rlx * + 4.0; //factor 4.0 is D3Q7 lattice squared speed of sound + Ez = (f5 - f6) * rlx * 4.0; + */ + Ex = (f1 - f2 + 0.5 * (f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14)) * + 4.0; //NOTE the unit of electric field here is V/lu + Ey = (f3 - f4 + 0.5 * (f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18)) * + 4.0; + Ez = (f5 - f6 + 0.5 * (f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18)) * + 4.0; + ElectricField[n + 0 * Np] = Ex; + ElectricField[n + 1 * Np] = Ey; + ElectricField[n + 2 * Np] = Ez; + + sum_q = f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + + f13 + f14 + f15 + f16 + f17 + f18; + G = 8.0 * sum_q + rho_i * factor; + + /* Use Poisson-Boltzmann for fast proton transport */ + psit = 4.0 * f0; + // rho_p = Cp * (exp(psi*inVt) - exp(-psi*inVt)); + // rho_e = rho_i + rho_p; + + /* use semi-implicit scheme */ + //Wt = W0 + Cp*inVt*factor*(1.0 + 0.16666666666666667*(psit*inVt)*(psit*inVt) + 0.00833333333333333*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)); + + for (int s = 0; s < 10; s++) { + /* approximate the exponential with Taylor series */ + expsum = 2.0; + xv = (psit * inVt); + 
expdiff = 2.0 * xv; + term = xv * xv; + expsum += term; + term *= a3 * xv; + expdiff += term; + term *= a4 * xv; + expsum += term; + term *= a5 * xv; + expdiff += term; + term *= a6 * xv; + expsum += term; + term *= a7 * xv; + expdiff += term; + term *= a8 * xv; + expsum += term; + term *= a9 * xv; + expdiff += term; + term *= a10 * xv; + expsum += term; + term *= a11 * xv; + expdiff += term; + term *= a12 * xv; + expsum += term; + term *= a13 * xv; + expdiff += term; + + /* Compare to analytical */ + double truesum = exp(xv) + exp(-1.0 * xv); + double truediff = exp(xv) - exp(-1.0 * xv); + + expdiff = truediff; + expsum = truesum; + + /* iteration */ + F = Cp * factor * expdiff - 8.0 * W0 * psit + G; + Fprime = Cp * factor * inVt * expsum - 8.0 * W0; + + psit -= (F / Fprime); + /* Newton iteration is successful if F=0 */ + } + + //if (fabs(expsum - truesum) > 1e-8) printf("Error in sum (psi = %0.5g, Vt =%0.5g): approx = %0.5g, true value = %0.5g \n", psit, Vt, expsum, truesum); + //if (fabs(expdiff - truediff) > 1e-8) printf("Error in diff: approx = %0.5g, true value = %0.5g \n", expdiff, truediff); + + /* 1/ 5040 = 0.0001984126984126984 *(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt) */ + /* 1/ 362880 = 2.755731922398589e-06 *(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt) */ + /* 1/ 39916800 = 2.505210838544172e-08 *(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt)*(psit*inVt) */ + + /* compute new psi */ + psi = 2.0 * f0 * (1.0 - rlx) + + rlx * psit; //(1.0 / Wt)*(sum_q + 0.125*rho_i); + //error = 8.0*(sum_q - f0) + rho_i*factor; + error = Cp * factor * expdiff - 8.0 * f0 + G; + Error[n] = error; + + if (error > 1e-3) { + printf(" Newton's method error (site=%i) = %0.5g \n", n, F); + } + + idx = Map[n]; + Psi[idx] = psi; + + // q = 0 + dist[n] = W0 * psi; // + + // q = 1 + dist[1 * Np + n] = + W1 * + psi; //f1 * (1.0 - rlx) +W1* 
(rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + + // q = 2 + dist[2 * Np + n] = + W1 * + psi; //f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + + // q = 3 + dist[3 * Np + n] = + W1 * + psi; //f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + + // q = 4 + dist[4 * Np + n] = + W1 * + psi; //f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + + // q = 5 + dist[5 * Np + n] = + W1 * + psi; //f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + + // q = 6 + dist[6 * Np + n] = + W1 * + psi; //f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e; + + dist[7 * Np + n] = + W2 * + psi; //f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[8 * Np + n] = + W2 * + psi; //f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[9 * Np + n] = + W2 * + psi; //f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[10 * Np + n] = + W2 * + psi; //f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[11 * Np + n] = + W2 * + psi; //f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[12 * Np + n] = + W2 * + psi; //f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[13 * Np + n] = + W2 * + psi; //f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[14 * Np + n] = + W2 * + psi; //f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[15 * Np + n] = + W2 * + psi; //f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[16 * Np + n] = + W2 * + psi; //f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[17 * Np + n] = + W2 * + psi; //f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + dist[18 * Np + n] = + W2 * + psi; 
//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e; + + //........................................................................ + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_z(int *list, + double *dist, + double Vin, + int count, int Np) { + //double W0 = 0.5; + double W1 = 1.0 / 24.0; + double W2 = 1.0 / 48.0; + int n; //nread, nr5; + + double psi = Vin; for (int idx = 0; idx < count; idx++) { n = list[idx]; - - dist[6 * Np + n] = W1*psi; - dist[12 * Np + n] = W2*psi; - dist[13 * Np + n] = W2*psi; - dist[16 * Np + n] = W2*psi; - dist[17 * Np + n] = W2*psi; + + dist[6 * Np + n] = W1 * psi; + dist[12 * Np + n] = W2 * psi; + dist[13 * Np + n] = W2 * psi; + dist[16 * Np + n] = W2 * psi; + dist[17 * Np + n] = W2 * psi; } } extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_Z(int *list, - double *dist, - double Vout, - int count, int Np) { - //double W0 = 0.5; - double W1 = 1.0/24.0; - double W2 = 1.0/48.0; - - double psi = Vout; + double *dist, + double Vout, + int count, int Np) { + //double W0 = 0.5; + double W1 = 1.0 / 24.0; + double W2 = 1.0 / 48.0; + + double psi = Vout; + + for (int idx = 0; idx < count; idx++) { - for (int idx = 0; idx < count; idx++) { - int n = list[idx]; - - dist[5 * Np + n] = W1*psi; - dist[11 * Np + n] = W2*psi; - dist[14 * Np + n] = W2*psi; - dist[15 * Np + n] = W2*psi; - dist[18 * Np + n] = W2*psi; - } + + dist[5 * Np + n] = W1 * psi; + dist[11 * Np + n] = W2 * psi; + dist[14 * Np + n] = W2 * psi; + dist[15 * Np + n] = W2 * psi; + dist[18 * Np + n] = W2 * psi; + } } extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_z(int *d_neighborList, - int *list, - double *dist, - double Vin, int count, - int Np) { - double W1 = 1.0/24.0; - double W2 = 1.0/48.0; + int *list, + double *dist, + double Vin, int count, + int Np) { + double W1 = 1.0 / 24.0; + double W2 = 1.0 / 48.0; int nr5, nr11, nr14, nr15, nr18; double psi = Vin; - - for (int idx = 0; idx < count; idx++) { - int n = 
list[idx]; - nr5 = d_neighborList[n + 4 * Np]; - nr11 = d_neighborList[n + 10 * Np]; - nr14 = d_neighborList[n + 13 * Np]; - nr15 = d_neighborList[n + 14 * Np]; - nr18 = d_neighborList[n + 17 * Np]; - dist[nr5] = W1*psi; - dist[nr11] = W2*psi; - dist[nr14] = W2*psi; - dist[nr15] = W2*psi; - dist[nr18] = W2*psi; + for (int idx = 0; idx < count; idx++) { + int n = list[idx]; + nr5 = d_neighborList[n + 4 * Np]; + nr11 = d_neighborList[n + 10 * Np]; + nr14 = d_neighborList[n + 13 * Np]; + nr15 = d_neighborList[n + 14 * Np]; + nr18 = d_neighborList[n + 17 * Np]; + + dist[nr5] = W1 * psi; + dist[nr11] = W2 * psi; + dist[nr14] = W2 * psi; + dist[nr15] = W2 * psi; + dist[nr18] = W2 * psi; } } -extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) { - - double W1 = 1.0/24.0; - double W2 = 1.0/48.0; +extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, + int *list, + double *dist, + double Vout, + int count, int Np) { + + double W1 = 1.0 / 24.0; + double W2 = 1.0 / 48.0; int nr6, nr12, nr13, nr16, nr17; double psi = Vout; - - for (int idx = 0; idx < count; idx++) { - int n = list[idx]; - nr6 = d_neighborList[n + 5 * Np]; - nr12 = d_neighborList[n + 11 * Np]; - nr13 = d_neighborList[n + 12 * Np]; - nr16 = d_neighborList[n + 15 * Np]; - nr17 = d_neighborList[n + 16 * Np]; - - dist[nr6] = W1*psi; - dist[nr12] = W2*psi; - dist[nr13] = W2*psi; - dist[nr16] = W2*psi; - dist[nr17] = W2*psi; - } + for (int idx = 0; idx < count; idx++) { + int n = list[idx]; + + nr6 = d_neighborList[n + 5 * Np]; + nr12 = d_neighborList[n + 11 * Np]; + nr13 = d_neighborList[n + 12 * Np]; + nr16 = d_neighborList[n + 15 * Np]; + nr17 = d_neighborList[n + 16 * Np]; + + dist[nr6] = W1 * psi; + dist[nr12] = W2 * psi; + dist[nr13] = W2 * psi; + dist[nr16] = W2 * psi; + dist[nr17] = W2 * psi; + } } extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi, - int start, int finish, 
int Np) { - int n; - int ijk; - double W0 = 0.5; - double W1 = 1.0/24.0; - double W2 = 1.0/48.0; - for (n = start; n < finish; n++) { - ijk = Map[n]; - dist[0 * Np + n] = W0 * Psi[ijk];//3333333333333333* Psi[ijk]; - dist[1 * Np + n] = W1 * Psi[ijk]; - dist[2 * Np + n] = W1 * Psi[ijk]; - dist[3 * Np + n] = W1 * Psi[ijk]; - dist[4 * Np + n] = W1 * Psi[ijk]; - dist[5 * Np + n] = W1 * Psi[ijk]; - dist[6 * Np + n] = W1 * Psi[ijk]; - dist[7 * Np + n] = W2* Psi[ijk]; - dist[8 * Np + n] = W2* Psi[ijk]; - dist[9 * Np + n] = W2* Psi[ijk]; - dist[10 * Np + n] = W2* Psi[ijk]; - dist[11 * Np + n] = W2* Psi[ijk]; - dist[12 * Np + n] = W2* Psi[ijk]; - dist[13 * Np + n] = W2* Psi[ijk]; - dist[14 * Np + n] = W2* Psi[ijk]; - dist[15 * Np + n] = W2* Psi[ijk]; - dist[16 * Np + n] = W2* Psi[ijk]; - dist[17 * Np + n] = W2* Psi[ijk]; - dist[18 * Np + n] = W2* Psi[ijk]; - } + int start, int finish, int Np) { + int n; + int ijk; + double W0 = 0.5; + double W1 = 1.0 / 24.0; + double W2 = 1.0 / 48.0; + for (n = start; n < finish; n++) { + ijk = Map[n]; + dist[0 * Np + n] = W0 * Psi[ijk]; //3333333333333333* Psi[ijk]; + dist[1 * Np + n] = W1 * Psi[ijk]; + dist[2 * Np + n] = W1 * Psi[ijk]; + dist[3 * Np + n] = W1 * Psi[ijk]; + dist[4 * Np + n] = W1 * Psi[ijk]; + dist[5 * Np + n] = W1 * Psi[ijk]; + dist[6 * Np + n] = W1 * Psi[ijk]; + dist[7 * Np + n] = W2 * Psi[ijk]; + dist[8 * Np + n] = W2 * Psi[ijk]; + dist[9 * Np + n] = W2 * Psi[ijk]; + dist[10 * Np + n] = W2 * Psi[ijk]; + dist[11 * Np + n] = W2 * Psi[ijk]; + dist[12 * Np + n] = W2 * Psi[ijk]; + dist[13 * Np + n] = W2 * Psi[ijk]; + dist[14 * Np + n] = W2 * Psi[ijk]; + dist[15 * Np + n] = W2 * Psi[ijk]; + dist[16 * Np + n] = W2 * Psi[ijk]; + dist[17 * Np + n] = W2 * Psi[ijk]; + dist[18 * Np + n] = W2 * Psi[ijk]; + } } diff --git a/cpu/Stokes.cpp b/cpu/Stokes.cpp index ce1ff7f8..ef9ec27e 100644 --- a/cpu/Stokes.cpp +++ b/cpu/Stokes.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. 
McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT( @@ -38,12 +54,20 @@ extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT( Ey = ElectricField[n + 1 * Np]; Ez = ElectricField[n + 2 * Np]; //compute total body force, including input body force (Gx,Gy,Gz) - Fx = (UseSlippingVelBC==1) ? Gx : Gx + rhoE * Ex * (time_conv * time_conv) / (h * h * 1.0e-12) / - den_scale; //the extra factors at the end necessarily convert unit from phys to LB - Fy = (UseSlippingVelBC==1) ? Gy : Gy + rhoE * Ey * (time_conv * time_conv) / (h * h * 1.0e-12) / - den_scale; - Fz = (UseSlippingVelBC==1) ? Gz : Gz + rhoE * Ez * (time_conv * time_conv) / (h * h * 1.0e-12) / - den_scale; + Fx = + (UseSlippingVelBC == 1) + ? Gx + : Gx + + rhoE * Ex * (time_conv * time_conv) / (h * h * 1.0e-12) / + den_scale; //the extra factors at the end necessarily convert unit from phys to LB + Fy = (UseSlippingVelBC == 1) + ? Gy + : Gy + rhoE * Ey * (time_conv * time_conv) / + (h * h * 1.0e-12) / den_scale; + Fz = (UseSlippingVelBC == 1) + ? 
Gz + : Gz + rhoE * Ez * (time_conv * time_conv) / + (h * h * 1.0e-12) / den_scale; // q=0 fq = dist[n]; @@ -520,13 +544,21 @@ extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT( //Fz = Gz + rhoE * Ez * (time_conv * time_conv) / (h * h * 1.0e-12) / // den_scale; //When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral - //and body force induced by external efectric field is reduced to slipping velocity BC. - Fx = (UseSlippingVelBC==1) ? Gx : Gx + rhoE * Ex * (time_conv * time_conv) / (h * h * 1.0e-12) / - den_scale; //the extra factors at the end necessarily convert unit from phys to LB - Fy = (UseSlippingVelBC==1) ? Gy : Gy + rhoE * Ey * (time_conv * time_conv) / (h * h * 1.0e-12) / - den_scale; - Fz = (UseSlippingVelBC==1) ? Gz : Gz + rhoE * Ez * (time_conv * time_conv) / (h * h * 1.0e-12) / - den_scale; + //and body force induced by external efectric field is reduced to slipping velocity BC. + Fx = + (UseSlippingVelBC == 1) + ? Gx + : Gx + + rhoE * Ex * (time_conv * time_conv) / (h * h * 1.0e-12) / + den_scale; //the extra factors at the end necessarily convert unit from phys to LB + Fy = (UseSlippingVelBC == 1) + ? Gy + : Gy + rhoE * Ey * (time_conv * time_conv) / + (h * h * 1.0e-12) / den_scale; + Fz = (UseSlippingVelBC == 1) + ? Gz + : Gz + rhoE * Ez * (time_conv * time_conv) / + (h * h * 1.0e-12) / den_scale; // q=0 fq = dist[n]; diff --git a/cpu/dfh.cpp b/cpu/dfh.cpp index 9fef0075..6f14a2fe 100644 --- a/cpu/dfh.cpp +++ b/cpu/dfh.cpp @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include diff --git a/cpu/thermal.cpp b/cpu/thermal.cpp index 29e7d2e3..11820736 100644 --- a/cpu/thermal.cpp +++ b/cpu/thermal.cpp @@ -1,2 +1,18 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // cpu implementation for thermal lattice boltzmann methods // copyright James McClure, 2014 diff --git a/cuda/BGK.cu b/cuda/BGK.cu index 07022ecb..55bc82d0 100644 --- a/cuda/BGK.cu +++ b/cuda/BGK.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #define NBLOCKS 1024 diff --git a/cuda/Color.cu b/cuda/Color.cu index aeeb3998..c25e0f08 100644 --- a/cuda/Color.cu +++ b/cuda/Color.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include #include diff --git a/cuda/CudaExtras.cu b/cuda/CudaExtras.cu index 86ec713d..3351131c 100644 --- a/cuda/CudaExtras.cu +++ b/cuda/CudaExtras.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // Basic cuda functions callable from C/C++ code #include diff --git a/cuda/D3Q19.cu b/cuda/D3Q19.cu index afd870bf..f895aff5 100644 --- a/cuda/D3Q19.cu +++ b/cuda/D3Q19.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include diff --git a/cuda/D3Q7.cu b/cuda/D3Q7.cu index 8a551f78..5e99b681 100644 --- a/cuda/D3Q7.cu +++ b/cuda/D3Q7.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // GPU Functions for D3Q7 Lattice Boltzmann Methods #include diff --git a/cuda/D3Q7BC.cu b/cuda/D3Q7BC.cu index d60b4bfb..776cfde3 100644 --- a/cuda/D3Q7BC.cu +++ b/cuda/D3Q7BC.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include #include diff --git a/cuda/Extras.cu b/cuda/Extras.cu index 8aeedc87..a11a7d82 100644 --- a/cuda/Extras.cu +++ b/cuda/Extras.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ // Basic cuda functions callable from C/C++ code #include #include diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu index 57fe172f..f2938657 100644 --- a/cuda/FreeLee.cu +++ b/cuda/FreeLee.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include #include diff --git a/cuda/Greyscale.cu b/cuda/Greyscale.cu index edf80a16..eecdd6bb 100644 --- a/cuda/Greyscale.cu +++ b/cuda/Greyscale.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #define NBLOCKS 1024 diff --git a/cuda/GreyscaleColor.cu b/cuda/GreyscaleColor.cu index d115acb7..ceaf7516 100644 --- a/cuda/GreyscaleColor.cu +++ b/cuda/GreyscaleColor.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include diff --git a/cuda/Ion.cu b/cuda/Ion.cu index c22a1a2a..bc0eaf9b 100644 --- a/cuda/Ion.cu +++ b/cuda/Ion.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include //#include diff --git a/cuda/MRT.cu b/cuda/MRT.cu index d2508cea..c726fbec 100644 --- a/cuda/MRT.cu +++ b/cuda/MRT.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ //************************************************************************* // CUDA kernels for single-phase ScaLBL_D3Q19_MRT code // James McClure diff --git a/cuda/MixedGradient.cu b/cuda/MixedGradient.cu index 556e34ef..3ca87122 100644 --- a/cuda/MixedGradient.cu +++ b/cuda/MixedGradient.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ /* Implement Mixed Gradient (Lee et al. JCP 2016)*/ #include #include diff --git a/cuda/Poisson.cu b/cuda/Poisson.cu index 00c9e3da..37f2fe39 100644 --- a/cuda/Poisson.cu +++ b/cuda/Poisson.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include //#include diff --git a/cuda/Stokes.cu b/cuda/Stokes.cu index e6a2055a..8bff2862 100644 --- a/cuda/Stokes.cu +++ b/cuda/Stokes.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ #include #include //#include diff --git a/cuda/dfh.cu b/cuda/dfh.cu index dc0ab0af..24eb824f 100644 --- a/cuda/dfh.cu +++ b/cuda/dfh.cu @@ -1,3 +1,19 @@ +/* + Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University + Copyright Equnior ASA + + This file is part of the Open Porous Media project (OPM). + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ #include #include #include diff --git a/cuda/exe/lb1_MRT_mpi.cpp b/cuda/exe/lb1_MRT_mpi.cpp index 7ef41e90..7a8dd8ee 100644 --- a/cuda/exe/lb1_MRT_mpi.cpp +++ b/cuda/exe/lb1_MRT_mpi.cpp @@ -16,1099 +16,1621 @@ using namespace std; //************************************************************************* //************************************************************************* //************************************************************************* -extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size); +extern "C" void dvc_AllocateDeviceMemory(void **address, size_t size); //************************************************************************* -extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size); +extern "C" void dvc_CopyToDevice(void *dest, void *source, size_t size); //************************************************************************* extern "C" void dvc_Barrier(); //************************************************************************* extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, - double *f_even, double *f_odd, int Nx, int Ny, int Nz); + double *f_even, double *f_odd, int Nx, int Ny, + int Nz); //************************************************************************* -extern "C" void dvc_SwapD3Q19( int nblocks, int nthreads, int S, - char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz); +extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S, char *ID, + double *f_even, double *f_odd, int Nx, int Ny, + int Nz); //************************************************************************* -extern "C" void dvc_MRT( int nblocks, int nthreads, int S, - char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, - double Fx, double Fy, double Fz, int Nx, int Ny, int Nz); +extern "C" void dvc_MRT(int nblocks, int nthreads, int S, char *ID, + double *f_even, double *f_odd, double rlxA, double rlxB, + double Fx, double Fy, double Fz, int Nx, int Ny, + 
int Nz); //************************************************************************* -extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start, - int sendCount, double *sendbuf, double *Dist, int N); +extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, + int start, int sendCount, double *sendbuf, + double *Dist, int N); //************************************************************************* -extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start, - int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz); +extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, + int Cqz, int *RecvList, int start, int recvCount, + double *recvbuf, double *Dist, int Nx, int Ny, + int Nz); //************************************************************************* -inline void PackID(int *list, int count, char *sendbuf, char *ID){ - // Fill in the phase ID values from neighboring processors - // This packs up the values that need to be sent from one processor to another - int idx,n; +inline void PackID(int *list, int count, char *sendbuf, char *ID) { + // Fill in the phase ID values from neighboring processors + // This packs up the values that need to be sent from one processor to another + int idx, n; - for (idx=0; idx> FILENAME; // name of the input file - input >> Nz; // number of nodes (x,y,z) - input >> nBlocks; - input >> nthreads; - input >> tau; // relaxation time - input >> Fx; // External force components (x,y,z) - input >> Fy; - input >> Fz; - input >> timestepMax; // max no. 
of timesteps - input >> interval; // error interval - input >> tol; // error tolerance + if (rank == 0) { + ifstream input("MRT.in"); + input >> FILENAME; // name of the input file + input >> Nz; // number of nodes (x,y,z) + input >> nBlocks; + input >> nthreads; + input >> tau; // relaxation time + input >> Fx; // External force components (x,y,z) + input >> Fy; + input >> Fz; + input >> timestepMax; // max no. of timesteps + input >> interval; // error interval + input >> tol; // error tolerance - ifstream domain("Domain.in"); - domain >> nprocx; - domain >> nprocy; - domain >> nprocz; - } + ifstream domain("Domain.in"); + domain >> nprocx; + domain >> nprocy; + domain >> nprocz; + } - // ************************************************************** - // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); - //................................................. - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + // ************************************************************** + // Broadcast simulation parameters from rank 0 to all other procs + MPI_Barrier(comm); + //................................................. 
+ MPI_Bcast(&Nz, 1, MPI_INT, 0, comm); + MPI_Bcast(&nBlocks, 1, MPI_INT, 0, comm); + MPI_Bcast(&nthreads, 1, MPI_INT, 0, comm); + MPI_Bcast(&tau, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Fx, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Fy, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Fz, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(×tepMax, 1, MPI_INT, 0, comm); + MPI_Bcast(&interval, 1, MPI_INT, 0, comm); + MPI_Bcast(&tol, 1, MPI_DOUBLE, 0, comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - //................................................. - MPI_Barrier(comm); - // ************************************************************** + MPI_Bcast(&nprocx, 1, MPI_INT, 0, comm); + MPI_Bcast(&nprocy, 1, MPI_INT, 0, comm); + MPI_Bcast(&nprocz, 1, MPI_INT, 0, comm); + //................................................. + MPI_Barrier(comm); + // ************************************************************** - double rlx_setA = 1.f/tau; - double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + double rlx_setA = 1.f / tau; + double rlx_setB = 8.f * (2.f - rlx_setA) / (8.f - rlx_setA); - if (nprocs != nprocx*nprocy*nprocz){ - printf("Fatal error in processor number! \n"); - printf("nprocx = %i \n",nprocx); - printf("nprocy = %i \n",nprocy); - printf("nprocz = %i \n",nprocz); - } + if (nprocs != nprocx * nprocy * nprocz) { + printf("Fatal error in processor number! 
\n"); + printf("nprocx = %i \n", nprocx); + printf("nprocy = %i \n", nprocy); + printf("nprocz = %i \n", nprocz); + } - if (rank==0){ - printf("tau = %f \n", tau); - printf("Set A = %f \n", rlx_setA); - printf("Set B = %f \n", rlx_setB); - printf("Force(x) = %f \n", Fx); - printf("Force(y) = %f \n", Fy); - printf("Force(z) = %f \n", Fz); - printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); - } + if (rank == 0) { + printf("tau = %f \n", tau); + printf("Set A = %f \n", rlx_setA); + printf("Set B = %f \n", rlx_setB); + printf("Force(x) = %f \n", Fx); + printf("Force(y) = %f \n", Fy); + printf("Force(z) = %f \n", Fz); + printf("Sub-domain size = %i x %i x %i\n", Nz, Nz, Nz); + } - MPI_Barrier(comm); - kproc = rank/(nprocx*nprocy); - jproc = (rank-nprocx*nprocy*kproc)/nprocx; - iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; + MPI_Barrier(comm); + kproc = rank / (nprocx * nprocy); + jproc = (rank - nprocx * nprocy * kproc) / nprocx; + iproc = rank - nprocx * nprocy * kproc - nprocz * jproc; - //.......................................... - // set up the neighbor ranks - //.......................................... - i=iproc; j=jproc; k =kproc; - i+=1; - j+=0; - k+=0; - if (i<0) i+=nprocx; - if (j<0) j+=nprocy; - if (k<0) k+=nprocz; - if (!(i 0) sum++; - } - } - } - PM.close(); -// printf("File porosity = %f\n", double(sum)/N); - //........................................................................... - MPI_Barrier(comm); - if (rank == 0) cout << "Domain set." << endl; - //........................................................................... 
- // Write the communcation structure into a file for debugging - char LocalCommFile[40]; - sprintf(LocalCommFile,"%s%s","Comm.",LocalRankString); - FILE *CommFile; - CommFile = fopen(LocalCommFile,"w"); - fprintf(CommFile,"rank=%d, ",rank); - fprintf(CommFile,"i=%d,j=%d,k=%d :",iproc,jproc,kproc); - fprintf(CommFile,"x=%d, ",rank_x); - fprintf(CommFile,"X=%d, ",rank_X); - fprintf(CommFile,"y=%d, ",rank_y); - fprintf(CommFile,"Y=%d, ",rank_Y); - fprintf(CommFile,"z=%d, ",rank_z); - fprintf(CommFile,"Z=%d, ",rank_Z); - fprintf(CommFile,"xy=%d, ",rank_xy); - fprintf(CommFile,"XY=%d, ",rank_XY); - fprintf(CommFile,"xY=%d, ",rank_xY); - fprintf(CommFile,"Xy=%d, ",rank_Xy); - fprintf(CommFile,"xz=%d, ",rank_xz); - fprintf(CommFile,"XZ=%d, ",rank_XZ); - fprintf(CommFile,"xZ=%d, ",rank_xZ); - fprintf(CommFile,"Xz=%d, ",rank_Xz); - fprintf(CommFile,"yz=%d, ",rank_yz); - fprintf(CommFile,"YZ=%d, ",rank_YZ); - fprintf(CommFile,"yZ=%d, ",rank_yZ); - fprintf(CommFile,"Yz=%d, ",rank_Yz); - fprintf(CommFile,"\n"); - fclose(CommFile); - //........................................................................... + //....................................................................... + if (rank == 0) + printf("Read input media... \n"); + //....................................................................... + char LocalRankString[8]; + char LocalRankFilename[40]; + sprintf(LocalRankString, "%05d", rank); + sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString); + printf("Local File Name = %s \n", LocalRankFilename); + // .......... READ THE INPUT FILE ....................................... + char value; + char *id; + id = new char[N]; + int sum = 0; + // double porosity; + //....................................................................... 
+ ifstream PM(LocalRankFilename, ios::binary); + for (k = 0; k < Nz; k++) { + for (j = 0; j < Ny; j++) { + for (i = 0; i < Nx; i++) { + n = k * Nx * Ny + j * Nx + i; + id[n] = 0; + } + } + } + for (k = 1; k < Nz - 1; k++) { + for (j = 1; j < Ny - 1; j++) { + for (i = 1; i < Nx - 1; i++) { + PM.read((char *)(&value), sizeof(value)); + n = k * Nx * Ny + j * Nx + i; + id[n] = value; + if (value > 0) + sum++; + } + } + } + PM.close(); + // printf("File porosity = %f\n", double(sum)/N); + //........................................................................... + MPI_Barrier(comm); + if (rank == 0) + cout << "Domain set." << endl; + //........................................................................... + // Write the communcation structure into a file for debugging + char LocalCommFile[40]; + sprintf(LocalCommFile, "%s%s", "Comm.", LocalRankString); + FILE *CommFile; + CommFile = fopen(LocalCommFile, "w"); + fprintf(CommFile, "rank=%d, ", rank); + fprintf(CommFile, "i=%d,j=%d,k=%d :", iproc, jproc, kproc); + fprintf(CommFile, "x=%d, ", rank_x); + fprintf(CommFile, "X=%d, ", rank_X); + fprintf(CommFile, "y=%d, ", rank_y); + fprintf(CommFile, "Y=%d, ", rank_Y); + fprintf(CommFile, "z=%d, ", rank_z); + fprintf(CommFile, "Z=%d, ", rank_Z); + fprintf(CommFile, "xy=%d, ", rank_xy); + fprintf(CommFile, "XY=%d, ", rank_XY); + fprintf(CommFile, "xY=%d, ", rank_xY); + fprintf(CommFile, "Xy=%d, ", rank_Xy); + fprintf(CommFile, "xz=%d, ", rank_xz); + fprintf(CommFile, "XZ=%d, ", rank_XZ); + fprintf(CommFile, "xZ=%d, ", rank_xZ); + fprintf(CommFile, "Xz=%d, ", rank_Xz); + fprintf(CommFile, "yz=%d, ", rank_yz); + fprintf(CommFile, "YZ=%d, ", rank_YZ); + fprintf(CommFile, "yZ=%d, ", rank_yZ); + fprintf(CommFile, "Yz=%d, ", rank_Yz); + fprintf(CommFile, "\n"); + fclose(CommFile); + //........................................................................... 
- // Set up MPI communication structures - if (rank==0) printf ("Setting up communication control structures \n"); - //...................................................................................... - // Get the actual D3Q19 communication counts (based on location of solid phase) - // Discrete velocity set symmetry implies the sendcount = recvcount - int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; - int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; - int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; - sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; - sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; - sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; - //...................................................................................... - for (k=0; k> FILENAME; - // Line 2: domain size (Nx, Ny, Nz) - input >> Nz; // number of nodes (x,y,z) - input >> nBlocks; - input >> nthreads; - // Line 3: model parameters (tau, alpha, beta, das, dbs) - input >> tau; - input >> alpha; - input >> beta; - input >> das; - input >> dbs; - // Line 4: External force components (Fx,Fy, Fz) - input >> Fx; - input >> Fy; - input >> Fz; - // Line 5: Pressure Boundary conditions - input >> pBC; - input >> din; - input >> dout; - // Line 6: time-stepping criteria - input >> timestepMax; // max no. of timesteps - input >> interval; // error interval - input >> tol; // error tolerance - //............................................................. + if (rank == 0) { + //............................................................. + // READ SIMULATION PARMAETERS FROM INPUT FILE + //............................................................. 
+ ifstream input("Color.in"); + // Line 1: Name of the phase indicator file (s=0,w=1,n=2) + input >> FILENAME; + // Line 2: domain size (Nx, Ny, Nz) + input >> Nz; // number of nodes (x,y,z) + input >> nBlocks; + input >> nthreads; + // Line 3: model parameters (tau, alpha, beta, das, dbs) + input >> tau; + input >> alpha; + input >> beta; + input >> das; + input >> dbs; + // Line 4: External force components (Fx,Fy, Fz) + input >> Fx; + input >> Fy; + input >> Fz; + // Line 5: Pressure Boundary conditions + input >> pBC; + input >> din; + input >> dout; + // Line 6: time-stepping criteria + input >> timestepMax; // max no. of timesteps + input >> interval; // error interval + input >> tol; // error tolerance + //............................................................. - ifstream domain("Domain.in"); - domain >> nprocx; - domain >> nprocy; - domain >> nprocz; - } + ifstream domain("Domain.in"); + domain >> nprocx; + domain >> nprocy; + domain >> nprocz; + } - double rlxA = 1.f/tau; - double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA); + double rlxA = 1.f / tau; + double rlxB = 8.f * (2.f - rlxA) / (8.f - rlxA); - if (nprocs != nprocx*nprocy*nprocz){ - printf("Fatal error in processor number! \n"); - printf("nprocx = %i \n",nprocx); - printf("nprocy = %i \n",nprocy); - printf("nprocz = %i \n",nprocz); - } + if (nprocs != nprocx * nprocy * nprocz) { + printf("Fatal error in processor number! 
\n"); + printf("nprocx = %i \n", nprocx); + printf("nprocy = %i \n", nprocy); + printf("nprocz = %i \n", nprocz); + } - if (rank==0){ - printf("********************************************************\n"); - printf("tau = %f \n", tau); - printf("alpha = %f \n", alpha); - printf("beta = %f \n", beta); - printf("das = %f \n", beta); - printf("dbs = %f \n", beta); - printf("Force(x) = %f \n", Fx); - printf("Force(y) = %f \n", Fy); - printf("Force(z) = %f \n", Fz); - printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); - printf("Parallel domain size = %i x %i x %i\n",nprocx,nprocy,nprocz); - printf("********************************************************\n"); + if (rank == 0) { + printf("********************************************************\n"); + printf("tau = %f \n", tau); + printf("alpha = %f \n", alpha); + printf("beta = %f \n", beta); + printf("das = %f \n", beta); + printf("dbs = %f \n", beta); + printf("Force(x) = %f \n", Fx); + printf("Force(y) = %f \n", Fy); + printf("Force(z) = %f \n", Fz); + printf("Sub-domain size = %i x %i x %i\n", Nz, Nz, Nz); + printf("Parallel domain size = %i x %i x %i\n", nprocx, nprocy, nprocz); + printf("********************************************************\n"); + } - } + Nz += 2; + Nx = Ny = Nz; // Cubic domain - Nz += 2; - Nx = Ny = Nz; // Cubic domain + int N = Nx * Ny * Nz; + int dist_mem_size = N * sizeof(double); - int N = Nx*Ny*Nz; - int dist_mem_size = N*sizeof(double); + // unsigned int nBlocks = 32; + // int nthreads = 128; + int S = N / nthreads / nBlocks; -// unsigned int nBlocks = 32; -// int nthreads = 128; - int S = N/nthreads/nBlocks; + // unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1); + // dim3 grid(nBlocks,1,1); -// unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1); -// dim3 grid(nBlocks,1,1); + if (rank == 0) + printf("Number of blocks = %i \n", nBlocks); + if (rank == 0) + printf("Threads per block = %i \n", nthreads); + if (rank == 0) + printf("Sweeps per thread = %i \n", S); + if 
(rank == 0) + printf("Number of nodes per side = %i \n", Nx); + if (rank == 0) + printf("Total Number of nodes = %i \n", N); + if (rank == 0) + printf("********************************************************\n"); - if (rank==0) printf("Number of blocks = %i \n", nBlocks); - if (rank==0) printf("Threads per block = %i \n", nthreads); - if (rank==0) printf("Sweeps per thread = %i \n", S); - if (rank==0) printf("Number of nodes per side = %i \n", Nx); - if (rank==0) printf("Total Number of nodes = %i \n", N); - if (rank==0) printf("********************************************************\n"); + //....................................................................... + if (rank == 0) + printf("Read input media... \n"); + //....................................................................... + char LocalRankString[8]; + char LocalRankFilename[40]; + sprintf(LocalRankString, "%05d", rank); + sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString); + // printf("Local File Name = %s \n",LocalRankFilename); + // .......... READ THE INPUT FILE ....................................... + char value; + char *id; + id = new char[N]; + int sum = 0; + // double porosity; + //....................................................................... + ifstream PM(LocalRankFilename, ios::binary); + for (k = 0; k < Nz; k++) { + for (j = 0; j < Ny; j++) { + for (i = 0; i < Nx; i++) { + n = k * Nx * Ny + j * Nx + i; + id[n] = 0; + } + } + } + for (k = 1; k < Nz - 1; k++) { + for (j = 1; j < Ny - 1; j++) { + for (i = 1; i < Nx - 1; i++) { + PM.read((char *)(&value), sizeof(value)); + n = k * Nx * Ny + j * Nx + i; + id[n] = value; + if (value > 0) + sum++; + } + } + } + PM.close(); + // printf("File porosity = %f\n", double(sum)/N); - //....................................................................... - if (rank == 0) printf("Read input media... \n"); - //....................................................................... 
- char LocalRankString[8]; - char LocalRankFilename[40]; - sprintf(LocalRankString,"%05d",rank); - sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); -// printf("Local File Name = %s \n",LocalRankFilename); - // .......... READ THE INPUT FILE ....................................... - char value; - char *id; - id = new char[N]; - int sum = 0; -// double porosity; - //....................................................................... - ifstream PM(LocalRankFilename,ios::binary); - for (k=0;k 0) sum++; - } - } - } - PM.close(); -// printf("File porosity = %f\n", double(sum)/N); + //...........device phase ID................................................. + if (rank == 0) + printf("Copying phase ID to device \n"); + char *ID; + dvc_AllocateDeviceMemory((void **)&ID, N); // Allocate device memory + // Copy to the device + dvc_CopyToDevice(ID, id, N); + //........................................................................... - //...........device phase ID................................................. - if (rank==0) printf ("Copying phase ID to device \n"); - char *ID; - dvc_AllocateDeviceMemory((void **) &ID, N); // Allocate device memory - // Copy to the device - dvc_CopyToDevice(ID, id, N); - //........................................................................... + if (rank == 0) + printf("Allocating distributions \n"); + //......................device distributions................................. + double *f_even, *f_odd; + //........................................................................... + dvc_AllocateDeviceMemory((void **)&f_even, + 10 * dist_mem_size); // Allocate device memory + dvc_AllocateDeviceMemory((void **)&f_odd, + 9 * dist_mem_size); // Allocate device memory + //........................................................................... + //........................................................................... 
+ // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + double *Phi, *Den, *Copy; + double *ColorGrad, *Velocity; + //........................................................................... + dvc_AllocateDeviceMemory((void **)&Phi, dist_mem_size); + dvc_AllocateDeviceMemory((void **)&Den, 2 * dist_mem_size); + dvc_AllocateDeviceMemory((void **)&Copy, 2 * dist_mem_size); + dvc_AllocateDeviceMemory((void **)&Velocity, 3 * dist_mem_size); + dvc_AllocateDeviceMemory((void **)&ColorGrad, 3 * dist_mem_size); + //........................................................................... + if (rank == 0) + printf("Setting the distributions, size = : %i\n", N); + //........................................................................... + dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz); + dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, N); + //........................................................................... + dvc_ComputePhi(nBlocks, nthreads, S, ID, Phi, Copy, Den, N); + //........................................................................... - if (rank==0) printf ("Allocating distributions \n"); - //......................device distributions................................. - double *f_even,*f_odd; - //........................................................................... - dvc_AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size); // Allocate device memory - dvc_AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size); // Allocate device memory - //........................................................................... - //........................................................................... - // MAIN VARIABLES ALLOCATED HERE - //........................................................................... 
- double *Phi,*Den,*Copy; - double *ColorGrad, *Velocity; - //........................................................................... - dvc_AllocateDeviceMemory((void **) &Phi, dist_mem_size); - dvc_AllocateDeviceMemory((void **) &Den, 2*dist_mem_size); - dvc_AllocateDeviceMemory((void **) &Copy, 2*dist_mem_size); - dvc_AllocateDeviceMemory((void **) &Velocity, 3*dist_mem_size); - dvc_AllocateDeviceMemory((void **) &ColorGrad, 3*dist_mem_size); - //........................................................................... - if (rank==0) printf("Setting the distributions, size = : %i\n", N); - //........................................................................... - dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz); - dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, N); - //........................................................................... - dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N); - //........................................................................... - - //........................................................................... - // Grids used to pack faces on the GPU for MPI - int faceGrid,edgeGrid,packThreads; - packThreads=512; - edgeGrid=1; - faceGrid=Nx*Ny/packThreads; + //........................................................................... + // Grids used to pack faces on the GPU for MPI + int faceGrid, edgeGrid, packThreads; + packThreads = 512; + edgeGrid = 1; + faceGrid = Nx * Ny / packThreads; + int timestep = 0; + if (rank == 0) + printf("********************************************************\n"); + if (rank == 0) + printf("No. of timesteps: %i \n", timestepMax); - int timestep = 0; - if (rank==0) printf("********************************************************\n"); - if (rank==0) printf("No. of timesteps: %i \n", timestepMax); + //.......create a stream for the LB calculation....... 
+ // cudaStream_t stream; + // cudaStreamCreate(&stream); - //.......create a stream for the LB calculation....... -// cudaStream_t stream; -// cudaStreamCreate(&stream); + //.......create and start timer............ + double start, stop; + double walltime; + start = clock(); - //.......create and start timer............ - double start,stop; - double walltime; - start = clock(); + //************ MAIN ITERATION LOOP ***************************************/ + while (timestep < timestepMax) { + //************************************************************************* + // Compute the color gradient + //************************************************************************* + dvc_ComputeColorGradient(nBlocks, nthreads, S, ID, Phi, ColorGrad, Nx, + Ny, Nz); + //************************************************************************* - //************ MAIN ITERATION LOOP ***************************************/ - while (timestep < timestepMax){ + //************************************************************************* + // Perform collision step for the momentum transport + //************************************************************************* + dvc_ColorCollide(nBlocks, nthreads, S, ID, f_even, f_odd, ColorGrad, + Velocity, rlxA, rlxB, alpha, beta, Fx, Fy, Fz, Nx, Ny, + Nz, pBC); + //************************************************************************* - //************************************************************************* - // Compute the color gradient - //************************************************************************* - dvc_ComputeColorGradient(nBlocks, nthreads, S, - ID, Phi, ColorGrad, Nx, Ny, Nz); - //************************************************************************* + //************************************************************************* + // Carry out the density streaming step for mass transport + //************************************************************************* + dvc_DensityStreamD3Q7(nBlocks, nthreads, S, 
ID, Den, Copy, Phi, + ColorGrad, Velocity, beta, Nx, Ny, Nz, pBC); + //************************************************************************* - //************************************************************************* - // Perform collision step for the momentum transport - //************************************************************************* - dvc_ColorCollide(nBlocks, nthreads, S, - ID, f_even, f_odd, ColorGrad, Velocity, - rlxA, rlxB,alpha, beta, Fx, Fy, Fz, Nx, Ny, Nz, pBC); - //************************************************************************* + //************************************************************************* + // Swap the distributions for momentum transport + //************************************************************************* + dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz); + //************************************************************************* - //************************************************************************* - // Carry out the density streaming step for mass transport - //************************************************************************* - dvc_DensityStreamD3Q7(nBlocks, nthreads, S, - ID, Den, Copy, Phi, ColorGrad, Velocity,beta, Nx, Ny, Nz, pBC); - //************************************************************************* - - //************************************************************************* - // Swap the distributions for momentum transport - //************************************************************************* - dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz); - //************************************************************************* + //************************************************************************* + // Compute the phase indicator field and reset Copy, Den + //************************************************************************* + dvc_ComputePhi(nBlocks, nthreads, S, ID, Phi, Copy, Den, N); + 
//************************************************************************* - //************************************************************************* - // Compute the phase indicator field and reset Copy, Den - //************************************************************************* - dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N); - //************************************************************************* + // Iteration completed! + timestep++; - // Iteration completed! - timestep++; - - //................................................................... - } - //************************************************************************/ - dvc_Barrier(); - stop = clock(); + //................................................................... + } + //************************************************************************/ + dvc_Barrier(); + stop = clock(); -// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; - walltime = (stop - start)/CLOCKS_PER_SEC; -// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; - double MLUPS = double(Nx*Ny*Nz*timestep)/walltime/1000000; - if (rank==0) printf("********************************************************\n"); - if (rank==0) printf("CPU time = %f \n", walltime); - if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); - MLUPS *= nprocs; - if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); - if (rank==0) printf("********************************************************\n"); - - //************************************************************************/ - // Write out the phase indicator field - //************************************************************************/ - sprintf(LocalRankFilename,"%s%s","Phase.",LocalRankString); - // printf("Local File Name = %s \n",LocalRankFilename); - double *phiOut; - phiOut = new double[N]; - dvc_CopyToHost(phiOut,Phi,N*sizeof(double)); - - FILE *PHASE; - PHASE 
= fopen(LocalRankFilename,"wb"); - fwrite(phiOut,8,N,PHASE); - fclose(PHASE); - //************************************************************************/ + // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; + walltime = (stop - start) / CLOCKS_PER_SEC; + // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; + double MLUPS = double(Nx * Ny * Nz * timestep) / walltime / 1000000; + if (rank == 0) + printf("********************************************************\n"); + if (rank == 0) + printf("CPU time = %f \n", walltime); + if (rank == 0) + printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank == 0) + printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank == 0) + printf("********************************************************\n"); + //************************************************************************/ + // Write out the phase indicator field + //************************************************************************/ + sprintf(LocalRankFilename, "%s%s", "Phase.", LocalRankString); + // printf("Local File Name = %s \n",LocalRankFilename); + double *phiOut; + phiOut = new double[N]; + dvc_CopyToHost(phiOut, Phi, N * sizeof(double)); + + FILE *PHASE; + PHASE = fopen(LocalRankFilename, "wb"); + fwrite(phiOut, 8, N, PHASE); + fclose(PHASE); + //************************************************************************/ } diff --git a/cuda/exe/lb2_Color_mpi.cpp b/cuda/exe/lb2_Color_mpi.cpp index fe11d32f..775c9ee0 100644 --- a/cuda/exe/lb2_Color_mpi.cpp +++ b/cuda/exe/lb2_Color_mpi.cpp @@ -9,62 +9,76 @@ using namespace std; //************************************************************************* // Functions defined in Color.cu //************************************************************************* -extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S, - char *ID, double *Den, double *Phi, double das, double dbs, int N); 
+extern "C" void dvc_InitDenColor(int nblocks, int nthreads, int S, char *ID, + double *Den, double *Phi, double das, + double dbs, int N); //************************************************************************* extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S, - char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz); + char *ID, double *Phi, + double *ColorGrad, int Nx, int Ny, + int Nz); //************************************************************************* -extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S, - char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity, - double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz, - int Nx, int Ny, int Nz, bool pBC); +extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S, char *ID, + double *f_even, double *f_odd, + double *ColorGrad, double *Velocity, + double rlxA, double rlxB, double alpha, + double beta, double Fx, double Fy, double Fz, + int Nx, int Ny, int Nz, bool pBC); //************************************************************************* extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S, - char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, - double beta, int Nx, int Ny, int Nz, bool pBC); + char *ID, double *Den, double *Copy, + double *Phi, double *ColorGrad, + double *Velocity, double beta, int Nx, + int Ny, int Nz, bool pBC); //************************************************************************* -extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S, - char *ID, double *Phi, double *Copy, double *Den, int N); +extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S, char *ID, + double *Phi, double *Copy, double *Den, int N); //************************************************************************* //************************************************************************* // Functions defined in 
D3Q19.cu //************************************************************************* -extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd, int Nx, - int Ny, int Nz); +extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, + double *f_even, double *f_odd, int Nx, int Ny, + int Nz); //************************************************************************* -extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S, - char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz); +extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S, char *ID, + double *f_even, double *f_odd, int Nx, int Ny, + int Nz); //************************************************************************* -extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start, - int sendCount, double *sendbuf, double *Dist, int N); +extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, + int start, int sendCount, double *sendbuf, + double *Dist, int N); //************************************************************************* -extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start, - int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz); +extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, + int Cqz, int *RecvList, int start, int recvCount, + double *recvbuf, double *Dist, int Nx, int Ny, + int Nz); //************************************************************************* //*************************************************************************************** // Functions defined in D3Q7.cu //*************************************************************************************** -extern "C" void dvc_PackDenD3Q7(int grid, int threads, int *list, int count, double *sendbuf, - int number, double *Data, int N); +extern "C" void dvc_PackDenD3Q7(int grid, int threads, int 
*list, int count, + double *sendbuf, int number, double *Data, + int N); //*************************************************************************************** -extern "C" void dvc_UnpackDenD3Q7(int grid, int threads, int *list, int count, double *recvbuf, - int number, double *Data, int N); +extern "C" void dvc_UnpackDenD3Q7(int grid, int threads, int *list, int count, + double *recvbuf, int number, double *Data, + int N); //*************************************************************************************** -extern "C" void dvc_PackValues(int grid, int threads, int *list, int count, double *sendbuf, - double *Data, int N); +extern "C" void dvc_PackValues(int grid, int threads, int *list, int count, + double *sendbuf, double *Data, int N); //*************************************************************************************** -extern "C" void dvc_UnpackValues(int grid, int threads, int *list, int count, double *recvbuf, - double *Data, int N); +extern "C" void dvc_UnpackValues(int grid, int threads, int *list, int count, + double *recvbuf, double *Data, int N); //*************************************************************************************** //************************************************************************* // Functions defined in CudaExtras.cu //************************************************************************* -extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size); +extern "C" void dvc_AllocateDeviceMemory(void **address, size_t size); //************************************************************************* -extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size); +extern "C" void dvc_CopyToDevice(void *dest, void *source, size_t size); //************************************************************************* -extern "C" void dvc_CopyToHost(void* dest, void* source, size_t size); +extern "C" void dvc_CopyToHost(void *dest, void *source, size_t size); 
//************************************************************************* extern "C" void dvc_Barrier(); //************************************************************************* @@ -73,451 +87,602 @@ extern "C" void dvc_Barrier(); // Implementation of Two-Phase Immiscible LBM using CUDA //************************************************************************* -inline void PackID(int *list, int count, char *sendbuf, char *ID){ - // Fill in the phase ID values from neighboring processors - // This packs up the values that need to be sent from one processor to another - int idx,n; +inline void PackID(int *list, int count, char *sendbuf, char *ID) { + // Fill in the phase ID values from neighboring processors + // This packs up the values that need to be sent from one processor to another + int idx, n; - for (idx=0; idx> FILENAME; - // Line 2: domain size (Nx, Ny, Nz) - input >> Nz; // number of nodes (x,y,z) - input >> nBlocks; - input >> nthreads; - // Line 3: model parameters (tau, alpha, beta, das, dbs) - input >> tau; - input >> alpha; - input >> beta; - input >> das; - input >> dbs; - // Line 4: External force components (Fx,Fy, Fz) - input >> Fx; - input >> Fy; - input >> Fz; - // Line 5: Pressure Boundary conditions - input >> pBC; - input >> din; - input >> dout; - // Line 6: time-stepping criteria - input >> timestepMax; // max no. of timesteps - input >> interval; // error interval - input >> tol; // error tolerance - //............................................................. + if (rank == 0) { + //............................................................. + // READ SIMULATION PARMAETERS FROM INPUT FILE + //............................................................. 
+ ifstream input("Color.in"); + // Line 1: Name of the phase indicator file (s=0,w=1,n=2) + input >> FILENAME; + // Line 2: domain size (Nx, Ny, Nz) + input >> Nz; // number of nodes (x,y,z) + input >> nBlocks; + input >> nthreads; + // Line 3: model parameters (tau, alpha, beta, das, dbs) + input >> tau; + input >> alpha; + input >> beta; + input >> das; + input >> dbs; + // Line 4: External force components (Fx,Fy, Fz) + input >> Fx; + input >> Fy; + input >> Fz; + // Line 5: Pressure Boundary conditions + input >> pBC; + input >> din; + input >> dout; + // Line 6: time-stepping criteria + input >> timestepMax; // max no. of timesteps + input >> interval; // error interval + input >> tol; // error tolerance + //............................................................. - ifstream domain("Domain.in"); - domain >> nprocx; - domain >> nprocy; - domain >> nprocz; - } - // ************************************************************** - // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); - //................................................. 
- MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); + ifstream domain("Domain.in"); + domain >> nprocx; + domain >> nprocy; + domain >> nprocz; + } + // ************************************************************** + // Broadcast simulation parameters from rank 0 to all other procs + MPI_Barrier(comm); + //................................................. + MPI_Bcast(&Nz, 1, MPI_INT, 0, comm); + MPI_Bcast(&nBlocks, 1, MPI_INT, 0, comm); + MPI_Bcast(&nthreads, 1, MPI_INT, 0, comm); + MPI_Bcast(&Fx, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Fy, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Fz, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&tau, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&alpha, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&beta, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&das, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&dbs, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&pBC, 1, MPI_LOGICAL, 0, comm); + MPI_Bcast(&din, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&dout, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(×tepMax, 1, MPI_INT, 0, comm); + MPI_Bcast(&interval, 1, MPI_INT, 0, comm); + MPI_Bcast(&tol, 1, MPI_DOUBLE, 0, comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - //................................................. 
- MPI_Barrier(comm); - // ************************************************************** - // ************************************************************** + MPI_Bcast(&nprocx, 1, MPI_INT, 0, comm); + MPI_Bcast(&nprocy, 1, MPI_INT, 0, comm); + MPI_Bcast(&nprocz, 1, MPI_INT, 0, comm); + //................................................. + MPI_Barrier(comm); + // ************************************************************** + // ************************************************************** - double rlxA = 1.f/tau; - double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA); + double rlxA = 1.f / tau; + double rlxB = 8.f * (2.f - rlxA) / (8.f - rlxA); - if (nprocs != nprocx*nprocy*nprocz){ - printf("Fatal error in processor number! \n"); - printf("nprocx = %i \n",nprocx); - printf("nprocy = %i \n",nprocy); - printf("nprocz = %i \n",nprocz); - } + if (nprocs != nprocx * nprocy * nprocz) { + printf("Fatal error in processor number! \n"); + printf("nprocx = %i \n", nprocx); + printf("nprocy = %i \n", nprocy); + printf("nprocz = %i \n", nprocz); + } - if (rank==0){ - printf("********************************************************\n"); - printf("tau = %f \n", tau); - printf("alpha = %f \n", alpha); - printf("beta = %f \n", beta); - printf("das = %f \n", beta); - printf("dbs = %f \n", beta); - printf("Force(x) = %f \n", Fx); - printf("Force(y) = %f \n", Fy); - printf("Force(z) = %f \n", Fz); - printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); - printf("Parallel domain size = %i x %i x %i\n",nprocx,nprocy,nprocz); - printf("********************************************************\n"); + if (rank == 0) { + printf("********************************************************\n"); + printf("tau = %f \n", tau); + printf("alpha = %f \n", alpha); + printf("beta = %f \n", beta); + printf("das = %f \n", beta); + printf("dbs = %f \n", beta); + printf("Force(x) = %f \n", Fx); + printf("Force(y) = %f \n", Fy); + printf("Force(z) = %f \n", Fz); + printf("Sub-domain size = %i x %i x %i\n", Nz, Nz, 
Nz); + printf("Parallel domain size = %i x %i x %i\n", nprocx, nprocy, nprocz); + printf("********************************************************\n"); + } - } + MPI_Barrier(comm); + kproc = rank / (nprocx * nprocy); + jproc = (rank - nprocx * nprocy * kproc) / nprocx; + iproc = rank - nprocx * nprocy * kproc - nprocz * jproc; - MPI_Barrier(comm); - kproc = rank/(nprocx*nprocy); - jproc = (rank-nprocx*nprocy*kproc)/nprocx; - iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; + //.......................................... + // set up the neighbor ranks + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 1; + j += 0; + k += 0; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_X = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i -= 1; + j += 0; + k += 0; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_x = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 0; + j += 1; + k += 0; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_Y = k * nprocx * nprocy + j * nprocx + i; + //.......................................... 
+ i = iproc; + j = jproc; + k = kproc; + i += 0; + j -= 1; + k += 0; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_y = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 0; + j += 0; + k += 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_Z = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 0; + j += 0; + k -= 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_z = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 1; + j += 1; + k += 0; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_XY = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i -= 1; + j -= 1; + k += 0; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_xy = k * nprocx * nprocy + j * nprocx + i; + //.......................................... 
+ i = iproc; + j = jproc; + k = kproc; + i += 1; + j -= 1; + k += 0; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_Xy = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i -= 1; + j += 1; + k += 0; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_xY = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 1; + j += 0; + k += 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_XZ = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i -= 1; + j += 0; + k -= 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_xz = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i -= 1; + j += 0; + k += 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_xZ = k * nprocx * nprocy + j * nprocx + i; + //.......................................... 
+ i = iproc; + j = jproc; + k = kproc; + i += 1; + j += 0; + k -= 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_Xz = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 0; + j += 1; + k += 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_YZ = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 0; + j -= 1; + k -= 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_yz = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 0; + j -= 1; + k += 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_yZ = k * nprocx * nprocy + j * nprocx + i; + //.......................................... + i = iproc; + j = jproc; + k = kproc; + i += 0; + j += 1; + k -= 1; + if (i < 0) + i += nprocx; + if (j < 0) + j += nprocy; + if (k < 0) + k += nprocz; + if (!(i < nprocx)) + i -= nprocx; + if (!(j < nprocy)) + j -= nprocy; + if (!(k < nprocz)) + k -= nprocz; + rank_Yz = k * nprocx * nprocy + j * nprocx + i; + //.......................................... - //.......................................... - // set up the neighbor ranks - //.......................................... 
- i=iproc; j=jproc; k =kproc; - i+=1; - j+=0; - k+=0; - if (i<0) i+=nprocx; - if (j<0) j+=nprocy; - if (k<0) k+=nprocz; - if (!(i 0) sum++; - } - } - } - PM.close(); -// printf("File porosity = %f\n", double(sum)/N); - //........................................................................... - MPI_Barrier(comm); - if (rank == 0) cout << "Domain set." << endl; - //........................................................................... - // Write the communcation structure into a file for debugging -/* char LocalCommFile[40]; + //....................................................................... + if (rank == 0) + printf("Read input media... \n"); + //....................................................................... + char LocalRankString[8]; + char LocalRankFilename[40]; + sprintf(LocalRankString, "%05d", rank); + sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString); + // printf("Local File Name = %s \n",LocalRankFilename); + // .......... READ THE INPUT FILE ....................................... + char value; + char *id; + id = new char[N]; + int sum = 0; + // double porosity; + //....................................................................... + ifstream PM(LocalRankFilename, ios::binary); + for (k = 0; k < Nz; k++) { + for (j = 0; j < Ny; j++) { + for (i = 0; i < Nx; i++) { + n = k * Nx * Ny + j * Nx + i; + id[n] = 0; + } + } + } + for (k = 1; k < Nz - 1; k++) { + for (j = 1; j < Ny - 1; j++) { + for (i = 1; i < Nx - 1; i++) { + PM.read((char *)(&value), sizeof(value)); + n = k * Nx * Ny + j * Nx + i; + id[n] = value; + if (value > 0) + sum++; + } + } + } + PM.close(); + // printf("File porosity = %f\n", double(sum)/N); + //........................................................................... + MPI_Barrier(comm); + if (rank == 0) + cout << "Domain set." << endl; + //........................................................................... 
+ // Write the communcation structure into a file for debugging + /* char LocalCommFile[40]; sprintf(LocalCommFile,"%s%s","Comm.",LocalRankString); FILE *CommFile; CommFile = fopen(LocalCommFile,"w"); @@ -545,512 +710,816 @@ int main(int argc, char **argv) fclose(CommFile); */ //........................................................................... - // Set up MPI communication structures - if (rank==0) printf ("Setting up communication control structures \n"); - //...................................................................................... - // Get the actual D3Q19 communication counts (based on location of solid phase) - // Discrete velocity set symmetry implies the sendcount = recvcount - int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; - int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; - int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; - sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; - sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; - sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; - //...................................................................................... - for (k=0; k> FILENAME; - // Line 2: domain size (Nx, Ny, Nz) - input >> Nz; // number of nodes (x,y,z) - input >> nBlocks; - input >> nthreads; - // Line 3: model parameters (tau, alpha, beta, das, dbs) - input >> tau; - input >> alpha; - input >> beta; - input >> das; - input >> dbs; - // Line 4: wetting phase saturation to initialize - input >> wp_saturation; - // Line 5: External force components (Fx,Fy, Fz) - input >> Fx; - input >> Fy; - input >> Fz; - // Line 6: Pressure Boundary conditions - input >> pBC; - input >> din; - input >> dout; - // Line 7: time-stepping criteria - input >> timestepMax; // max no. 
of timesteps - input >> interval; // error interval - input >> tol; // error tolerance - //............................................................. + if (rank == 0) { + //............................................................. + // READ SIMULATION PARMAETERS FROM INPUT FILE + //............................................................. + ifstream input("Color.in"); + // Line 1: Name of the phase indicator file (s=0,w=1,n=2) + input >> FILENAME; + // Line 2: domain size (Nx, Ny, Nz) + input >> Nz; // number of nodes (x,y,z) + input >> nBlocks; + input >> nthreads; + // Line 3: model parameters (tau, alpha, beta, das, dbs) + input >> tau; + input >> alpha; + input >> beta; + input >> das; + input >> dbs; + // Line 4: wetting phase saturation to initialize + input >> wp_saturation; + // Line 5: External force components (Fx,Fy, Fz) + input >> Fx; + input >> Fy; + input >> Fz; + // Line 6: Pressure Boundary conditions + input >> pBC; + input >> din; + input >> dout; + // Line 7: time-stepping criteria + input >> timestepMax; // max no. of timesteps + input >> interval; // error interval + input >> tol; // error tolerance + //............................................................. - //....................................................................... - // Reading the domain information file - //....................................................................... - ifstream domain("Domain.in"); - domain >> nprocx; - domain >> nprocy; - domain >> nprocz; - domain >> Nx; - domain >> Ny; - domain >> Nz; - domain >> nspheres; - domain >> Lx; - domain >> Ly; - domain >> Lz; - //....................................................................... - } - // ************************************************************** - // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); - //................................................. 
- MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&das,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm); - MPI_Bcast(&din,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm); - MPI_Bcast(×tepMax,1,MPI_INT,0,comm); - MPI_Bcast(&interval,1,MPI_INT,0,comm); - MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm); - // Computational domain - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nBlocks,1,MPI_INT,0,comm); - MPI_Bcast(&nthreads,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); - //................................................. - MPI_Barrier(comm); - // ************************************************************** - // ************************************************************** - double Ps = -(das-dbs)/(das+dbs); - double rlxA = 1.f/tau; - double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA); + //....................................................................... + // Reading the domain information file + //....................................................................... + ifstream domain("Domain.in"); + domain >> nprocx; + domain >> nprocy; + domain >> nprocz; + domain >> Nx; + domain >> Ny; + domain >> Nz; + domain >> nspheres; + domain >> Lx; + domain >> Ly; + domain >> Lz; + //....................................................................... + } + // ************************************************************** + // Broadcast simulation parameters from rank 0 to all other procs + MPI_Barrier(comm); + //................................................. 
+ MPI_Bcast(&tau, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&alpha, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&beta, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&das, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&dbs, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&pBC, 1, MPI_LOGICAL, 0, comm); + MPI_Bcast(&din, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&dout, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Fx, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Fy, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Fz, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(×tepMax, 1, MPI_INT, 0, comm); + MPI_Bcast(&interval, 1, MPI_INT, 0, comm); + MPI_Bcast(&tol, 1, MPI_DOUBLE, 0, comm); + // Computational domain + MPI_Bcast(&Nz, 1, MPI_INT, 0, comm); + MPI_Bcast(&nBlocks, 1, MPI_INT, 0, comm); + MPI_Bcast(&nthreads, 1, MPI_INT, 0, comm); + MPI_Bcast(&nprocx, 1, MPI_INT, 0, comm); + MPI_Bcast(&nprocy, 1, MPI_INT, 0, comm); + MPI_Bcast(&nprocz, 1, MPI_INT, 0, comm); + MPI_Bcast(&nspheres, 1, MPI_INT, 0, comm); + MPI_Bcast(&Lx, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Ly, 1, MPI_DOUBLE, 0, comm); + MPI_Bcast(&Lz, 1, MPI_DOUBLE, 0, comm); + //................................................. + MPI_Barrier(comm); + // ************************************************************** + // ************************************************************** + double Ps = -(das - dbs) / (das + dbs); + double rlxA = 1.f / tau; + double rlxB = 8.f * (2.f - rlxA) / (8.f - rlxA); - if (nprocs != nprocx*nprocy*nprocz){ - printf("Fatal error in processor number! \n"); - printf("nprocx = %i \n",nprocx); - printf("nprocy = %i \n",nprocy); - printf("nprocz = %i \n",nprocz); - } + if (nprocs != nprocx * nprocy * nprocz) { + printf("Fatal error in processor number! 
\n"); + printf("nprocx = %i \n", nprocx); + printf("nprocy = %i \n", nprocy); + printf("nprocz = %i \n", nprocz); + } - if (rank==0){ - printf("********************************************************\n"); - printf("tau = %f \n", tau); - printf("alpha = %f \n", alpha); - printf("beta = %f \n", beta); - printf("das = %f \n", das); - printf("dbs = %f \n", dbs); - printf("phi_s = %f \n", Ps); - printf("gamma_{wn} = %f \n", 6.01603*alpha); - printf("cos theta_c = %f \n", 1.05332*Ps); - printf("Force(x) = %f \n", Fx); - printf("Force(y) = %f \n", Fy); - printf("Force(z) = %f \n", Fz); - printf("Sub-domain size = %i x %i x %i\n",Nz,Nz,Nz); - printf("Parallel domain size = %i x %i x %i\n",nprocx,nprocy,nprocz); - printf("********************************************************\n"); - } + if (rank == 0) { + printf("********************************************************\n"); + printf("tau = %f \n", tau); + printf("alpha = %f \n", alpha); + printf("beta = %f \n", beta); + printf("das = %f \n", das); + printf("dbs = %f \n", dbs); + printf("phi_s = %f \n", Ps); + printf("gamma_{wn} = %f \n", 6.01603 * alpha); + printf("cos theta_c = %f \n", 1.05332 * Ps); + printf("Force(x) = %f \n", Fx); + printf("Force(y) = %f \n", Fy); + printf("Force(z) = %f \n", Fz); + printf("Sub-domain size = %i x %i x %i\n", Nz, Nz, Nz); + printf("Parallel domain size = %i x %i x %i\n", nprocx, nprocy, nprocz); + printf("********************************************************\n"); + } - MPI_Barrier(comm); - kproc = rank/(nprocx*nprocy); - jproc = (rank-nprocx*nprocy*kproc)/nprocx; - iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; + MPI_Barrier(comm); + kproc = rank / (nprocx * nprocy); + jproc = (rank - nprocx * nprocy * kproc) / nprocx; + iproc = rank - nprocx * nprocy * kproc - nprocz * jproc; - //.......................................... - // set up the neighbor ranks - //.......................................... 
- i=iproc; j=jproc; k =kproc; - i+=1; - j+=0; - k+=0; - if (i<0) i+=nprocx; - if (j<0) j+=nprocy; - if (k<0) k+=nprocz; - if (!(i 0.0){ - id[n] = 1; - sum++; - } - } - } - } - //...................................................................... - // Once phase ID has been generated, map solid to account for 'smeared' interface - //...................................................................... - for (i=0; i 0.0) { + id[n] = 1; + sum++; + } + } + } + } + //...................................................................... + // Once phase ID has been generated, map solid to account for 'smeared' interface + //...................................................................... + for (i = 0; i < N; i++) + SignDist.data[i] -= 0.5; // Solid appears half a pixel bigger + //...................................................................... + // Generate the residual NWP + GenerateResidual(id, Nx, Ny, Nz, wp_saturation); + //....................................................................... + sprintf(LocalRankString, "%05d", rank); + sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString); + WriteLocalSolidID(LocalRankFilename, id, N); + sprintf(LocalRankFilename, "%s%s", "SignDist.", LocalRankString); + WriteLocalSolidDistance(LocalRankFilename, SignDist.data, N); + //....................................................................... - // Set up MPI communication structures - if (rank==0) printf ("Setting up communication control structures \n"); - //...................................................................................... 
- // Get the actual D3Q19 communication counts (based on location of solid phase) - // Discrete velocity set symmetry implies the sendcount = recvcount - int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; - int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; - int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; - sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; - sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; - sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; - //...................................................................................... - for (k=0; k fluid_isovalue) - int cube[8][3] = {{0,0,0},{1,0,0},{0,1,0},{1,1,0},{0,0,1},{1,0,1},{0,1,1},{1,1,1}}; // cube corners -// int count_in=0,count_out=0; -// int nodx,nody,nodz; - // initialize lists for vertices for surfaces, common line - DTMutableList nw_pts(20); - DTMutableList ns_pts(20); - DTMutableList ws_pts(20); - DTMutableList nws_pts(20); - // initialize triangle lists for surfaces - IntArray nw_tris(3,20); - IntArray ns_tris(3,20); - IntArray ws_tris(3,20); - // initialize list for line segments - IntArray nws_seg(2,20); - - DTMutableList tmp(20); - // IntArray store; - - int n_nw_pts=0,n_ns_pts=0,n_ws_pts=0,n_nws_pts=0, map=0; - int n_nw_tris=0, n_ns_tris=0, n_ws_tris=0, n_nws_seg=0; - - double s,s1,s2,s3; // Triangle sides (lengths) - Point A,B,C,P; -// double area; - - // Initialize arrays for local solid surface - DTMutableList local_sol_pts(20); - int n_local_sol_pts = 0; - IntArray local_sol_tris(3,18); - int n_local_sol_tris; - DoubleArray values(20); - DTMutableList local_nws_pts(20); - int n_local_nws_pts; - - int n_nw_tris_beg, n_ns_tris_beg, n_ws_tris_beg; - int c; - int newton_steps = 0; - 
//........................................................................... - int ncubes = (Nx-2)*(Ny-2)*(Nz-2); // Exclude the "upper" halo - IntArray cubeList(3,ncubes); - int nc=0; - //........................................................................... - // Set up the cube list (very regular in this case due to lack of blob-ID) - for (k=0; k fluid_isovalue) + int cube[8][3] = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {1, 1, 0}, {0, 0, 1}, + {1, 0, 1}, {0, 1, 1}, {1, 1, 1}}; // cube corners + // int count_in=0,count_out=0; + // int nodx,nody,nodz; + // initialize lists for vertices for surfaces, common line + DTMutableList nw_pts(20); + DTMutableList ns_pts(20); + DTMutableList ws_pts(20); + DTMutableList nws_pts(20); + // initialize triangle lists for surfaces + IntArray nw_tris(3, 20); + IntArray ns_tris(3, 20); + IntArray ws_tris(3, 20); + // initialize list for line segments + IntArray nws_seg(2, 20); + + DTMutableList tmp(20); + // IntArray store; + + int n_nw_pts = 0, n_ns_pts = 0, n_ws_pts = 0, n_nws_pts = 0, map = 0; + int n_nw_tris = 0, n_ns_tris = 0, n_ws_tris = 0, n_nws_seg = 0; + + double s, s1, s2, s3; // Triangle sides (lengths) + Point A, B, C, P; + // double area; + + // Initialize arrays for local solid surface + DTMutableList local_sol_pts(20); + int n_local_sol_pts = 0; + IntArray local_sol_tris(3, 18); + int n_local_sol_tris; + DoubleArray values(20); + DTMutableList local_nws_pts(20); + int n_local_nws_pts; + + int n_nw_tris_beg, n_ns_tris_beg, n_ws_tris_beg; + int c; + int newton_steps = 0; + //........................................................................... + int ncubes = (Nx - 2) * (Ny - 2) * (Nz - 2); // Exclude the "upper" halo + IntArray cubeList(3, ncubes); + int nc = 0; + //........................................................................... 
+ // Set up the cube list (very regular in this case due to lack of blob-ID) + for (k = 0; k < Nz - 2; k++) { + for (j = 0; j < Ny - 2; j++) { + for (i = 0; i < Nx - 2; i++) { + cubeList(0, nc) = i; + cubeList(1, nc) = j; + cubeList(2, nc) = k; + nc++; + } + } + } + if (nc != ncubes) + fprintf("Basic cubeList error \n"); + + //........................................................................... + // Grids used to pack faces on the GPU for MPI + int faceGrid, edgeGrid, packThreads; + packThreads = 512; + edgeGrid = 1; + faceGrid = Nx * Ny / packThreads; + //........................................................................... + + //........................................................................... + // MAIN VARIABLES INITIALIZED HERE + //........................................................................... + //........................................................................... + if (rank == 0) + printf("Setting the distributions, size = : %i\n", N); + //........................................................................... 
+ dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz); + dvc_InitDenColor(nBlocks, nthreads, S, ID, Copy, Phi, das, dbs, Nx, Ny, Nz); + dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, Nx, Ny, Nz); + // Pack the buffers (zeros out the halo region) + dvc_PackDenD3Q7(faceGrid, packThreads, dvcRecvList_x, recvCount_x, + recvbuf_x, 2, Den, N); + dvc_PackDenD3Q7(faceGrid, packThreads, dvcRecvList_y, recvCount_y, + recvbuf_y, 2, Den, N); + dvc_PackDenD3Q7(faceGrid, packThreads, dvcRecvList_z, recvCount_z, + recvbuf_z, 2, Den, N); + dvc_PackDenD3Q7(faceGrid, packThreads, dvcRecvList_X, recvCount_X, + recvbuf_X, 2, Den, N); + dvc_PackDenD3Q7(faceGrid, packThreads, dvcRecvList_Y, recvCount_Y, + recvbuf_Y, 2, Den, N); + dvc_PackDenD3Q7(faceGrid, packThreads, dvcRecvList_Z, recvCount_Z, + recvbuf_Z, 2, Den, N); + //................................................................................... + dvc_ComputePhi(nBlocks, nthreads, S, ID, Phi, Copy, Den, N); + //........................................................................... 
+ dvc_PackValues(faceGrid, packThreads, dvcSendList_x, sendCount_x, sendbuf_x, + Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_y, sendCount_y, sendbuf_y, + Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_z, sendCount_z, sendbuf_z, + Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_X, sendCount_X, sendbuf_X, + Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_Y, sendCount_Y, sendbuf_Y, + Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_Z, sendCount_Z, sendbuf_Z, + Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_xy, sendCount_xy, + sendbuf_xy, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_xY, sendCount_xY, + sendbuf_xY, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_Xy, sendCount_Xy, + sendbuf_Xy, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_XY, sendCount_XY, + sendbuf_XY, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_xz, sendCount_xz, + sendbuf_xz, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_xZ, sendCount_xZ, + sendbuf_xZ, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_Xz, sendCount_Xz, + sendbuf_Xz, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_XZ, sendCount_XZ, + sendbuf_XZ, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_yz, sendCount_yz, + sendbuf_yz, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_yZ, sendCount_yZ, + sendbuf_yZ, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_Yz, sendCount_Yz, + sendbuf_Yz, Phi, N); + dvc_PackValues(faceGrid, packThreads, dvcSendList_YZ, sendCount_YZ, + sendbuf_YZ, Phi, N); + //................................................................................... + // Send / Recv all the phase indcator field values + //................................................................................... 
+ MPI_Isend(sendbuf_x, sendCount_x, MPI_DOUBLE, rank_X, sendtag, comm, + &req1[0]); + MPI_Irecv(recvbuf_X, recvCount_X, MPI_DOUBLE, rank_x, recvtag, comm, + &req2[0]); + MPI_Isend(sendbuf_X, sendCount_X, MPI_DOUBLE, rank_x, sendtag, comm, + &req1[1]); + MPI_Irecv(recvbuf_x, recvCount_x, MPI_DOUBLE, rank_X, recvtag, comm, + &req2[1]); + MPI_Isend(sendbuf_y, sendCount_y, MPI_DOUBLE, rank_Y, sendtag, comm, + &req1[2]); + MPI_Irecv(recvbuf_Y, recvCount_Y, MPI_DOUBLE, rank_y, recvtag, comm, + &req2[2]); + MPI_Isend(sendbuf_Y, sendCount_Y, MPI_DOUBLE, rank_y, sendtag, comm, + &req1[3]); + MPI_Irecv(recvbuf_y, recvCount_y, MPI_DOUBLE, rank_Y, recvtag, comm, + &req2[3]); + MPI_Isend(sendbuf_z, sendCount_z, MPI_DOUBLE, rank_Z, sendtag, comm, + &req1[4]); + MPI_Irecv(recvbuf_Z, recvCount_Z, MPI_DOUBLE, rank_z, recvtag, comm, + &req2[4]); + MPI_Isend(sendbuf_Z, sendCount_Z, MPI_DOUBLE, rank_z, sendtag, comm, + &req1[5]); + MPI_Irecv(recvbuf_z, recvCount_z, MPI_DOUBLE, rank_Z, recvtag, comm, + &req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy, MPI_DOUBLE, rank_XY, sendtag, comm, + &req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY, MPI_DOUBLE, rank_xy, recvtag, comm, + &req2[6]); + MPI_Isend(sendbuf_XY, sendCount_XY, MPI_DOUBLE, rank_xy, sendtag, comm, + &req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy, MPI_DOUBLE, rank_XY, recvtag, comm, + &req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy, MPI_DOUBLE, rank_xY, sendtag, comm, + &req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY, MPI_DOUBLE, rank_Xy, recvtag, comm, + &req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY, MPI_DOUBLE, rank_Xy, sendtag, comm, + &req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy, MPI_DOUBLE, rank_xY, recvtag, comm, + &req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz, MPI_DOUBLE, rank_XZ, sendtag, comm, + &req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ, MPI_DOUBLE, rank_xz, recvtag, comm, + &req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ, MPI_DOUBLE, rank_xz, sendtag, comm, + &req1[11]); + MPI_Irecv(recvbuf_xz, 
recvCount_xz, MPI_DOUBLE, rank_XZ, recvtag, comm, + &req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz, MPI_DOUBLE, rank_xZ, sendtag, comm, + &req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ, MPI_DOUBLE, rank_Xz, recvtag, comm, + &req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ, MPI_DOUBLE, rank_Xz, sendtag, comm, + &req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz, MPI_DOUBLE, rank_xZ, recvtag, comm, + &req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz, MPI_DOUBLE, rank_YZ, sendtag, comm, + &req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ, MPI_DOUBLE, rank_yz, recvtag, comm, + &req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ, MPI_DOUBLE, rank_yz, sendtag, comm, + &req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz, MPI_DOUBLE, rank_YZ, recvtag, comm, + &req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz, MPI_DOUBLE, rank_yZ, sendtag, comm, + &req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ, MPI_DOUBLE, rank_Yz, recvtag, comm, + &req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ, MPI_DOUBLE, rank_Yz, sendtag, comm, + &req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz, MPI_DOUBLE, rank_yZ, recvtag, comm, + &req2[17]); + //................................................................................... + //................................................................................... + // Wait for completion of Indicator Field communication + //................................................................................... + MPI_Waitall(18, req1, stat1); + MPI_Waitall(18, req2, stat2); + //................................................................................... + //................................................................................... 
+ /* dvc_UnpackValues(faceGrid, packThreads, dvcSendList_x, sendCount_x,sendbuf_x, Phi, N); dvc_UnpackValues(faceGrid, packThreads, dvcSendList_y, sendCount_y,sendbuf_y, Phi, N); dvc_UnpackValues(faceGrid, packThreads, dvcSendList_z, sendCount_z,sendbuf_z, Phi, N); dvc_UnpackValues(faceGrid, packThreads, dvcSendList_X, sendCount_X,sendbuf_X, Phi, N); dvc_UnpackValues(faceGrid, packThreads, dvcSendList_Y, sendCount_Y,sendbuf_Y, Phi, N); dvc_UnpackValues(faceGrid, packThreads, dvcSendList_Z, sendCount_Z,sendbuf_Z, Phi, N); - */ - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_x, recvCount_x,recvbuf_x, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_y, recvCount_y,recvbuf_y, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_z, recvCount_z,recvbuf_z, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_X, recvCount_X,recvbuf_X, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_Y, recvCount_Y,recvbuf_Y, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_Z, recvCount_Z,recvbuf_Z, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_xy, recvCount_xy,recvbuf_xy, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_xY, recvCount_xY,recvbuf_xY, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_Xy, recvCount_Xy,recvbuf_Xy, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_XY, recvCount_XY,recvbuf_XY, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_xz, recvCount_xz,recvbuf_xz, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_xZ, recvCount_xZ,recvbuf_xZ, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_Xz, recvCount_Xz,recvbuf_Xz, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_XZ, recvCount_XZ,recvbuf_XZ, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_yz, recvCount_yz,recvbuf_yz, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_yZ, recvCount_yZ,recvbuf_yZ, Phi, N); - dvc_UnpackValues(faceGrid, 
packThreads,dvcRecvList_Yz, recvCount_Yz,recvbuf_Yz, Phi, N); - dvc_UnpackValues(faceGrid, packThreads,dvcRecvList_YZ, recvCount_YZ,recvbuf_YZ, Phi, N); - //................................................................................... + */ + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_x, recvCount_x, + recvbuf_x, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_y, recvCount_y, + recvbuf_y, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_z, recvCount_z, + recvbuf_z, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_X, recvCount_X, + recvbuf_X, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Y, recvCount_Y, + recvbuf_Y, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Z, recvCount_Z, + recvbuf_Z, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_xy, recvCount_xy, + recvbuf_xy, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_xY, recvCount_xY, + recvbuf_xY, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Xy, recvCount_Xy, + recvbuf_Xy, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_XY, recvCount_XY, + recvbuf_XY, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_xz, recvCount_xz, + recvbuf_xz, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_xZ, recvCount_xZ, + recvbuf_xZ, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Xz, recvCount_Xz, + recvbuf_Xz, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_XZ, recvCount_XZ, + recvbuf_XZ, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_yz, recvCount_yz, + recvbuf_yz, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_yZ, recvCount_yZ, + recvbuf_yZ, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Yz, recvCount_Yz, + recvbuf_Yz, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_YZ, recvCount_YZ, + recvbuf_YZ, Phi, N); + 
//................................................................................... - int timestep = 0; - if (rank==0) printf("********************************************************\n"); - if (rank==0) printf("No. of timesteps: %i \n", timestepMax); + int timestep = 0; + if (rank == 0) + printf("********************************************************\n"); + if (rank == 0) + printf("No. of timesteps: %i \n", timestepMax); - //.......create a stream for the LB calculation....... -// cudaStream_t stream; -// cudaStreamCreate(&stream); + //.......create a stream for the LB calculation....... + // cudaStream_t stream; + // cudaStreamCreate(&stream); - //.......create and start timer............ - double starttime,stoptime,cputime; - MPI_Barrier(comm); - starttime = MPI_Wtime(); - //......................................... + //.......create and start timer............ + double starttime, stoptime, cputime; + MPI_Barrier(comm); + starttime = MPI_Wtime(); + //......................................... 
- sendtag = recvtag = 5; - if (rank==0) printf("-------------------------------------------------------------------\n"); - if (rank==0) printf("timestep dEs Vn Awn Ans Aws Lwns \n"); - if (rank==0) printf("-------------------------------------------------------------------\n"); + sendtag = recvtag = 5; + if (rank == 0) + printf("---------------------------------------------------------------" + "----\n"); + if (rank == 0) + printf("timestep dEs Vn Awn Ans Aws Lwns \n"); + if (rank == 0) + printf("---------------------------------------------------------------" + "----\n"); - //************ MAIN ITERATION LOOP ***************************************/ - while (timestep < timestepMax){ + //************ MAIN ITERATION LOOP ***************************************/ + while (timestep < timestepMax) { - //************************************************************************* - // Compute the color gradient - //************************************************************************* - dvc_ComputeColorGradient(nBlocks, nthreads, S, - ID, Phi, ColorGrad, Nx, Ny, Nz); - //************************************************************************* + //************************************************************************* + // Compute the color gradient + //************************************************************************* + dvc_ComputeColorGradient(nBlocks, nthreads, S, ID, Phi, ColorGrad, Nx, + Ny, Nz); + //************************************************************************* - //************************************************************************* - // Perform collision step for the momentum transport - //************************************************************************* - dvc_ColorCollide(nBlocks, nthreads, S, ID, f_even, f_odd, ColorGrad, Velocity, - rlxA, rlxB,alpha, beta, Fx, Fy, Fz, Nx, Ny, Nz, pBC); - //************************************************************************* + 
//************************************************************************* + // Perform collision step for the momentum transport + //************************************************************************* + dvc_ColorCollide(nBlocks, nthreads, S, ID, f_even, f_odd, ColorGrad, + Velocity, rlxA, rlxB, alpha, beta, Fx, Fy, Fz, Nx, Ny, + Nz, pBC); + //************************************************************************* - //................................................................................... - dvc_PackDist(faceGrid,packThreads,1,dvcSendList_x,0,sendCount_x,sendbuf_x,f_even,N); - dvc_PackDist(faceGrid,packThreads,4,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,f_even,N); - dvc_PackDist(faceGrid,packThreads,5,dvcSendList_x,2*sendCount_x,sendCount_x,sendbuf_x,f_even,N); - dvc_PackDist(faceGrid,packThreads,6,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,f_even,N); - dvc_PackDist(faceGrid,packThreads,7,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,f_even,N); - //...Packing for X face(faceGrid,packThreads,1,7,9,11,13)................................ - dvc_PackDist(faceGrid,packThreads,0,dvcSendList_X,0,sendCount_X,sendbuf_X,f_odd,N); - dvc_PackDist(faceGrid,packThreads,3,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,f_odd,N); - dvc_PackDist(faceGrid,packThreads,4,dvcSendList_X,2*sendCount_X,sendCount_X,sendbuf_X,f_odd,N); - dvc_PackDist(faceGrid,packThreads,5,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,f_odd,N); - dvc_PackDist(faceGrid,packThreads,6,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,f_odd,N); - //...Packing for y face(faceGrid,packThreads,4,8,9,16,18)................................. 
- dvc_PackDist(faceGrid,packThreads,2,dvcSendList_y,0,sendCount_y,sendbuf_y,f_even,N); - dvc_PackDist(faceGrid,packThreads,4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,f_even,N); - dvc_PackDist(faceGrid,packThreads,4,dvcSendList_y,2*sendCount_y,sendCount_y,sendbuf_y,f_odd,N); - dvc_PackDist(faceGrid,packThreads,8,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,f_even,N); - dvc_PackDist(faceGrid,packThreads,9,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,f_even,N); - //...Packing for Y face(faceGrid,packThreads,3,7,10,15,17)................................. - dvc_PackDist(faceGrid,packThreads,1,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,f_odd,N); - dvc_PackDist(faceGrid,packThreads,3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N); - dvc_PackDist(faceGrid,packThreads,5,dvcSendList_Y,2*sendCount_Y,sendCount_Y,sendbuf_Y,f_even,N); - dvc_PackDist(faceGrid,packThreads,7,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N); - dvc_PackDist(faceGrid,packThreads,8,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N); - //...Packing for z face(faceGrid,packThreads,6,12,13,16,17)................................ - dvc_PackDist(faceGrid,packThreads,3,dvcSendList_z,0,sendCount_z,sendbuf_z,f_even,N); - dvc_PackDist(faceGrid,packThreads,6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,f_even,N); - dvc_PackDist(faceGrid,packThreads,6,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,f_odd,N); - dvc_PackDist(faceGrid,packThreads,8,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,f_even,N); - dvc_PackDist(faceGrid,packThreads,8,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,f_odd,N); - //...Packing for Z face(faceGrid,packThreads,5,11,14,15,18)................................ 
- dvc_PackDist(faceGrid,packThreads,2,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,f_odd,N); - dvc_PackDist(faceGrid,packThreads,5,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N); - dvc_PackDist(faceGrid,packThreads,7,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N); - dvc_PackDist(faceGrid,packThreads,7,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N); - dvc_PackDist(faceGrid,packThreads,9,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N); - //...Pack the xy edge (edgeGrid,packThreads,8)................................ - dvc_PackDist(edgeGrid,packThreads,4,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,f_even,N); - //...Pack the Xy edge (edgeGrid,packThreads,9)................................ - dvc_PackDist(edgeGrid,packThreads,4,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,f_odd,N); - //...Pack the xY edge (edgeGrid,packThreads,10)................................ - dvc_PackDist(edgeGrid,packThreads,5,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,f_even,N); - //...Pack the XY edge (edgeGrid,packThreads,7)................................ - dvc_PackDist(edgeGrid,packThreads,3,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,f_odd,N); - //...Pack the xz edge (edgeGrid,packThreads,12)................................ - dvc_PackDist(edgeGrid,packThreads,6,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,f_even,N); - //...Pack the xZ edge (edgeGrid,packThreads,14)................................ - dvc_PackDist(edgeGrid,packThreads,7,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,f_even,N); - //...Pack the Xz edge (edgeGrid,packThreads,13)................................ - dvc_PackDist(edgeGrid,packThreads,6,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,f_odd,N); - //...Pack the XZ edge (edgeGrid,packThreads,11)................................ - dvc_PackDist(edgeGrid,packThreads,5,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,f_odd,N); - //...Pack the xz edge (edgeGrid,packThreads,12)................................ 
- //...Pack the yz edge (edgeGrid,packThreads,16)................................ - dvc_PackDist(edgeGrid,packThreads,8,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,f_even,N); - //...Pack the yZ edge (edgeGrid,packThreads,18)................................ - dvc_PackDist(edgeGrid,packThreads,9,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,f_even,N); - //...Pack the Yz edge (edgeGrid,packThreads,17)................................ - dvc_PackDist(edgeGrid,packThreads,8,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,f_odd,N); - //...Pack the YZ edge (edgeGrid,packThreads,15)................................ - dvc_PackDist(edgeGrid,packThreads,7,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,f_odd,N); - //................................................................................... + //................................................................................... + dvc_PackDist(faceGrid, packThreads, 1, dvcSendList_x, 0, sendCount_x, + sendbuf_x, f_even, N); + dvc_PackDist(faceGrid, packThreads, 4, dvcSendList_x, sendCount_x, + sendCount_x, sendbuf_x, f_even, N); + dvc_PackDist(faceGrid, packThreads, 5, dvcSendList_x, 2 * sendCount_x, + sendCount_x, sendbuf_x, f_even, N); + dvc_PackDist(faceGrid, packThreads, 6, dvcSendList_x, 3 * sendCount_x, + sendCount_x, sendbuf_x, f_even, N); + dvc_PackDist(faceGrid, packThreads, 7, dvcSendList_x, 4 * sendCount_x, + sendCount_x, sendbuf_x, f_even, N); + //...Packing for X face(faceGrid,packThreads,1,7,9,11,13)................................ 
+ dvc_PackDist(faceGrid, packThreads, 0, dvcSendList_X, 0, sendCount_X, + sendbuf_X, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 3, dvcSendList_X, sendCount_X, + sendCount_X, sendbuf_X, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 4, dvcSendList_X, 2 * sendCount_X, + sendCount_X, sendbuf_X, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 5, dvcSendList_X, 3 * sendCount_X, + sendCount_X, sendbuf_X, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 6, dvcSendList_X, 4 * sendCount_X, + sendCount_X, sendbuf_X, f_odd, N); + //...Packing for y face(faceGrid,packThreads,4,8,9,16,18)................................. + dvc_PackDist(faceGrid, packThreads, 2, dvcSendList_y, 0, sendCount_y, + sendbuf_y, f_even, N); + dvc_PackDist(faceGrid, packThreads, 4, dvcSendList_y, sendCount_y, + sendCount_y, sendbuf_y, f_even, N); + dvc_PackDist(faceGrid, packThreads, 4, dvcSendList_y, 2 * sendCount_y, + sendCount_y, sendbuf_y, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 8, dvcSendList_y, 3 * sendCount_y, + sendCount_y, sendbuf_y, f_even, N); + dvc_PackDist(faceGrid, packThreads, 9, dvcSendList_y, 4 * sendCount_y, + sendCount_y, sendbuf_y, f_even, N); + //...Packing for Y face(faceGrid,packThreads,3,7,10,15,17)................................. + dvc_PackDist(faceGrid, packThreads, 1, dvcSendList_Y, 0, sendCount_Y, + sendbuf_Y, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 3, dvcSendList_Y, sendCount_Y, + sendCount_Y, sendbuf_Y, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 5, dvcSendList_Y, 2 * sendCount_Y, + sendCount_Y, sendbuf_Y, f_even, N); + dvc_PackDist(faceGrid, packThreads, 7, dvcSendList_Y, 3 * sendCount_Y, + sendCount_Y, sendbuf_Y, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 8, dvcSendList_Y, 4 * sendCount_Y, + sendCount_Y, sendbuf_Y, f_odd, N); + //...Packing for z face(faceGrid,packThreads,6,12,13,16,17)................................ 
+ dvc_PackDist(faceGrid, packThreads, 3, dvcSendList_z, 0, sendCount_z, + sendbuf_z, f_even, N); + dvc_PackDist(faceGrid, packThreads, 6, dvcSendList_z, sendCount_z, + sendCount_z, sendbuf_z, f_even, N); + dvc_PackDist(faceGrid, packThreads, 6, dvcSendList_z, 2 * sendCount_z, + sendCount_z, sendbuf_z, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 8, dvcSendList_z, 3 * sendCount_z, + sendCount_z, sendbuf_z, f_even, N); + dvc_PackDist(faceGrid, packThreads, 8, dvcSendList_z, 4 * sendCount_z, + sendCount_z, sendbuf_z, f_odd, N); + //...Packing for Z face(faceGrid,packThreads,5,11,14,15,18)................................ + dvc_PackDist(faceGrid, packThreads, 2, dvcSendList_Z, 0, sendCount_Z, + sendbuf_Z, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 5, dvcSendList_Z, sendCount_Z, + sendCount_Z, sendbuf_Z, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 7, dvcSendList_Z, 2 * sendCount_Z, + sendCount_Z, sendbuf_Z, f_even, N); + dvc_PackDist(faceGrid, packThreads, 7, dvcSendList_Z, 3 * sendCount_Z, + sendCount_Z, sendbuf_Z, f_odd, N); + dvc_PackDist(faceGrid, packThreads, 9, dvcSendList_Z, 4 * sendCount_Z, + sendCount_Z, sendbuf_Z, f_even, N); + //...Pack the xy edge (edgeGrid,packThreads,8)................................ + dvc_PackDist(edgeGrid, packThreads, 4, dvcSendList_xy, 0, sendCount_xy, + sendbuf_xy, f_even, N); + //...Pack the Xy edge (edgeGrid,packThreads,9)................................ + dvc_PackDist(edgeGrid, packThreads, 4, dvcSendList_Xy, 0, sendCount_Xy, + sendbuf_Xy, f_odd, N); + //...Pack the xY edge (edgeGrid,packThreads,10)................................ + dvc_PackDist(edgeGrid, packThreads, 5, dvcSendList_xY, 0, sendCount_xY, + sendbuf_xY, f_even, N); + //...Pack the XY edge (edgeGrid,packThreads,7)................................ + dvc_PackDist(edgeGrid, packThreads, 3, dvcSendList_XY, 0, sendCount_XY, + sendbuf_XY, f_odd, N); + //...Pack the xz edge (edgeGrid,packThreads,12)................................ 
+ dvc_PackDist(edgeGrid, packThreads, 6, dvcSendList_xz, 0, sendCount_xz, + sendbuf_xz, f_even, N); + //...Pack the xZ edge (edgeGrid,packThreads,14)................................ + dvc_PackDist(edgeGrid, packThreads, 7, dvcSendList_xZ, 0, sendCount_xZ, + sendbuf_xZ, f_even, N); + //...Pack the Xz edge (edgeGrid,packThreads,13)................................ + dvc_PackDist(edgeGrid, packThreads, 6, dvcSendList_Xz, 0, sendCount_Xz, + sendbuf_Xz, f_odd, N); + //...Pack the XZ edge (edgeGrid,packThreads,11)................................ + dvc_PackDist(edgeGrid, packThreads, 5, dvcSendList_XZ, 0, sendCount_XZ, + sendbuf_XZ, f_odd, N); + //...Pack the xz edge (edgeGrid,packThreads,12)................................ + //...Pack the yz edge (edgeGrid,packThreads,16)................................ + dvc_PackDist(edgeGrid, packThreads, 8, dvcSendList_yz, 0, sendCount_yz, + sendbuf_yz, f_even, N); + //...Pack the yZ edge (edgeGrid,packThreads,18)................................ + dvc_PackDist(edgeGrid, packThreads, 9, dvcSendList_yZ, 0, sendCount_yZ, + sendbuf_yZ, f_even, N); + //...Pack the Yz edge (edgeGrid,packThreads,17)................................ + dvc_PackDist(edgeGrid, packThreads, 8, dvcSendList_Yz, 0, sendCount_Yz, + sendbuf_Yz, f_odd, N); + //...Pack the YZ edge (edgeGrid,packThreads,15)................................ + dvc_PackDist(edgeGrid, packThreads, 7, dvcSendList_YZ, 0, sendCount_YZ, + sendbuf_YZ, f_odd, N); + //................................................................................... - //................................................................................... 
- // Send all the distributions - MPI_Isend(sendbuf_x, 5*sendCount_x,MPI_DOUBLE,rank_X,sendtag,comm,&req1[0]); - MPI_Irecv(recvbuf_X, 5*recvCount_X,MPI_DOUBLE,rank_x,recvtag,comm,&req2[0]); - MPI_Isend(sendbuf_X, 5*sendCount_X,MPI_DOUBLE,rank_x,sendtag,comm,&req1[1]); - MPI_Irecv(recvbuf_x, 5*recvCount_x,MPI_DOUBLE,rank_X,recvtag,comm,&req2[1]); - MPI_Isend(sendbuf_y, 5*sendCount_y,MPI_DOUBLE,rank_Y,sendtag,comm,&req1[2]); - MPI_Irecv(recvbuf_Y, 5*recvCount_Y,MPI_DOUBLE,rank_y,recvtag,comm,&req2[2]); - MPI_Isend(sendbuf_Y, 5*sendCount_Y,MPI_DOUBLE,rank_y,sendtag,comm,&req1[3]); - MPI_Irecv(recvbuf_y, 5*recvCount_y,MPI_DOUBLE,rank_Y,recvtag,comm,&req2[3]); - MPI_Isend(sendbuf_z, 5*sendCount_z,MPI_DOUBLE,rank_Z,sendtag,comm,&req1[4]); - MPI_Irecv(recvbuf_Z, 5*recvCount_Z,MPI_DOUBLE,rank_z,recvtag,comm,&req2[4]); - MPI_Isend(sendbuf_Z, 5*sendCount_Z,MPI_DOUBLE,rank_z,sendtag,comm,&req1[5]); - MPI_Irecv(recvbuf_z, 5*recvCount_z,MPI_DOUBLE,rank_Z,recvtag,comm,&req2[5]); - MPI_Isend(sendbuf_xy, sendCount_xy,MPI_DOUBLE,rank_XY,sendtag,comm,&req1[6]); - MPI_Irecv(recvbuf_XY, recvCount_XY,MPI_DOUBLE,rank_xy,recvtag,comm,&req2[6]); - MPI_Isend(sendbuf_XY, sendCount_XY,MPI_DOUBLE,rank_xy,sendtag,comm,&req1[7]); - MPI_Irecv(recvbuf_xy, recvCount_xy,MPI_DOUBLE,rank_XY,recvtag,comm,&req2[7]); - MPI_Isend(sendbuf_Xy, sendCount_Xy,MPI_DOUBLE,rank_xY,sendtag,comm,&req1[8]); - MPI_Irecv(recvbuf_xY, recvCount_xY,MPI_DOUBLE,rank_Xy,recvtag,comm,&req2[8]); - MPI_Isend(sendbuf_xY, sendCount_xY,MPI_DOUBLE,rank_Xy,sendtag,comm,&req1[9]); - MPI_Irecv(recvbuf_Xy, recvCount_Xy,MPI_DOUBLE,rank_xY,recvtag,comm,&req2[9]); - MPI_Isend(sendbuf_xz, sendCount_xz,MPI_DOUBLE,rank_XZ,sendtag,comm,&req1[10]); - MPI_Irecv(recvbuf_XZ, recvCount_XZ,MPI_DOUBLE,rank_xz,recvtag,comm,&req2[10]); - MPI_Isend(sendbuf_XZ, sendCount_XZ,MPI_DOUBLE,rank_xz,sendtag,comm,&req1[11]); - MPI_Irecv(recvbuf_xz, recvCount_xz,MPI_DOUBLE,rank_XZ,recvtag,comm,&req2[11]); - MPI_Isend(sendbuf_Xz, 
sendCount_Xz,MPI_DOUBLE,rank_xZ,sendtag,comm,&req1[12]); - MPI_Irecv(recvbuf_xZ, recvCount_xZ,MPI_DOUBLE,rank_Xz,recvtag,comm,&req2[12]); - MPI_Isend(sendbuf_xZ, sendCount_xZ,MPI_DOUBLE,rank_Xz,sendtag,comm,&req1[13]); - MPI_Irecv(recvbuf_Xz, recvCount_Xz,MPI_DOUBLE,rank_xZ,recvtag,comm,&req2[13]); - MPI_Isend(sendbuf_yz, sendCount_yz,MPI_DOUBLE,rank_YZ,sendtag,comm,&req1[14]); - MPI_Irecv(recvbuf_YZ, recvCount_YZ,MPI_DOUBLE,rank_yz,recvtag,comm,&req2[14]); - MPI_Isend(sendbuf_YZ, sendCount_YZ,MPI_DOUBLE,rank_yz,sendtag,comm,&req1[15]); - MPI_Irecv(recvbuf_yz, recvCount_yz,MPI_DOUBLE,rank_YZ,recvtag,comm,&req2[15]); - MPI_Isend(sendbuf_Yz, sendCount_Yz,MPI_DOUBLE,rank_yZ,sendtag,comm,&req1[16]); - MPI_Irecv(recvbuf_yZ, recvCount_yZ,MPI_DOUBLE,rank_Yz,recvtag,comm,&req2[16]); - MPI_Isend(sendbuf_yZ, sendCount_yZ,MPI_DOUBLE,rank_Yz,sendtag,comm,&req1[17]); - MPI_Irecv(recvbuf_Yz, recvCount_Yz,MPI_DOUBLE,rank_yZ,recvtag,comm,&req2[17]); - //................................................................................... + //................................................................................... 
+ // Send all the distributions + MPI_Isend(sendbuf_x, 5 * sendCount_x, MPI_DOUBLE, rank_X, sendtag, comm, + &req1[0]); + MPI_Irecv(recvbuf_X, 5 * recvCount_X, MPI_DOUBLE, rank_x, recvtag, comm, + &req2[0]); + MPI_Isend(sendbuf_X, 5 * sendCount_X, MPI_DOUBLE, rank_x, sendtag, comm, + &req1[1]); + MPI_Irecv(recvbuf_x, 5 * recvCount_x, MPI_DOUBLE, rank_X, recvtag, comm, + &req2[1]); + MPI_Isend(sendbuf_y, 5 * sendCount_y, MPI_DOUBLE, rank_Y, sendtag, comm, + &req1[2]); + MPI_Irecv(recvbuf_Y, 5 * recvCount_Y, MPI_DOUBLE, rank_y, recvtag, comm, + &req2[2]); + MPI_Isend(sendbuf_Y, 5 * sendCount_Y, MPI_DOUBLE, rank_y, sendtag, comm, + &req1[3]); + MPI_Irecv(recvbuf_y, 5 * recvCount_y, MPI_DOUBLE, rank_Y, recvtag, comm, + &req2[3]); + MPI_Isend(sendbuf_z, 5 * sendCount_z, MPI_DOUBLE, rank_Z, sendtag, comm, + &req1[4]); + MPI_Irecv(recvbuf_Z, 5 * recvCount_Z, MPI_DOUBLE, rank_z, recvtag, comm, + &req2[4]); + MPI_Isend(sendbuf_Z, 5 * sendCount_Z, MPI_DOUBLE, rank_z, sendtag, comm, + &req1[5]); + MPI_Irecv(recvbuf_z, 5 * recvCount_z, MPI_DOUBLE, rank_Z, recvtag, comm, + &req2[5]); + MPI_Isend(sendbuf_xy, sendCount_xy, MPI_DOUBLE, rank_XY, sendtag, comm, + &req1[6]); + MPI_Irecv(recvbuf_XY, recvCount_XY, MPI_DOUBLE, rank_xy, recvtag, comm, + &req2[6]); + MPI_Isend(sendbuf_XY, sendCount_XY, MPI_DOUBLE, rank_xy, sendtag, comm, + &req1[7]); + MPI_Irecv(recvbuf_xy, recvCount_xy, MPI_DOUBLE, rank_XY, recvtag, comm, + &req2[7]); + MPI_Isend(sendbuf_Xy, sendCount_Xy, MPI_DOUBLE, rank_xY, sendtag, comm, + &req1[8]); + MPI_Irecv(recvbuf_xY, recvCount_xY, MPI_DOUBLE, rank_Xy, recvtag, comm, + &req2[8]); + MPI_Isend(sendbuf_xY, sendCount_xY, MPI_DOUBLE, rank_Xy, sendtag, comm, + &req1[9]); + MPI_Irecv(recvbuf_Xy, recvCount_Xy, MPI_DOUBLE, rank_xY, recvtag, comm, + &req2[9]); + MPI_Isend(sendbuf_xz, sendCount_xz, MPI_DOUBLE, rank_XZ, sendtag, comm, + &req1[10]); + MPI_Irecv(recvbuf_XZ, recvCount_XZ, MPI_DOUBLE, rank_xz, recvtag, comm, + &req2[10]); + MPI_Isend(sendbuf_XZ, sendCount_XZ, 
MPI_DOUBLE, rank_xz, sendtag, comm, + &req1[11]); + MPI_Irecv(recvbuf_xz, recvCount_xz, MPI_DOUBLE, rank_XZ, recvtag, comm, + &req2[11]); + MPI_Isend(sendbuf_Xz, sendCount_Xz, MPI_DOUBLE, rank_xZ, sendtag, comm, + &req1[12]); + MPI_Irecv(recvbuf_xZ, recvCount_xZ, MPI_DOUBLE, rank_Xz, recvtag, comm, + &req2[12]); + MPI_Isend(sendbuf_xZ, sendCount_xZ, MPI_DOUBLE, rank_Xz, sendtag, comm, + &req1[13]); + MPI_Irecv(recvbuf_Xz, recvCount_Xz, MPI_DOUBLE, rank_xZ, recvtag, comm, + &req2[13]); + MPI_Isend(sendbuf_yz, sendCount_yz, MPI_DOUBLE, rank_YZ, sendtag, comm, + &req1[14]); + MPI_Irecv(recvbuf_YZ, recvCount_YZ, MPI_DOUBLE, rank_yz, recvtag, comm, + &req2[14]); + MPI_Isend(sendbuf_YZ, sendCount_YZ, MPI_DOUBLE, rank_yz, sendtag, comm, + &req1[15]); + MPI_Irecv(recvbuf_yz, recvCount_yz, MPI_DOUBLE, rank_YZ, recvtag, comm, + &req2[15]); + MPI_Isend(sendbuf_Yz, sendCount_Yz, MPI_DOUBLE, rank_yZ, sendtag, comm, + &req1[16]); + MPI_Irecv(recvbuf_yZ, recvCount_yZ, MPI_DOUBLE, rank_Yz, recvtag, comm, + &req2[16]); + MPI_Isend(sendbuf_yZ, sendCount_yZ, MPI_DOUBLE, rank_Yz, sendtag, comm, + &req1[17]); + MPI_Irecv(recvbuf_Yz, recvCount_Yz, MPI_DOUBLE, rank_yZ, recvtag, comm, + &req2[17]); + //................................................................................... 
- //************************************************************************* - // Carry out the density streaming step for mass transport - //************************************************************************* - dvc_DensityStreamD3Q7(nBlocks, nthreads, S, - ID, Den, Copy, Phi, ColorGrad, Velocity, beta, Nx, Ny, Nz, pBC); - //************************************************************************* + //************************************************************************* + // Carry out the density streaming step for mass transport + //************************************************************************* + dvc_DensityStreamD3Q7(nBlocks, nthreads, S, ID, Den, Copy, Phi, + ColorGrad, Velocity, beta, Nx, Ny, Nz, pBC); + //************************************************************************* - //************************************************************************* - // Swap the distributions for momentum transport - //************************************************************************* - dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz); - //************************************************************************* + //************************************************************************* + // Swap the distributions for momentum transport + //************************************************************************* + dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz); + //************************************************************************* - //................................................................................... - // Wait for completion of D3Q19 communication - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); - //................................................................................... - // Unpack the distributions on the device - //................................................................................... 
- //...Map recieve list for the X face: q=2,8,10,12,13 ................................. - dvc_UnpackDist(faceGrid,packThreads,0,-1,0,0,dvcRecvList_X,0,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,3,-1,-1,0,dvcRecvList_X,recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,4,-1,1,0,dvcRecvList_X,2*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,5,-1,0,-1,dvcRecvList_X,3*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,6,-1,0,1,dvcRecvList_X,4*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); - //................................................................................... - //...Map recieve list for the x face: q=1,7,9,11,13.................................. - dvc_UnpackDist(faceGrid,packThreads,1,1,0,0,dvcRecvList_x,0,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,4,1,1,0,dvcRecvList_x,recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,5,1,-1,0,dvcRecvList_x,2*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,6,1,0,1,dvcRecvList_x,3*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,7,1,0,-1,dvcRecvList_x,4*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); - //................................................................................... - //...Map recieve list for the y face: q=4,8,9,16,18 ................................... 
- dvc_UnpackDist(faceGrid,packThreads,1,0,-1,0,dvcRecvList_Y,0,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,3,-1,-1,0,dvcRecvList_Y,recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,5,1,-1,0,dvcRecvList_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,f_even,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,7,0,-1,-1,dvcRecvList_Y,3*recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,8,0,-1,1,dvcRecvList_Y,4*recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); - //................................................................................... - //...Map recieve list for the Y face: q=3,7,10,15,17 .................................. - dvc_UnpackDist(faceGrid,packThreads,2,0,1,0,dvcRecvList_y,0,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,4,1,1,0,dvcRecvList_y,recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,4,-1,1,0,dvcRecvList_y,2*recvCount_y,recvCount_y,recvbuf_y,f_odd,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,8,0,1,1,dvcRecvList_y,3*recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); - dvc_UnpackDist(faceGrid,packThreads,9,0,1,-1,dvcRecvList_y,4*recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); - //................................................................................... - //...Map recieve list for the z face<< CPU - //........................................................................... - dvc_Barrier(); - dvc_CopyToHost(Phase.data,Phi,N*sizeof(double)); - MPI_Barrier(comm); - //........................................................................... - // Compute areas using porous medium marching cubes algorithm - // McClure, Adalsteinsson, et al. (2007) - //........................................................................... 
- awn = aws = ans = lwns = 0.0; - nwp_volume = 0.0; - As = 0.0; +*/ + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_x, recvCount_x, + recvbuf_x, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_y, recvCount_y, + recvbuf_y, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_z, recvCount_z, + recvbuf_z, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_X, recvCount_X, + recvbuf_X, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Y, recvCount_Y, + recvbuf_Y, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Z, recvCount_Z, + recvbuf_Z, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_xy, recvCount_xy, + recvbuf_xy, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_xY, recvCount_xY, + recvbuf_xY, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Xy, recvCount_Xy, + recvbuf_Xy, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_XY, recvCount_XY, + recvbuf_XY, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_xz, recvCount_xz, + recvbuf_xz, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_xZ, recvCount_xZ, + recvbuf_xZ, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Xz, recvCount_Xz, + recvbuf_Xz, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_XZ, recvCount_XZ, + recvbuf_XZ, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_yz, recvCount_yz, + recvbuf_yz, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_yZ, recvCount_yZ, + recvbuf_yZ, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_Yz, recvCount_Yz, + recvbuf_Yz, Phi, N); + dvc_UnpackValues(faceGrid, packThreads, dvcRecvList_YZ, recvCount_YZ, + recvbuf_YZ, Phi, N); + //................................................................................... + MPI_Barrier(comm); + // Iteration completed! + timestep++; + //................................................................... 
- for (c=0;c 0 - && SignDist(i+cube[p][0],j+cube[p][1],k+cube[p][2]) > 0 ){ - nwp_volume += 0.125; - } - } - - // Run PMMC - n_local_sol_tris = 0; - n_local_sol_pts = 0; - n_local_nws_pts = 0; - - n_nw_pts=0,n_ns_pts=0,n_ws_pts=0,n_nws_pts=0, map=0; - n_nw_tris=0, n_ns_tris=0, n_ws_tris=0, n_nws_seg=0; - - n_nw_tris_beg = 0;// n_nw_tris; - n_ns_tris_beg = 0;//n_ns_tris; - n_ws_tris_beg = 0;//n_ws_tris; - - // if there is a solid phase interface in the grid cell - if (Interface(SignDist,solid_isovalue,i,j,k) == 1){ - - ///////////////////////////////////////// - /// CONSTRUCT THE LOCAL SOLID SURFACE /// - ///////////////////////////////////////// - - // find the local solid surface - SOL_SURF(SignDist,0.0,Phase,fluid_isovalue,i,j,k, Nx,Ny,Nz,local_sol_pts,n_local_sol_pts, - local_sol_tris,n_local_sol_tris,values); - - ///////////////////////////////////////// - //////// TRIM THE SOLID SURFACE ///////// - ///////////////////////////////////////// -/* TRIM(local_sol_pts, n_local_sol_pts, fluid_isovalue,local_sol_tris, n_local_sol_tris, + if (timestep % 1000 == 0) { + //........................................................................... + // Copy the phase from the GPU -> CPU + //........................................................................... + dvc_Barrier(); + dvc_CopyToHost(Phase.data, Phi, N * sizeof(double)); + MPI_Barrier(comm); + //........................................................................... + // Compute areas using porous medium marching cubes algorithm + // McClure, Adalsteinsson, et al. (2007) + //........................................................................... 
+ awn = aws = ans = lwns = 0.0; + nwp_volume = 0.0; + As = 0.0; + + for (c = 0; c < ncubes; c++) { + // Get cube from the list + i = cubeList(0, c); + j = cubeList(1, c); + k = cubeList(2, c); + + for (p = 0; p < 8; p++) { + if (Phase(i + cube[p][0], j + cube[p][1], k + cube[p][2]) > + 0 && + SignDist(i + cube[p][0], j + cube[p][1], + k + cube[p][2]) > 0) { + nwp_volume += 0.125; + } + } + + // Run PMMC + n_local_sol_tris = 0; + n_local_sol_pts = 0; + n_local_nws_pts = 0; + + n_nw_pts = 0, n_ns_pts = 0, n_ws_pts = 0, n_nws_pts = 0, + map = 0; + n_nw_tris = 0, n_ns_tris = 0, n_ws_tris = 0, n_nws_seg = 0; + + n_nw_tris_beg = 0; // n_nw_tris; + n_ns_tris_beg = 0; //n_ns_tris; + n_ws_tris_beg = 0; //n_ws_tris; + + // if there is a solid phase interface in the grid cell + if (Interface(SignDist, solid_isovalue, i, j, k) == 1) { + + ///////////////////////////////////////// + /// CONSTRUCT THE LOCAL SOLID SURFACE /// + ///////////////////////////////////////// + + // find the local solid surface + SOL_SURF(SignDist, 0.0, Phase, fluid_isovalue, i, j, k, Nx, + Ny, Nz, local_sol_pts, n_local_sol_pts, + local_sol_tris, n_local_sol_tris, values); + + ///////////////////////////////////////// + //////// TRIM THE SOLID SURFACE ///////// + ///////////////////////////////////////// + /* TRIM(local_sol_pts, n_local_sol_pts, fluid_isovalue,local_sol_tris, n_local_sol_tris, ns_pts, n_ns_pts, ns_tris, n_ns_tris, ws_pts, n_ws_pts, ws_tris, n_ws_tris, values, local_nws_pts, n_local_nws_pts, Phase, SignDist, i, j, k, newton_steps); -*/ - TRIM(local_sol_pts, n_local_sol_pts, fluid_isovalue,local_sol_tris, n_local_sol_tris, - ns_pts, n_ns_pts, ns_tris, n_ns_tris, ws_pts, n_ws_pts, - ws_tris, n_ws_tris, values, local_nws_pts, n_local_nws_pts); - - ///////////////////////////////////////// - //////// WRITE COMMON LINE POINTS /////// - //////// TO MAIN ARRAYS /////// - ///////////////////////////////////////// - map = n_nws_pts; - for (p=0; p < n_local_nws_pts; p++){ - nws_pts(n_nws_pts++) = 
local_nws_pts(p); - } - for (q=0; q < n_local_nws_pts-1; q++){ - nws_seg(0,n_nws_seg) = map+q; - nws_seg(1,n_nws_seg) = map+q+1; - n_nws_seg++; - } - - ///////////////////////////////////////// - ////// CONSTRUCT THE nw SURFACE ///////// - ///////////////////////////////////////// - if ( n_local_nws_pts > 0){ - EDGE(Phase, fluid_isovalue, SignDist, i,j,k, Nx, Ny, Nz, nw_pts, n_nw_pts, nw_tris, n_nw_tris, - local_nws_pts, n_local_nws_pts); - } - else { - MC(Phase, fluid_isovalue, SignDist, i,j,k, nw_pts, n_nw_pts, nw_tris, n_nw_tris); - } - } - - ///////////////////////////////////////// - ////// CONSTRUCT THE nw SURFACE ///////// - ///////////////////////////////////////// - - else if (Fluid_Interface(Phase,SignDist,fluid_isovalue,i,j,k) == 1){ - MC(Phase, fluid_isovalue, SignDist, i,j,k, nw_pts, n_nw_pts, nw_tris, n_nw_tris); - } - //******END OF BLOB PMMC********************************************* +*/ + TRIM(local_sol_pts, n_local_sol_pts, fluid_isovalue, + local_sol_tris, n_local_sol_tris, ns_pts, n_ns_pts, + ns_tris, n_ns_tris, ws_pts, n_ws_pts, ws_tris, + n_ws_tris, values, local_nws_pts, n_local_nws_pts); - //******************************************************************* - // Compute the Interfacial Areas, Common Line length for blob p - // nw surface - double temp; - for (r=0;r 0.0) awn += sqrt(temp); - - } - for (r=0;r 0.0) ans += sqrt(temp); - } - for (r=0;r 0.0) aws += sqrt(temp); - } - for (r=0;r 0.0) As += sqrt(temp); - } - for (p=0; p < n_local_nws_pts-1; p++){ - // Extract the line segment - A = local_nws_pts(p); - B = local_nws_pts(p+1); - // Compute the length of the segment - s = sqrt((A.x-B.x)*(A.x-B.x)+(A.y-B.y)*(A.y-B.y)+(A.z-B.z)*(A.z-B.z)); - // Add the length to the common line - lwns += s; - } - //******************************************************************* - // Reset the triangle counts to zero - n_nw_pts=0,n_ns_pts=0,n_ws_pts=0,n_nws_pts=0, map=0; - n_nw_tris=0, n_ns_tris=0, n_ws_tris=0, n_nws_seg=0; - - n_nw_tris_beg = 0;// 
n_nw_tris; - // n_ns_tris_beg = 0;//n_ns_tris; - // n_ws_tris_beg = 0;//n_ws_tris; - // n_nws_seg_beg = n_nws_seg; - //******************************************************************* - } - //........................................................................... - MPI_Barrier(comm); - MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,comm); - MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,comm); + ///////////////////////////////////////// + //////// WRITE COMMON LINE POINTS /////// + //////// TO MAIN ARRAYS /////// + ///////////////////////////////////////// + map = n_nws_pts; + for (p = 0; p < n_local_nws_pts; p++) { + nws_pts(n_nws_pts++) = local_nws_pts(p); + } + for (q = 0; q < n_local_nws_pts - 1; q++) { + nws_seg(0, n_nws_seg) = map + q; + nws_seg(1, n_nws_seg) = map + q + 1; + n_nws_seg++; + } - MPI_Barrier(comm); - //......................................................................... - // Compute the change in the total surface energy based on the defined interval - // See McClure, Prins and Miller (2013) - //......................................................................... - dAwn += awn_global; - dAns += ans_global; - dEs = 6.01603*alpha*(dAwn + 1.05332*Ps*dAns); - dAwn = -awn_global; // Get ready for the next analysis interval - dAns = -ans_global; - //......................................................................... 
- if (rank==0){ -/* printf("-------------------------------- \n"); + ///////////////////////////////////////// + ////// CONSTRUCT THE nw SURFACE ///////// + ///////////////////////////////////////// + if (n_local_nws_pts > 0) { + EDGE(Phase, fluid_isovalue, SignDist, i, j, k, Nx, Ny, + Nz, nw_pts, n_nw_pts, nw_tris, n_nw_tris, + local_nws_pts, n_local_nws_pts); + } else { + MC(Phase, fluid_isovalue, SignDist, i, j, k, nw_pts, + n_nw_pts, nw_tris, n_nw_tris); + } + } + + ///////////////////////////////////////// + ////// CONSTRUCT THE nw SURFACE ///////// + ///////////////////////////////////////// + + else if (Fluid_Interface(Phase, SignDist, fluid_isovalue, i, j, + k) == 1) { + MC(Phase, fluid_isovalue, SignDist, i, j, k, nw_pts, + n_nw_pts, nw_tris, n_nw_tris); + } + //******END OF BLOB PMMC********************************************* + + //******************************************************************* + // Compute the Interfacial Areas, Common Line length for blob p + // nw surface + double temp; + for (r = 0; r < n_nw_tris; r++) { + A = nw_pts(nw_tris(0, r)); + B = nw_pts(nw_tris(1, r)); + C = nw_pts(nw_tris(2, r)); + // Compute length of sides (assume dx=dy=dz) + s1 = sqrt((A.x - B.x) * (A.x - B.x) + + (A.y - B.y) * (A.y - B.y) + + (A.z - B.z) * (A.z - B.z)); + s2 = sqrt((A.x - C.x) * (A.x - C.x) + + (A.y - C.y) * (A.y - C.y) + + (A.z - C.z) * (A.z - C.z)); + s3 = sqrt((B.x - C.x) * (B.x - C.x) + + (B.y - C.y) * (B.y - C.y) + + (B.z - C.z) * (B.z - C.z)); + s = 0.5 * (s1 + s2 + s3); + temp = s * (s - s1) * (s - s2) * (s - s3); + if (temp > 0.0) + awn += sqrt(temp); + } + for (r = 0; r < n_ns_tris; r++) { + A = ns_pts(ns_tris(0, r)); + B = ns_pts(ns_tris(1, r)); + C = ns_pts(ns_tris(2, r)); + // Compute length of sides (assume dx=dy=dz) + s1 = sqrt((A.x - B.x) * (A.x - B.x) + + (A.y - B.y) * (A.y - B.y) + + (A.z - B.z) * (A.z - B.z)); + s2 = sqrt((A.x - C.x) * (A.x - C.x) + + (A.y - C.y) * (A.y - C.y) + + (A.z - C.z) * (A.z - C.z)); + s3 = sqrt((B.x - 
C.x) * (B.x - C.x) + + (B.y - C.y) * (B.y - C.y) + + (B.z - C.z) * (B.z - C.z)); + s = 0.5 * (s1 + s2 + s3); + //ans=ans+sqrt(s*(s-s1)*(s-s2)*(s-s3)); + temp = s * (s - s1) * (s - s2) * (s - s3); + if (temp > 0.0) + ans += sqrt(temp); + } + for (r = 0; r < n_ws_tris; r++) { + A = ws_pts(ws_tris(0, r)); + B = ws_pts(ws_tris(1, r)); + C = ws_pts(ws_tris(2, r)); + // Compute length of sides (assume dx=dy=dz) + s1 = sqrt((A.x - B.x) * (A.x - B.x) + + (A.y - B.y) * (A.y - B.y) + + (A.z - B.z) * (A.z - B.z)); + s2 = sqrt((A.x - C.x) * (A.x - C.x) + + (A.y - C.y) * (A.y - C.y) + + (A.z - C.z) * (A.z - C.z)); + s3 = sqrt((B.x - C.x) * (B.x - C.x) + + (B.y - C.y) * (B.y - C.y) + + (B.z - C.z) * (B.z - C.z)); + s = 0.5 * (s1 + s2 + s3); + //aws=aws+sqrt(s*(s-s1)*(s-s2)*(s-s3)); + temp = s * (s - s1) * (s - s2) * (s - s3); + if (temp > 0.0) + aws += sqrt(temp); + } + for (r = 0; r < n_local_sol_tris; r++) { + A = local_sol_pts(local_sol_tris(0, r)); + B = local_sol_pts(local_sol_tris(1, r)); + C = local_sol_pts(local_sol_tris(2, r)); + // Compute length of sides (assume dx=dy=dz) + s1 = sqrt((A.x - B.x) * (A.x - B.x) + + (A.y - B.y) * (A.y - B.y) + + (A.z - B.z) * (A.z - B.z)); + s2 = sqrt((A.x - C.x) * (A.x - C.x) + + (A.y - C.y) * (A.y - C.y) + + (A.z - C.z) * (A.z - C.z)); + s3 = sqrt((B.x - C.x) * (B.x - C.x) + + (B.y - C.y) * (B.y - C.y) + + (B.z - C.z) * (B.z - C.z)); + s = 0.5 * (s1 + s2 + s3); + //aws=aws+sqrt(s*(s-s1)*(s-s2)*(s-s3)); + temp = s * (s - s1) * (s - s2) * (s - s3); + if (temp > 0.0) + As += sqrt(temp); + } + for (p = 0; p < n_local_nws_pts - 1; p++) { + // Extract the line segment + A = local_nws_pts(p); + B = local_nws_pts(p + 1); + // Compute the length of the segment + s = sqrt((A.x - B.x) * (A.x - B.x) + + (A.y - B.y) * (A.y - B.y) + + (A.z - B.z) * (A.z - B.z)); + // Add the length to the common line + lwns += s; + } + //******************************************************************* + // Reset the triangle counts to zero + n_nw_pts = 0, 
n_ns_pts = 0, n_ws_pts = 0, n_nws_pts = 0, + map = 0; + n_nw_tris = 0, n_ns_tris = 0, n_ws_tris = 0, n_nws_seg = 0; + + n_nw_tris_beg = 0; // n_nw_tris; + // n_ns_tris_beg = 0;//n_ns_tris; + // n_ws_tris_beg = 0;//n_ws_tris; + // n_nws_seg_beg = n_nws_seg; + //******************************************************************* + } + //........................................................................... + MPI_Barrier(comm); + MPI_Allreduce(&nwp_volume, &nwp_volume_global, 1, MPI_DOUBLE, + MPI_SUM, comm); + MPI_Allreduce(&awn, &awn_global, 1, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce(&ans, &ans_global, 1, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce(&aws, &aws_global, 1, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce(&lwns, &lwns_global, 1, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce(&As, &As_global, 1, MPI_DOUBLE, MPI_SUM, comm); + + MPI_Barrier(comm); + //......................................................................... + // Compute the change in the total surface energy based on the defined interval + // See McClure, Prins and Miller (2013) + //......................................................................... + dAwn += awn_global; + dAns += ans_global; + dEs = 6.01603 * alpha * (dAwn + 1.05332 * Ps * dAns); + dAwn = -awn_global; // Get ready for the next analysis interval + dAns = -ans_global; + //......................................................................... 
+ if (rank == 0) { + /* printf("-------------------------------- \n"); printf("Timestep = %i \n", timestep); printf("NWP volume = %f \n", nwp_volume_global); printf("Area wn = %f \n", awn_global); @@ -1964,42 +2786,49 @@ int main(int argc, char **argv) printf("Change in surface energy = %f \n", dEs); printf("-------------------------------- \n"); */ - printf("%i %f %f %f %f %f %f %f \n",timestep,dEs,nwp_volume_global, - awn_global,ans_global,aws_global, As_global, lwns_global); + printf("%i %f %f %f %f %f %f %f \n", timestep, dEs, + nwp_volume_global, awn_global, ans_global, aws_global, + As_global, lwns_global); + } + } + } + //************************************************************************/ + dvc_Barrier(); + MPI_Barrier(comm); + stoptime = MPI_Wtime(); + if (rank == 0) + printf("---------------------------------------------------------------" + "----\n"); + // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; + cputime = stoptime - starttime; + // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; + double MLUPS = double(Nx * Ny * Nz * timestep) / cputime / 1000000; + if (rank == 0) + printf("********************************************************\n"); + if (rank == 0) + printf("CPU time = %f \n", cputime); + if (rank == 0) + printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank == 0) + printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank == 0) + printf("********************************************************\n"); - } - } - } - //************************************************************************/ - dvc_Barrier(); - MPI_Barrier(comm); - stoptime = MPI_Wtime(); - if (rank==0) printf("-------------------------------------------------------------------\n"); -// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; - cputime = stoptime - starttime; -// cout << "Lattice update rate: "<< 
double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; - double MLUPS = double(Nx*Ny*Nz*timestep)/cputime/1000000; - if (rank==0) printf("********************************************************\n"); - if (rank==0) printf("CPU time = %f \n", cputime); - if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); - MLUPS *= nprocs; - if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); - if (rank==0) printf("********************************************************\n"); - - //************************************************************************/ - // Write out the phase indicator field - //************************************************************************/ - sprintf(LocalRankFilename,"%s%s","Phase.",LocalRankString); - // printf("Local File Name = %s \n",LocalRankFilename); - dvc_CopyToHost(HostPhi,Phi,N*sizeof(double)); - dvc_CopyToHost(Phase.data,Phi,N*sizeof(double)); - - FILE *PHASE; - PHASE = fopen(LocalRankFilename,"wb"); - fwrite(Phase.data,8,N,PHASE); - fclose(PHASE); + //************************************************************************/ + // Write out the phase indicator field + //************************************************************************/ + sprintf(LocalRankFilename, "%s%s", "Phase.", LocalRankString); + // printf("Local File Name = %s \n",LocalRankFilename); + dvc_CopyToHost(HostPhi, Phi, N * sizeof(double)); + dvc_CopyToHost(Phase.data, Phi, N * sizeof(double)); -/* double *DensityValues; + FILE *PHASE; + PHASE = fopen(LocalRankFilename, "wb"); + fwrite(Phase.data, 8, N, PHASE); + fclose(PHASE); + + /* double *DensityValues; DensityValues = new double [2*N]; dvc_CopyToHost(DensityValues,Copy,2*N*sizeof(double)); FILE *PHASE; @@ -2008,8 +2837,8 @@ int main(int argc, char **argv) fclose(PHASE); */ //************************************************************************/ - // **************************************************** - MPI_Barrier(comm); - MPI_Finalize(); - // 
**************************************************** + // **************************************************** + MPI_Barrier(comm); + MPI_Finalize(); + // **************************************************** } diff --git a/sample_scripts/configure_arden b/sample_scripts/configure_arden index cdf524cf..92985e12 100755 --- a/sample_scripts/configure_arden +++ b/sample_scripts/configure_arden @@ -13,9 +13,10 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D CUDA_HOST_COMPILER="/usr/bin/gcc" \ -D USE_HDF5=1 \ -D HDF5_DIRECTORY="/opt/arden/hdf5/1.8.12" \ + -D HDF5_LIB="/opt/arden/hdf5/1.8.12/lib/libhdf5.a"\ -D USE_DOXYGEN=true \ + -D USE_SILO=false \ -D USE_CUDA=0 \ -D USE_TIMER=0 \ ~/Programs/LBPM -# -D HDF5_LIB="/opt/arden/hdf5/1.8.12/lib/libhdf5.a"\