From 2b9d776113ba6a5ec85955f9e837b70efc32e948 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 4 Jan 2021 20:13:48 -0500 Subject: [PATCH 01/21] save the work; to be compiled, tested and validated; add sine and cosine voltage input for Poisson solver --- models/MultiPhysController.cpp | 12 +- models/MultiPhysController.h | 2 + models/PoissonSolver.cpp | 196 +++++++++++++++--- models/PoissonSolver.h | 13 +- tests/TestPoissonSolver.cpp | 30 ++- ...m_electrokinetic_SingleFluid_simulator.cpp | 6 +- 6 files changed, 223 insertions(+), 36 deletions(-) diff --git a/models/MultiPhysController.cpp b/models/MultiPhysController.cpp index fcfb5403..9b361ad8 100644 --- a/models/MultiPhysController.cpp +++ b/models/MultiPhysController.cpp @@ -2,7 +2,7 @@ ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM): rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0), -analysis_interval(0),visualization_interval(0),tolerance(0),comm(COMM) +analysis_interval(0),visualization_interval(0),tolerance(0),time_conv_max(0),comm(COMM) { } @@ -25,6 +25,7 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){ analysis_interval = 500; visualization_interval = 10000; tolerance = 1.0e-6; + time_conv_max = 0.0; // load input parameters if (study_db->keyExists( "timestepMax" )){ @@ -135,3 +136,12 @@ vector ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(double Stoke } return num_iter_ion; } + +void ScaLBL_Multiphys_Controller::getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv){ + //Return maximum of the time converting factor from Stokes and ion solvers + vector TimeConv; + + TimeConv.assign(IonTimeConv.begin(),IonTimeConv.end()); + TimeConv.insert(TimeConv.begin(),StokesTimeConv); + time_conv_max = *max_element(TimeConv.begin(),TimeConv.end()); +} diff --git a/models/MultiPhysController.h b/models/MultiPhysController.h index f217248f..988f0225 100644 --- a/models/MultiPhysController.h +++ 
b/models/MultiPhysController.h @@ -27,6 +27,7 @@ public: int getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); vector getIonNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); //void getIonNumIter_PNP_coupling(double StokesTimeConv,vector &IonTimeConv,vector &IonTimeMax); + void getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); bool Restart; int timestepMax; @@ -35,6 +36,7 @@ public: int analysis_interval; int visualization_interval; double tolerance; + double time_conv_max; //double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity int rank,nprocs; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index b0dde2c7..96d737bb 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -8,8 +8,11 @@ ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),tolerance(0),h(0), epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), -chargeDen_dummy(0),WriteLog(0), -nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) +chargeDen_dummy(0),WriteLog(0),nprocx(0),nprocy(0),nprocz(0), +BoundaryConditionInlet(0),BoundaryConditionOutlet(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0), +Vin0(0),freqIn(0),t0_In(0),Vin_Type(0),Vout0(0),freqOut(0),t0_Out(0),Vout_Type(0), +TestPeriodic(0),TestPeriodicTime(0),TestPeriodicTimeConv(0),TestPeriodicSaveInterval(0), +comm(COMM) { } @@ -33,10 +36,12 @@ void ScaLBL_Poisson::ReadParams(string filename){ epsilonR = 78.4;//default dielectric constant of water epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity analysis_interval = 1000; - Vin = 1.0; //Boundary-z (inlet) electric potential - Vout = 1.0; //Boundary-Z (outlet) electric potential chargeDen_dummy = 1.0e-3;//For debugging;unit=[C/m^3] WriteLog = false; + TestPeriodic = 
false; + TestPeriodicTime = 1.0;//unit: [sec] + TestPeriodicTimeConv = 0.01; //unit [sec/lt] + TestPeriodicSaveInterval = 0.1; //unit [sec] // LB-Poisson Model parameters if (electric_db->keyExists( "timestepMax" )){ @@ -57,6 +62,18 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( "WriteLog" )){ WriteLog = electric_db->getScalar( "WriteLog" ); } + if (electric_db->keyExists( "TestPeriodic" )){ + TestPeriodic = electric_db->getScalar( "TestPeriodic" ); + } + if (electric_db->keyExists( "TestPeriodicTime" )){ + TestPeriodicTime = electric_db->getScalar( "TestPeriodicTime" ); + } + if (electric_db->keyExists( "TestPeriodicTimeConv" )){ + TestPeriodicTimeConv = electric_db->getScalar( "TestPeriodicTimeConv" ); + } + if (electric_db->keyExists( "TestPeriodicSaveInterval" )){ + TestPeriodicSaveInterval = electric_db->getScalar( "TestPeriodicSaveInterval" ); + } // Read solid boundary condition specific to Poisson equation BoundaryConditionSolid = 1; @@ -65,10 +82,15 @@ void ScaLBL_Poisson::ReadParams(string filename){ } // Read boundary condition for electric potential // BC = 0: normal periodic BC - // BC = 1: fixed inlet and outlet potential - BoundaryCondition = 0; - if (electric_db->keyExists( "BC" )){ - BoundaryCondition = electric_db->getScalar( "BC" ); + // BC = 1: fixed electric potential + // BC = 2: sine/cosine periodic electric potential (need extra input parameters) + BoundaryConditionInlet = 0; + BoundaryConditionOutlet = 0; + if (electric_db->keyExists( "BC_Inlet" )){ + BoundaryConditionInlet = electric_db->getScalar( "BC_Inlet" ); + } + if (electric_db->keyExists( "BC_Outlet" )){ + BoundaryConditionOutlet = electric_db->getScalar( "BC_Outlet" ); } // Read domain parameters @@ -342,15 +364,91 @@ void ScaLBL_Poisson::Create(){ void ScaLBL_Poisson::Potential_Init(double *psi_init){ - if (BoundaryCondition==1){ - if (electric_db->keyExists( "Vin" )){ - Vin = electric_db->getScalar( "Vin" ); - } - if (electric_db->keyExists( 
"Vout" )){ - Vout = electric_db->getScalar( "Vout" ); - } + //set up default boundary input parameters + Vin0 = Vout0 = 1.0; //unit: [V] + freqIn = freqOut = 50.0; //unit: [Hz] + t0_In = t0_Out = 0.0; //unit: [sec] + Vin_Type = Vout_Type = 1; //1->sin; 2->cos + Vin = 1.0; //Boundary-z (inlet) electric potential + Vout = 1.0; //Boundary-Z (outlet) electric potential + + if (BoundaryConditionInlet>0){ + switch (BoundaryConditionInlet){ + case 1: + if (electric_db->keyExists( "Vin" )){ + Vin = electric_db->getScalar( "Vin" ); + } + if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g \n",Vin); + break; + case 2: + if (electric_db->keyExists( "Vin0" )){//voltage amplitude; unit: Volt + Vin0 = electric_db->getScalar( "Vin0" ); + } + if (electric_db->keyExists( "freqIn" )){//unit: Hz + freqIn = electric_db->getScalar( "freqIn" ); + } + if (electric_db->keyExists( "t0_In" )){//timestep shift, unit: lt + t0_In = electric_db->getScalar( "t0_In" ); + } + if (electric_db->keyExists( "Vin_Type" )){ + //type=1 -> sine + //tyep=2 -> cosine + Vin_Type = electric_db->getScalar( "Vin_Type" ); + if (Vin_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vin_Type is currently not supported! 
\n"); + } + if (rank==0){ + if (Vin_Type==1){ + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] \n",Vin,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin,freqIn,t0_In); + } + else if (Vin_Type==2){ + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] \n",Vin,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin,freqIn,t0_In); + } + } + break; + } + } + if (BoundaryConditionOutlet>0){ + switch (BoundaryConditionOutlet){ + case 1: + if (electric_db->keyExists( "Vout" )){ + Vout = electric_db->getScalar( "Vout" ); + } + if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vin = %.3g \n",Vout); + break; + case 2: + if (electric_db->keyExists( "Vout0" )){//voltage amplitude; unit: Volt + Vout0 = electric_db->getScalar( "Vout0" ); + } + if (electric_db->keyExists( "freqOut" )){//unit: Hz + freqOut = electric_db->getScalar( "freqOut" ); + } + if (electric_db->keyExists( "t0_Out" )){//timestep shift, unit: lt + t0_Out = electric_db->getScalar( "t0_Out" ); + } + if (electric_db->keyExists( "Vout_Type" )){ + //type=1 -> sine + //tyep=2 -> cosine + Vout_Type = electric_db->getScalar( "Vout_Type" ); + if (Vout_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vout_Type is currently not supported! 
\n"); + } + if (rank==0){ + if (Vout_Type==1){ + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] \n",Vout,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout,freqOut,t0_Out); + } + else if (Vout_Type==2){ + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] \n",Vout,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout,freqOut,t0_Out); + } + } + break; + } } //By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis + if (BoundaryConditionInlet==2) Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,0); + if (BoundaryConditionOutlet==2) Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,0); double slope = (Vout-Vin)/(Nz-2); double psi_linearized; for (int k=0;kSendD3Q7AA(fq, 0); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + if (BoundaryConditionInlet > 0){ + switch (BoundaryConditionInlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + case 2: + Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + } + } + if (BoundaryConditionOutlet > 0){ + switch (BoundaryConditionOutlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + case 2: + Vout = 
getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + } } //-------------------------// ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); } -void ScaLBL_Poisson::SolveElectricPotentialAAeven(){ +void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){ ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + if (BoundaryConditionInlet > 0){ + switch (BoundaryConditionInlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + case 2: + Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + } + } + if (BoundaryConditionOutlet > 0){ + switch (BoundaryConditionOutlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + case 2: + Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + } } //-------------------------// ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 74abd775..ebcac179 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -9,6 +9,7 @@ #include #include #include +#include #include 
"common/ScaLBL.h" #include "common/Communication.h" @@ -16,6 +17,7 @@ #include "analysis/Minkowski.h" #include "ProfilerApp.h" +#define _USE_MATH_DEFINES #ifndef ScaLBL_POISSON_INC #define ScaLBL_POISSON_INC @@ -41,7 +43,8 @@ public: //bool Restart,pBC; int timestep,timestepMax; int analysis_interval; - int BoundaryCondition; + int BoundaryConditionInlet; + int BoundaryConditionOutlet; int BoundaryConditionSolid; double tau; double tolerance; @@ -50,11 +53,18 @@ public: double Vin, Vout; double chargeDen_dummy;//for debugging bool WriteLog; + double Vin0,freqIn,t0_In,Vin_Type; + double Vout0,freqOut,t0_Out,Vout_Type; + bool TestPeriodic; + double TestPeriodicTime;//unit: [sec] + double TestPeriodicTimeConv; //unit [sec/lt] + double TestPeriodicSaveInterval; //unit [sec] int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; double Lx,Ly,Lz; double h;//image resolution + double time_conv;//phys to LB time converting factor; unit=[sec/lt] std::shared_ptr Dm; // this domain is for analysis std::shared_ptr Mask; // this domain is for lbm @@ -97,6 +107,7 @@ private: void SolvePoissonAAodd(double *ChargeDensity); void SolvePoissonAAeven(double *ChargeDensity); void getConvergenceLog(int timestep,double error); + double getBoundaryVoltagefromPeriodicBC(double V0,double freq,double t0,int V_type,int time_step); }; #endif diff --git a/tests/TestPoissonSolver.cpp b/tests/TestPoissonSolver.cpp index 32353f65..5683ace1 100644 --- a/tests/TestPoissonSolver.cpp +++ b/tests/TestPoissonSolver.cpp @@ -53,14 +53,36 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + if (PoissonSolver.TestPeriodic==true){ + PoissonSolver.Initialize(PoissonSolver.TestPeriodicTimeConv); + } + else { + PoissonSolver.Initialize(0); + } //Initialize dummy charge density for test PoissonSolver.DummyChargeDensity(); - PoissonSolver.Run(PoissonSolver.ChargeDensityDummy); - 
PoissonSolver.getElectricPotential_debug(1); - PoissonSolver.getElectricField_debug(1); + if (PoissonSolver.TestPeriodic==true){ + if (rank==0) printf("Testing periodic voltage input is enabled. Total test time is %.3g[s], saving data every %.3g[s]; + user-specified time resolution is %.3g[s/lt]\n", + PoissonSolver.TestPeriodicTime,PoissonSolver.TestPeriodicSaveInterval,PoissonSolver.TestPeriodicTimeConv); + int timestep = 0; + while (timestep<(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv)){ + timestep++; + PoissonSolver.Run(PoissonSolver.ChargeDensityDummy,timestep); + if (timestep%(PoissonSolver.TestPeriodicSaveInterval/PoissonSolver.TestPeriodicTimeConv)==0){ + if (rank==0) printf(" Time = %.3g[s]; saving electric potential and field\n",timestep*PoissonSolver.TestPeriodicTimeConv); + PoissonSolver.getElectricPotential_debug(timestep*PoissonSolver.TestPeriodicTimeConv); + PoissonSolver.getElectricField_debug(timestep*PoissonSolver.TestPeriodicTimeConv); + } + } + } + else { + PoissonSolver.Run(PoissonSolver.ChargeDensityDummy,1); + PoissonSolver.getElectricPotential_debug(1); + PoissonSolver.getElectricField_debug(1); + } if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); diff --git a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp index 2b3726a4..93493331 100644 --- a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp +++ b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp @@ -80,20 +80,22 @@ int main(int argc, char **argv) IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); IonModel.Initialize(); + // Get maximal time converting factor based on Sotkes and Ion solvers + Study.getTimeConvMax_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); // Initialize LB-Poisson model PoissonSolver.ReadParams(filename); 
PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + PoissonSolver.Initialize(Study.time_conv_max); int timestep=0; while (timestep < Study.timestepMax){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,timestep);//solve Poisson equtaion to get steady-state electrical potental StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential From 432fab95b3b00c9a922d9b7a6e73746bf341603e Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Wed, 6 Jan 2021 01:03:18 -0500 Subject: [PATCH 02/21] test done;add sine and cosine voltage input for Poisson solver --- models/PoissonSolver.cpp | 35 ++++++++++++++++++++++------------- models/PoissonSolver.h | 8 ++++---- tests/TestNernstPlanck.cpp | 4 ++-- tests/TestPNP_Stokes.cpp | 4 ++-- tests/TestPoissonSolver.cpp | 13 +++++++------ 5 files changed, 37 insertions(+), 27 deletions(-) diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 96d737bb..1af8ad65 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -139,8 +139,17 @@ void ScaLBL_Poisson::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object MPI_Barrier(comm); - Dm->BoundaryCondition = BoundaryCondition; - Mask->BoundaryCondition = BoundaryCondition; + if (BoundaryConditionInlet==0 && BoundaryConditionOutlet==0){ + Dm->BoundaryCondition = 0; + Mask->BoundaryCondition = 0; + } + else if (BoundaryConditionInlet>0 && BoundaryConditionOutlet>0){ + Dm->BoundaryCondition = 1; + Mask->BoundaryCondition = 1; + } + else {//i.e. 
non-periodic and periodic BCs are mixed + ERROR("Error: check the type of inlet and outlet boundary condition! Mixed periodic and non-periodic BCs are found!\n"); + } Dm->CommInit(); MPI_Barrier(comm); @@ -378,7 +387,7 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){ if (electric_db->keyExists( "Vin" )){ Vin = electric_db->getScalar( "Vin" ); } - if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g \n",Vin); + if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g [V]\n",Vin); break; case 2: if (electric_db->keyExists( "Vin0" )){//voltage amplitude; unit: Volt @@ -398,12 +407,12 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){ } if (rank==0){ if (Vin_Type==1){ - printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] \n",Vin,freqIn,t0_In); - printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin,freqIn,t0_In); + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vin0,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In); } else if (Vin_Type==2){ - printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] \n",Vin,freqIn,t0_In); - printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin,freqIn,t0_In); + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V] \n",Vin0,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In); } } break; @@ -415,7 +424,7 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){ if (electric_db->keyExists( "Vout" )){ Vout = electric_db->getScalar( "Vout" ); } - if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vin = %.3g \n",Vout); + 
if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vout = %.3g [V] \n",Vout); break; case 2: if (electric_db->keyExists( "Vout0" )){//voltage amplitude; unit: Volt @@ -435,12 +444,12 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){ } if (rank==0){ if (Vout_Type==1){ - printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] \n",Vout,freqOut,t0_Out); - printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout,freqOut,t0_Out); + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out); } else if (Vout_Type==2){ - printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] \n",Vout,freqOut,t0_Out); - printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout,freqOut,t0_Out); + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out); } } break; @@ -489,7 +498,7 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){ double *psi_host; psi_host = new double [Nx*Ny*Nz]; time_conv = time_conv_from_Study; - AssignSolidBoundary(psi_host,time_conv);//step1 + AssignSolidBoundary(psi_host);//step1 Potential_Init(psi_host);//step2 ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double)); ScaLBL_DeviceBarrier(); diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index ebcac179..f2d18327 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -32,8 +32,8 @@ public: void SetDomain(); void ReadInput(); void Create(); - void Initialize(); - void Run(double *ChargeDensity); + void Initialize(double 
time_conv_from_Study); + void Run(double *ChargeDensity,int timestep_from_Study); void getElectricPotential(DoubleArray &ReturnValues); void getElectricPotential_debug(int timestep); void getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z); @@ -101,8 +101,8 @@ private: void AssignSolidBoundary(double *poisson_solid); void Potential_Init(double *psi_init); void ElectricField_LB_to_Phys(DoubleArray &Efield_reg); - void SolveElectricPotentialAAodd(); - void SolveElectricPotentialAAeven(); + void SolveElectricPotentialAAodd(int timestep_from_Study); + void SolveElectricPotentialAAeven(int timestep_from_Study); //void SolveElectricField(); void SolvePoissonAAodd(double *ChargeDensity); void SolvePoissonAAeven(double *ChargeDensity); diff --git a/tests/TestNernstPlanck.cpp b/tests/TestNernstPlanck.cpp index def67d5b..ecb3a6d0 100644 --- a/tests/TestNernstPlanck.cpp +++ b/tests/TestNernstPlanck.cpp @@ -69,7 +69,7 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + PoissonSolver.Initialize(0); int timestep=0; double error = 1.0; @@ -77,7 +77,7 @@ int main(int argc, char **argv) while (timestep < Study.timestepMax && error > Study.tolerance){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental IonModel.Run(IonModel.FluidVelocityDummy,PoissonSolver.ElectricField); //solve for ion transport and electric potential timestep++;//AA operations diff --git a/tests/TestPNP_Stokes.cpp b/tests/TestPNP_Stokes.cpp index bf05f73c..16abcee0 100644 --- a/tests/TestPNP_Stokes.cpp +++ b/tests/TestPNP_Stokes.cpp @@ -82,7 +82,7 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + 
PoissonSolver.Initialize(0); int timestep=0; @@ -94,7 +94,7 @@ int main(int argc, char **argv) while (timestep < Study.timestepMax && error > Study.tolerance){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential diff --git a/tests/TestPoissonSolver.cpp b/tests/TestPoissonSolver.cpp index 5683ace1..c81e503e 100644 --- a/tests/TestPoissonSolver.cpp +++ b/tests/TestPoissonSolver.cpp @@ -64,17 +64,18 @@ int main(int argc, char **argv) PoissonSolver.DummyChargeDensity(); if (PoissonSolver.TestPeriodic==true){ - if (rank==0) printf("Testing periodic voltage input is enabled. Total test time is %.3g[s], saving data every %.3g[s]; - user-specified time resolution is %.3g[s/lt]\n", + if (rank==0) printf("Testing periodic voltage input is enabled. 
Total test time is %.3g[s], saving data every %.3g[s]; user-specified time resolution is %.3g[s/lt]\n", PoissonSolver.TestPeriodicTime,PoissonSolver.TestPeriodicSaveInterval,PoissonSolver.TestPeriodicTimeConv); int timestep = 0; - while (timestep<(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv)){ + int timeMax = int(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv); + int timeSave = int(PoissonSolver.TestPeriodicSaveInterval/PoissonSolver.TestPeriodicTimeConv); + while (timestep Date: Mon, 18 Jan 2021 21:30:27 -0500 Subject: [PATCH 03/21] save the work --- models/FreeLeeModel.cpp | 312 +++++++++++++++++++++++++++++++++++----- 1 file changed, 276 insertions(+), 36 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 547885b8..755347f3 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -32,8 +32,8 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ tauA = tauB = 1.0; rhoA = rhoB = 1.0; Fx = Fy = Fz = 0.0; - gamma=1e-3; - W=5; + gamma=1e-3;//surface tension + W=5.0;//interfacial thickness Restart=false; din=dout=1.0; flux=0.0; @@ -220,7 +220,7 @@ void ScaLBL_FreeLeeModel::Create(){ //........................................................................... ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); - ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &hq, 7*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &mu_phi, dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &Den, dist_mem_size); @@ -239,10 +239,11 @@ void ScaLBL_FreeLeeModel::Create(){ for (int i=1; iMap(i,j,k); } } } + //TODO The following check needs update! 
// check that TmpMap is valid for (int idx=0; idxLastExterior(); idx++){ auto n = TmpMap[idx]; @@ -264,21 +265,255 @@ void ScaLBL_FreeLeeModel::Create(){ // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - // initialize phi based on PhaseLabel (include solid component labels) } -/******************************************************** - * AssignComponentLabels * - ********************************************************/ +void ScaLBL_FreeLeeModel::AssignComponentLabels() +{ + double *phase; + phase = new double[Nh]; + + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = greyscaleColor_db->getVector( "ComponentLabels" ); + auto AffinityList = greyscaleColor_db->getVector( "ComponentAffinity" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! \n"); + } + + double label_count[NLABELS]; + double label_count_global[NLABELS]; + + // Assign the labels + for (size_t idx=0; idxid[n] = 0; // set mask to zero since this is an immobile component + } + } + // fluid labels are reserved + if (VALUE == 1) AFFINITY=1.0; + else if (VALUE == 2) AFFINITY=-1.0; + phase[n] = AFFINITY; + } + } + } + + // Set Dm to match Mask + for (int i=0; iid[i] = Mask->id[i]; + + for (size_t idx=0; idxComm, label_count[idx]); + + if (rank==0){ + printf("Number of component labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxMPI_COMM_SCALBL); + delete [] phase; +} + +void ScaLBL_FreeLeeModel::AssignChemPotential_ColorGrad() +{ + double *SolidPotential_host = new double [Nx*Ny*Nz]; + double *GreySolidGrad_host = new double [3*Np]; + + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = greyscaleColor_db->getVector( "GreySolidLabels" ); + auto AffinityList = greyscaleColor_db->getVector( "GreySolidAffinity" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: 
GreySolidLabels and GreySolidAffinity must be the same length! \n"); + } + + for (int k=0;kid[n] = 0; // set mask to zero since this is an immobile component + } + } + SolidPotential_host[n] = AFFINITY; + } + } + } + + // Calculate grey-solid color-gradient + double *Dst; + Dst = new double [3*3*3]; + for (int kk=0; kk<3; kk++){ + for (int jj=0; jj<3; jj++){ + for (int ii=0; ii<3; ii++){ + int index = kk*9+jj*3+ii; + Dst[index] = sqrt(double(ii-1)*double(ii-1) + double(jj-1)*double(jj-1)+ double(kk-1)*double(kk-1)); + } + } + } + double w_face = 1.f; + double w_edge = 0.5; + double w_corner = 0.f; + //local + Dst[13] = 0.f; + //faces + Dst[4] = w_face; + Dst[10] = w_face; + Dst[12] = w_face; + Dst[14] = w_face; + Dst[16] = w_face; + Dst[22] = w_face; + // corners + Dst[0] = w_corner; + Dst[2] = w_corner; + Dst[6] = w_corner; + Dst[8] = w_corner; + Dst[18] = w_corner; + Dst[20] = w_corner; + Dst[24] = w_corner; + Dst[26] = w_corner; + // edges + Dst[1] = w_edge; + Dst[3] = w_edge; + Dst[5] = w_edge; + Dst[7] = w_edge; + Dst[9] = w_edge; + Dst[11] = w_edge; + Dst[15] = w_edge; + Dst[17] = w_edge; + Dst[19] = w_edge; + Dst[21] = w_edge; + Dst[23] = w_edge; + Dst[25] = w_edge; + + for (int k=1; kSDs(i,j,k)<2.0){ + GreySolidGrad_host[idx+0*Np] = phi_x; + GreySolidGrad_host[idx+1*Np] = phi_y; + GreySolidGrad_host[idx+2*Np] = phi_z; + } + else{ + GreySolidGrad_host[idx+0*Np] = 0.0; + GreySolidGrad_host[idx+1*Np] = 0.0; + GreySolidGrad_host[idx+2*Np] = 0.0; + } + } + } + } + } + + + if (rank==0){ + printf("Number of Grey-solid labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxLastExterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (Restart == true){ + //TODO need to revise this function if (rank==0){ printf("Reading restart file! 
\n"); } @@ -292,7 +527,7 @@ void ScaLBL_FreeLeeModel::Initialize(){ cDen = new double[2*Np]; cDist = new double[19*Np]; ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); - ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); + //ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); ifstream File(LocalRestartFile,ios::binary); int idx; @@ -336,11 +571,11 @@ void ScaLBL_FreeLeeModel::Initialize(){ ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - } - if (rank==0) printf ("Initializing phase field \n"); - //ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); - //ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (rank==0) printf ("Initializing phase and density fields on device from Restart\n"); + ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } // establish reservoirs for external bC if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ @@ -382,27 +617,30 @@ void ScaLBL_FreeLeeModel::Run(){ PROFILE_START("Update"); // *************ODD TIMESTEP************* timestep++; - /* // Compute the Phase indicator field + //------------------------------------------------------------------------------------------------------------------- + // Compute the Phase indicator field // Read for hq, Bq happens in this routine (requires communication) - ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE + //ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL + ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Den, Phi, 
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL if (BoundaryCondition > 0 && BoundaryCondition < 5){ + //TODO to be revised ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } // Halo exchange for phase field - ScaLBL_Comm_Regular->SendHalo(Phi); + ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Phi); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm_WideHalo->Recv(Phi); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set BCs if (BoundaryCondition == 3){ @@ -417,7 +655,7 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); @@ -425,24 +663,24 @@ void ScaLBL_FreeLeeModel::Run(){ // *************EVEN TIMESTEP************* timestep++; // Compute the Phase indicator 
field - ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE + ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL // Halo exchange for phase field if (BoundaryCondition > 0 && BoundaryCondition < 5){ ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } - ScaLBL_Comm_Regular->SendHalo(Phi); + ScaLBL_Comm_WideHalo->Send(Phi); ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Phi); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm_WideHalo->Recv(Phi); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set boundary conditions if (BoundaryCondition == 3){ @@ -459,7 +697,9 @@ void ScaLBL_FreeLeeModel::Run(){ } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - */ + + + //---------------------------------------------------------------------------------------------- ScaLBL_DeviceBarrier(); MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); 
//************************************************************************ From 4085deb5e35e354ad756fb7b63dacac1ac91b0ec Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 18 Jan 2021 23:37:08 -0500 Subject: [PATCH 04/21] save the work --- models/FreeLeeModel.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index e9f1f5b5..4bdff0d0 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -243,20 +243,19 @@ void ScaLBL_FreeLeeModel::Create(){ } } } - //TODO The following check needs update! // check that TmpMap is valid for (int idx=0; idxLastExterior(); idx++){ auto n = TmpMap[idx]; - if (n > Nx*Ny*Nz){ + if (n > Nxh*Nyh*Nzh){ printf("Bad value! idx=%i \n", n); - TmpMap[idx] = Nx*Ny*Nz-1; + TmpMap[idx] = Nxh*Nyh*Nzh-1; } } for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ auto n = TmpMap[idx]; - if ( n > Nx*Ny*Nz ){ + if ( n > Nxh*Nyh*Nzh ){ printf("Bad value! idx=%i \n",n); - TmpMap[idx] = Nx*Ny*Nz-1; + TmpMap[idx] = Nxh*Nyh*Nzh-1; } } ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); From e22de8ae7e8f6bb393df15e2482078b5be48b42b Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 31 Jan 2021 19:06:07 -0500 Subject: [PATCH 05/21] save the work;to be built and tested --- cpu/FreeLee.cpp | 2046 +++++++++++++++++++++++++-------------- models/FreeLeeModel.cpp | 237 ++--- models/FreeLeeModel.h | 3 +- 3 files changed, 1408 insertions(+), 878 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 35cbd5fd..40a99dd3 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -2,6 +2,1311 @@ #define STOKES +extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + for (n=0; n 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color 
gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); + + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np){ + + int idx,n,nread; + double fq,phi; + + for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = 
neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. 
+ ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BGK collision ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(-2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + 0.3333333333333333* + (-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(0.2222222222222222 + ux*ux - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + (mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*ux*ux + 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(-2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*ux*ux + 0.3333333333333333*((-rhoA + rhoB)*ux*ux + + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(0.2222222222222222 + uy*uy - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*uy*uy + 0.3333333333333333*((-rhoA + rhoB)*uy*uy + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(-2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + 0.3333333333333333*(ux*ux + 2*uy 
+ uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*uy*uy + 0.3333333333333333*((-rhoA + rhoB)*uy*uy + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(0.2222222222222222 + uz*uz - + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + (mgx*ux + mgy*uy + mgz*(-1 + uz))* + (-2*chem*uz*uz + 0.3333333333333333*((-rhoA + rhoB)*uz*uz + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(-2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*uz*uz + 0.3333333333333333*((-rhoA + rhoB)*uz*uz + + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)*(0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - 
(m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)*(0.2222222222222222 + (ux - uy)*(ux - uy) - + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*(ux - uy)*(ux - uy) + 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)*(0.2222222222222222 + (ux - uy)*(ux - uy) - + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*(ux - uy)*(ux - uy) + 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))*(0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)*(0.2222222222222222 + (ux 
- uz)*(ux - uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*(ux - uz)*(ux - uz) + 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))*(0.2222222222222222 + (ux - uz)*(ux - uz) - + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*(ux - uz)*(ux - uz) + 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))*(0.2222222222222222 + (uy + uz)*(uy + uz) - + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(0.2222222222222222 + (uy + uz)*(uy + uz) - + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)*(0.2222222222222222 + (uy - uz)*(uy - uz) - + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + (mgz + mgx*ux + 
mgy*(-1 + uy) + mgz*uz)* + (-2*chem*(uy - uz)*(uy - uz) + 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))*(0.2222222222222222 + (uy - uz)*(uy - uz) - + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*(uy - uz)*(uy - uz) + 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + // ----------------------------- compute phase field evolution ---------------------------------------- + //Normalize the Color Gradient + C = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag = C; + if (C==0.0) ColorMag=1.0; + nx = nx/ColorMag; + ny = ny/ColorMag; + nz = nz/ColorMag; + //compute surface tension-related parameter + theta = M*4.5*(1-4.0*phi*phi)/W; + + //load distributions of phase field + //q=0 + h0 = hq[n]; + //q=1 + h1 = hq[nr1]; + + //q=2 + h2 = hq[nr2]; + + //q=3 + h3 = hq[nr3]; + + //q=4 + h4 = hq[nr4]; + + //q=5 + h5 = hq[nr5]; + + //q=6 + h6 = hq[nr6]; + + //-------------------------------- BGK collision for phase field ---------------------------------// + // q = 0 + hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; + + // q = 1 + hq[nr2] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + + // q = 2 + hq[nr1] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + + // q = 3 + hq[nr4] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 4 + hq[nr3] = h4 - (h4 - 
phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 5 + hq[nr6] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + + // q = 6 + hq[nr5] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + //........................................................................ + + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,nn2x,ijk; + //int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + double C,theta; + double M 
= 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + for (int n=start; n even part of dist) - //fq = dist[nread]; // reading the f2 data into register fq - nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) - fq = dist[nr2]; // reading the f2 data into register fq - rho += fq; - m1 -= 11.0*(fq); - m2 -= 4.0*(fq); - jx -= fq; - m4 += 4.0*(fq); - m9 += 2.0*(fq); - m10 -= 4.0*(fq); - - // q=3 - //nread = neighborList[n+2*Np]; // neighbor 4 - //fq = dist[nread]; - nr3 = neighborList[n+2*Np]; // neighbor 4 - fq = dist[nr3]; - rho += fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jy = fq; - m6 = -4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 = fq; - m12 = -2.0*fq; - - // q = 4 - //nread = neighborList[n+3*Np]; // neighbor 3 - //fq = dist[nread]; - nr4 = neighborList[n+3*Np]; // neighbor 3 - fq = dist[nr4]; - rho+= fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jy -= fq; - m6 += 4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 += fq; - m12 -= 2.0*fq; - - // q=5 - //nread = neighborList[n+4*Np]; - //fq = dist[nread]; - nr5 = neighborList[n+4*Np]; - fq = dist[nr5]; - rho += fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jz = fq; - m8 = -4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 -= fq; - m12 += 2.0*fq; - - - // q = 6 - //nread = neighborList[n+5*Np]; - //fq = dist[nread]; - nr6 = neighborList[n+5*Np]; - fq = dist[nr6]; - rho+= fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jz -= fq; - m8 += 4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 -= fq; - m12 += 2.0*fq; - - // q=7 - //nread = neighborList[n+6*Np]; - //fq = dist[nread]; - nr7 = neighborList[n+6*Np]; - fq = dist[nr7]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jy += fq; - m6 += fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 = fq; - m16 = fq; - m17 = -fq; - - // q = 8 - //nread = neighborList[n+7*Np]; - //fq = dist[nread]; - nr8 = neighborList[n+7*Np]; - fq = dist[nr8]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jy -= fq; - m6 -= fq; - m9 += fq; - m10 += fq; 
- m11 += fq; - m12 += fq; - m13 += fq; - m16 -= fq; - m17 += fq; - - // q=9 - //nread = neighborList[n+8*Np]; - //fq = dist[nread]; - nr9 = neighborList[n+8*Np]; - fq = dist[nr9]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jy -= fq; - m6 -= fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 -= fq; - m16 += fq; - m17 += fq; - - // q = 10 - //nread = neighborList[n+9*Np]; - //fq = dist[nread]; - nr10 = neighborList[n+9*Np]; - fq = dist[nr10]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jy += fq; - m6 += fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 -= fq; - m16 -= fq; - m17 -= fq; - - // q=11 - //nread = neighborList[n+10*Np]; - //fq = dist[nread]; - nr11 = neighborList[n+10*Np]; - fq = dist[nr11]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jz += fq; - m8 += fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 = fq; - m16 -= fq; - m18 = fq; - - // q=12 - //nread = neighborList[n+11*Np]; - //fq = dist[nread]; - nr12 = neighborList[n+11*Np]; - fq = dist[nr12]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jz -= fq; - m8 -= fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 += fq; - m16 += fq; - m18 -= fq; - - // q=13 - //nread = neighborList[n+12*Np]; - //fq = dist[nread]; - nr13 = neighborList[n+12*Np]; - fq = dist[nr13]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jz -= fq; - m8 -= fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 -= fq; - m16 -= fq; - m18 -= fq; - - // q=14 - //nread = neighborList[n+13*Np]; - //fq = dist[nread]; - nr14 = neighborList[n+13*Np]; - fq = dist[nr14]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jz += fq; - m8 += fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 -= fq; - m16 += fq; - m18 += fq; - - // q=15 - nread = neighborList[n+14*Np]; - fq = dist[nread]; - //fq = dist[17*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy += fq; - m6 
+= fq; - jz += fq; - m8 += fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 = fq; - m17 += fq; - m18 -= fq; - - // q=16 - nread = neighborList[n+15*Np]; - fq = dist[nread]; - //fq = dist[8*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy -= fq; - m6 -= fq; - jz -= fq; - m8 -= fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 += fq; - m17 -= fq; - m18 += fq; - - // q=17 - //fq = dist[18*Np+n]; - nread = neighborList[n+16*Np]; - fq = dist[nread]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy += fq; - m6 += fq; - jz -= fq; - m8 -= fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 -= fq; - m17 += fq; - m18 += fq; - - // q=18 - nread = neighborList[n+17*Np]; - fq = dist[nread]; - //fq = dist[9*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy -= fq; - m6 -= fq; - jz += fq; - m8 += fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 -= fq; - m17 -= fq; - m18 -= fq; - - //........................................................................ - //..............carry out relaxation process.............................. - //..........Toelke, Fruediger et. al. 2006................................ 
- if (C == 0.0) nx = ny = nz = 0.0; - m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1); - m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); - //.................inverse transformation...................................................... - - // q=0 - fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; - dist[n] = fq; - - // q = 1 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; - //nread = neighborList[n+Np]; - dist[nr2] = fq; - - // q=2 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; - //nread = neighborList[n]; - dist[nr1] = fq; - - // q = 3 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; - //nread = neighborList[n+3*Np]; - dist[nr4] = fq; - - // q = 4 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; - //nread = neighborList[n+2*Np]; - dist[nr3] = fq; - - // q = 5 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; - //nread = neighborList[n+5*Np]; - dist[nr6] = fq; - - // q = 6 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; - //nread = neighborList[n+4*Np]; - dist[nr5] = fq; - - // q = 
7 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); - //nread = neighborList[n+7*Np]; - dist[nr8] = fq; - - // q = 8 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 - +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); - //nread = neighborList[n+6*Np]; - dist[nr7] = fq; - - // q = 9 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); - //nread = neighborList[n+9*Np]; - dist[nr10] = fq; - - // q = 10 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); - //nread = neighborList[n+8*Np]; - dist[nr9] = fq; - - // q = 11 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); - //nread = neighborList[n+11*Np]; - dist[nr12] = fq; - - // q = 12 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ - mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); - //nread = neighborList[n+10*Np]; - dist[nr11]= fq; - - // q = 13 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); - //nread = neighborList[n+13*Np]; - dist[nr14] = fq; - - // q= 14 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); - //nread = neighborList[n+12*Np]; - dist[nr13] = fq; - - - // q = 15 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) - -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 
0.08333333333*(Fy+Fz); - nread = neighborList[n+15*Np]; - dist[nread] = fq; - - // q = 16 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) - -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); - nread = neighborList[n+14*Np]; - dist[nread] = fq; - - - // q = 17 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) - -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); - nread = neighborList[n+17*Np]; - dist[nread] = fq; - - // q = 18 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) - -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); - nread = neighborList[n+16*Np]; - dist[nread] = fq; - - // write the velocity - ux = jx / rho0; - uy = jy / rho0; - uz = jz / rho0; - Vel[n] = ux; - Vel[Np+n] = uy; - Vel[2*Np+n] = uz; - - // Instantiate mass transport distributions - // Stationary value - distribution 0 - nAB = 1.0/(nA+nB); - Aq[n] = 0.3333333333333333*nA; - Bq[n] = 0.3333333333333333*nB; - - //............................................... - // q = 0,2,4 - // Cq = {1,0,0}, {0,1,0}, {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nx; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; - - // q = 1 - //nread = neighborList[n+Np]; - Aq[nr2] = a1; - Bq[nr2] = b1; - // q=2 - //nread = neighborList[n]; - Aq[nr1] = a2; - Bq[nr1] = b2; - - //............................................... 
- // Cq = {0,1,0} - delta = beta*nA*nB*nAB*0.1111111111111111*ny; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; - - // q = 3 - //nread = neighborList[n+3*Np]; - Aq[nr4] = a1; - Bq[nr4] = b1; - // q = 4 - //nread = neighborList[n+2*Np]; - Aq[nr3] = a2; - Bq[nr3] = b2; - - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nz; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; - - // q = 5 - //nread = neighborList[n+5*Np]; - Aq[nr6] = a1; - Bq[nr6] = b1; - // q = 6 - //nread = neighborList[n+4*Np]; - Aq[nr5] = a2; - Bq[nr5] = b2; - //............................................... 
- } -} - -extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, - double *Den, double *Phi, int start, int finish, int Np){ - - int idx,n,nread; - double fq,nA,nB; - - for (int n=start; n ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),W(0),gamma(0), +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauM(0),rhoA(0),rhoB(0),W(0),gamma(0),kappa(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -30,10 +30,13 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ // set defaults timestepMax = 100000; tauA = tauB = 1.0; + tauM = 1.0;//relaxation time for phase field rhoA = rhoB = 1.0; Fx = Fy = Fz = 0.0; gamma=1e-3;//surface tension W=5.0;//interfacial thickness + beta = 12.0*gamma/W; + kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma Restart=false; din=dout=1.0; flux=0.0; @@ -81,6 +84,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ inletB=0.f; outletA=0.f; outletB=1.f; + //update secondary parameters + beta = 12.0*gamma/W; + kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma //if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details) BoundaryCondition = 0; @@ -258,15 +264,16 @@ void ScaLBL_FreeLeeModel::Create(){ TmpMap[idx] = Nxh*Nyh*Nzh-1; } } + // copy the device map ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); - ScaLBL_DeviceBarrier(); - delete [] TmpMap; - // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + comm.barrier(); + delete [] TmpMap; + delete [] neighborList; } -void ScaLBL_FreeLeeModel::AssignComponentLabels() +void 
ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() { double *phase; phase = new double[Nh]; @@ -288,12 +295,26 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels() // Assign the labels for (size_t idx=0; idxid[n] + int x=i-1; + int y=j-1; + int z=k-1; + if (x<0) x=0; + if (y<0) y=0; + if (z<0) z=0; + if (x>=Nx) x=Nx-1; + if (y>=Ny) y=Ny-1; + if (z>=Nz) z=Nz-1; + int n = z*Nx*Ny+y*Nx+x; VALUE=id[n]; + // Assign the affinity from the paired list for (unsigned int idx=0; idx < NLABELS; idx++){ //printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]); @@ -307,7 +328,7 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels() // fluid labels are reserved if (VALUE == 1) AFFINITY=1.0; else if (VALUE == 2) AFFINITY=-1.0; - phase[n] = AFFINITY; + phase[nh] = AFFINITY; } } } @@ -329,56 +350,10 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels() } //compute color gradient and laplacian of phase field + double *ColorGrad_host, mu_phi_host; + ColorGrad_host = new double[3*Np]; + mu_phi_host = new double[Np]; - - - - - //copy all data to device - ScaLBL_CopyToDevice(Phi, phase, N*sizeof(double)); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - delete [] phase; -} - -void ScaLBL_FreeLeeModel::AssignChemPotential_ColorGrad() -{ - double *SolidPotential_host = new double [Nx*Ny*Nz]; - double *GreySolidGrad_host = new double [3*Np]; - - size_t NLABELS=0; - signed char VALUE=0; - double AFFINITY=0.f; - - auto LabelList = greyscaleColor_db->getVector( "GreySolidLabels" ); - auto AffinityList = greyscaleColor_db->getVector( "GreySolidAffinity" ); - - NLABELS=LabelList.size(); - if (NLABELS != AffinityList.size()){ - ERROR("Error: GreySolidLabels and GreySolidAffinity must be the same length! 
\n"); - } - - for (int k=0;kid[n] = 0; // set mask to zero since this is an immobile component - } - } - SolidPotential_host[n] = AFFINITY; - } - } - } - - // Calculate grey-solid color-gradient double *Dst; Dst = new double [3*3*3]; for (int kk=0; kk<3; kk++){ @@ -389,8 +364,8 @@ void ScaLBL_FreeLeeModel::AssignChemPotential_ColorGrad() } } } - double w_face = 1.f; - double w_edge = 0.5; + double w_face = 1.0/18.0; + double w_edge = 1.0/36.0; double w_corner = 0.f; //local Dst[13] = 0.f; @@ -424,14 +399,21 @@ void ScaLBL_FreeLeeModel::AssignChemPotential_ColorGrad() Dst[23] = w_edge; Dst[25] = w_edge; - for (int k=1; kSDs(i,j,k)<2.0){ - GreySolidGrad_host[idx+0*Np] = phi_x; - GreySolidGrad_host[idx+1*Np] = phi_y; - GreySolidGrad_host[idx+2*Np] = phi_z; - } - else{ - GreySolidGrad_host[idx+0*Np] = 0.0; - GreySolidGrad_host[idx+1*Np] = 0.0; - GreySolidGrad_host[idx+2*Np] = 0.0; - } + //store color gradient + ColorGrad_host[idx+0*Np] = cs2_inv*phi_x; + ColorGrad_host[idx+1*Np] = cs2_inv*phi_y; + ColorGrad_host[idx+2*Np] = cs2_inv*phi_z; + //compute chemical potential + phi_Lap = 2.0*cs2_inv*phi_Lap; + mu_phi_host[idx] = 4.0*beta*phase[nh]*(phase[nh]+1.0)*(phase[nh]-1.0) - kappa*phi_Lap; } } } } - - if (rank==0){ - printf("Number of Grey-solid labels: %lu \n",NLABELS); - for (unsigned int idx=0; idxLastExterior(), Np); - ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); if (Restart == true){ //TODO need to revise this function @@ -576,6 +547,7 @@ void ScaLBL_FreeLeeModel::Initialize(){ } // establish reservoirs for external bC + // TODO to be revised if (BoundaryCondition == 1 || BoundaryCondition == 2 || 
BoundaryCondition == 3 || BoundaryCondition == 4 ){ if (Dm->kproc()==0){ ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); @@ -617,13 +589,12 @@ void ScaLBL_FreeLeeModel::Run(){ timestep++; //------------------------------------------------------------------------------------------------------------------- // Compute the Phase indicator field - // Read for hq, Bq happens in this routine (requires communication) - //ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL - ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(hq); //WRITE INTO OPPOSITE + // Read for hq happens in this routine (requires communication) + ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL @@ -635,8 +606,8 @@ void ScaLBL_FreeLeeModel::Run(){ // Halo exchange for phase field ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); 
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); @@ -653,19 +624,19 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); // *************EVEN TIMESTEP************* timestep++; // Compute the Phase indicator field - ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(hq); //WRITE INTO OPPOSITE + ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL + ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL @@ -675,8 +646,8 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, 
rhoA, rhoB, tauA, tauB, tauM, + kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); @@ -693,8 +664,8 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); //************************************************************************ PROFILE_STOP("Update"); @@ -722,30 +693,24 @@ void ScaLBL_FreeLeeModel::Run(){ void ScaLBL_FreeLeeModel::WriteDebug(){ // Copy back final phase indicator field and convert to regular layout - DoubleArray PhaseField(Nx,Ny,Nz); + DoubleArray PhaseData(Nxh,Nyh,Nzh); //ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); - ScaLBL_CopyToHost(PhaseField.data(), Phi, sizeof(double)*N); + ScaLBL_CopyToHost(PhaseData.data(), Phi, sizeof(double)*Nh); FILE *OUTFILE; sprintf(LocalRankFilename,"Phase.%05i.raw",rank); OUTFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,OUTFILE); + fwrite(PhaseData.data(),8,Nh,OUTFILE); fclose(OUTFILE); - ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); + DoubleArray PhaseField(Nx,Ny,Nz); + ScaLBL_Comm->RegularLayout(Map,Den,PhaseField); FILE *AFILE; - sprintf(LocalRankFilename,"A.%05i.raw",rank); + sprintf(LocalRankFilename,"Density.%05i.raw",rank); AFILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,AFILE); fclose(AFILE); - ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); - FILE *BFILE; - sprintf(LocalRankFilename,"B.%05i.raw",rank); - BFILE = fopen(LocalRankFilename,"wb"); - 
fwrite(PhaseField.data(),8,N,BFILE); - fclose(BFILE); - ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); FILE *PFILE; sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 5aa2d30a..75d2b413 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -35,7 +35,8 @@ public: int timestep,timestepMax; int BoundaryCondition; double tauA,tauB,rhoA,rhoB; - double W,gamma; + double tauM;//relaxation time for phase field (or mass) + double W,gamma,kappa,beta; double Fx,Fy,Fz,flux; double din,dout,inletA,inletB,outletA,outletB; From 3a6edc365d029194eda8187e37475aecdab92eb2 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 31 Jan 2021 21:26:03 -0500 Subject: [PATCH 06/21] build pass; ongoing model validation --- common/ScaLBL.h | 22 + cpu/FreeLee.cpp | 2100 +----------------------------- models/FreeLeeModel.cpp | 64 +- models/FreeLeeModel.h | 3 +- tests/CMakeLists.txt | 1 + tests/lbpm_freelee_simulator.cpp | 81 ++ 6 files changed, 154 insertions(+), 2117 deletions(-) create mode 100644 tests/lbpm_freelee_simulator.cpp diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 0fe2ad0c..f0c34ea9 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -178,6 +178,28 @@ extern "C" void ScaLBL_D3Q7_AAeven_DFH(double *Aq, double *Bq, double *Den, doub extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double *ColorGrad, int start, int finish, int Np); +// FREE ENERGY LEE MODEL + +extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np); + +extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, + double rhonA, double rhoB, double tauM, double W, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int 
finish, int Np); + +extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np); + + // BOUNDARY CONDITION ROUTINES extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int Np); diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 40a99dd3..f28af185 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -6,6 +6,9 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, do { int n; double p = 1.0;//NOTE: take initial pressure p=1.0 + double chem; + double cg_x,cg_y,cg_z; + for (n=0; n 0){ - - // Retrieve the color gradient - nx = ColorGrad[n]; - ny = ColorGrad[N+n]; - nz = ColorGrad[2*N+n]; - //...........Normalize the Color Gradient................................. - C = sqrt(nx*nx+ny*ny+nz*nz); - if (C==0.0) C=1.0; - nx = nx/C; - ny = ny/C; - nz = nz/C; - //......No color gradient at z-boundary if pressure BC are set............. - // if (pBC && k==0) nx = ny = nz = 0.f; - // if (pBC && k==Nz-1) nx = ny = nz = 0.f; - //........................................................................ 
- // READ THE DISTRIBUTIONS - // (read from opposite array due to previous swap operation) - //........................................................................ - f2 = distodd[n]; - f4 = distodd[N+n]; - f6 = distodd[2*N+n]; - f8 = distodd[3*N+n]; - f10 = distodd[4*N+n]; - f12 = distodd[5*N+n]; - f14 = distodd[6*N+n]; - f16 = distodd[7*N+n]; - f18 = distodd[8*N+n]; - //........................................................................ - f0 = disteven[n]; - f1 = disteven[N+n]; - f3 = disteven[2*N+n]; - f5 = disteven[3*N+n]; - f7 = disteven[4*N+n]; - f9 = disteven[5*N+n]; - f11 = disteven[6*N+n]; - f13 = disteven[7*N+n]; - f15 = disteven[8*N+n]; - f17 = disteven[9*N+n]; - //........................................................................ - // PERFORM RELAXATION PROCESS - //........................................................................ - //....................compute the moments............................................... - rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17); - m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; - m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14; - jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; - m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18; - jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; - m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18; - m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18); - m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17); - m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13; - m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13; - m13 = f8+f7-f10-f9; - m14 = f16+f15-f18-f17; - m15 = f12+f11-f14-f13; - m16 = f7-f8+f9-f10-f11+f12-f13+f14; - m17 = -f7+f8+f9-f10+f15-f16+f17-f18; - m18 = f11-f12-f13+f14-f15+f16+f17-f18; - //..........Toelke, Fruediger et. al. 
2006............... - if (C == 0.0) nx = ny = nz = 1.0; -#ifdef STOKES - m1 = m1 + rlx_setA*(- 11*rho -alpha*C - m1); - m2 = m2 + rlx_setA*(3*rho - m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*( 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*( 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); -#else - m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1); - m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); -#endif - //.................inverse transformation...................................................... 
- f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2; - f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10); - f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10); - f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); - f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); - f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); - f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); - f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12+0.25*m13+0.125*(m16-m17); - f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12+0.25*m13+0.125*(m17-m16); - f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12-0.25*m13+0.125*(m16+m17); - f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12-0.25*m13-0.125*(m16+m17); - f11 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8) - 
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12+0.25*m15+0.125*(m18-m16); - f12 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12+0.25*m15+0.125*(m16-m18); - f13 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12-0.25*m15-0.125*(m16+m18); - f14 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12-0.25*m15+0.125*(m16+m18); - f15 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18); - f16 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17); - f17 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18); - f18 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6) - -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18); - //....................................................................................................... 
- // incorporate external force - f1 += 0.16666666*Fx; - f2 -= 0.16666666*Fx; - f3 += 0.16666666*Fy; - f4 -= 0.16666666*Fy; - f5 += 0.16666666*Fz; - f6 -= 0.16666666*Fz; - f7 += 0.08333333333*(Fx+Fy); - f8 -= 0.08333333333*(Fx+Fy); - f9 += 0.08333333333*(Fx-Fy); - f10 -= 0.08333333333*(Fx-Fy); - f11 += 0.08333333333*(Fx+Fz); - f12 -= 0.08333333333*(Fx+Fz); - f13 += 0.08333333333*(Fx-Fz); - f14 -= 0.08333333333*(Fx-Fz); - f15 += 0.08333333333*(Fy+Fz); - f16 -= 0.08333333333*(Fy+Fz); - f17 += 0.08333333333*(Fy-Fz); - f18 -= 0.08333333333*(Fy-Fz); - //*********** WRITE UPDATED VALUES TO MEMORY ****************** - // Write the updated distributions - //....EVEN..................................... - disteven[n] = f0; - disteven[N+n] = f2; - disteven[2*N+n] = f4; - disteven[3*N+n] = f6; - disteven[4*N+n] = f8; - disteven[5*N+n] = f10; - disteven[6*N+n] = f12; - disteven[7*N+n] = f14; - disteven[8*N+n] = f16; - disteven[9*N+n] = f18; - //....ODD...................................... - distodd[n] = f1; - distodd[N+n] = f3; - distodd[2*N+n] = f5; - distodd[3*N+n] = f7; - distodd[4*N+n] = f9; - distodd[5*N+n] = f11; - distodd[6*N+n] = f13; - distodd[7*N+n] = f15; - distodd[8*N+n] = f17; - - //...Store the Velocity.......................... - Velocity[n] = jx; - Velocity[N+n] = jy; - Velocity[2*N+n] = jz; - /* Velocity[3*n] = jx; - Velocity[3*n+1] = jy; - Velocity[3*n+2] = jz; - */ //...Store the Color Gradient.................... - // ColorGrad[3*n] = nx*C; - // ColorGrad[3*n+1] = ny*C; - // ColorGrad[3*n+2] = nz*C; - //............................................... 
- //*************************************************************** - } // check if n is in the solid - } // loop over n -} - -extern "C" void ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad, - double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB, - double alpha, double beta, double Fx, double Fy, double Fz) -{ - - int i,j,k,n,nn,N; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - - // non-conserved moments - double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - // additional variables needed for computations - double rho,jx,jy,jz,C,nx,ny,nz; - - N = Nx*Ny*Nz; - char id; - - for (n=0; n 0){ - - //.......Back out the 3-D indices for node n.............. - k = n/(Nx*Ny); - j = (n-Nx*Ny*k)/Nx; - i = n-Nx*Ny*k-Nx*j; - //........................................................................ - //........Get 1-D index for this thread.................... - // n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x; - //........................................................................ - // COMPUTE THE COLOR GRADIENT - //........................................................................ - //.................Read Phase Indicator Values............................ - //........................................................................ - nn = n-1; // neighbor index (get convention) - if (i-1<0) nn += Nx; // periodic BC along the x-boundary - f1 = phi[nn]; // get neighbor for phi - 1 - //........................................................................ 
- nn = n+1; // neighbor index (get convention) - if (!(i+10)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = na*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*ux))+delta; - - A_odd[n] = a1; - A_even[N+n] = a2; - B_odd[n] = b1; - B_even[N+n] = b2; - //............................................... - // q = 2 - // Cq = {0,1,0} - delta = beta*na*nb*nab*0.1111111111111111*ny; - if (!(na*nb*nab>0)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = na*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*uy))+delta; - - A_odd[N+n] = a1; - A_even[2*N+n] = a2; - B_odd[N+n] = b1; - B_even[2*N+n] = b2; - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*na*nb*nab*0.1111111111111111*nz; - if (!(na*nb*nab>0)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = na*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*uz))+delta; - - A_odd[2*N+n] = a1; - A_even[3*N+n] = a2; - B_odd[2*N+n] = b1; - B_even[3*N+n] = b2; - //............................................... - - /* // Construction and streaming for the components - for (idx=0; idx<3; idx++){ - //............................................... 
- // Distribution index - q = 2*idx; - // Associated discrete velocity - Cqx = D3Q7[idx][0]; - Cqy = D3Q7[idx][1]; - Cqz = D3Q7[idx][2]; - // Generate the Equilibrium Distribution - a1 = na*feq[q]; - b1 = nb*feq[q]; - a2 = na*feq[q+1]; - b2 = nb*feq[q+1]; - // Recolor the distributions - if (C > 0.0){ - sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); - //if (idx > 2) sp = 0.7071067811865475*sp; - //delta = sp*min( min(a1,a2), min(b1,b2) ); - delta = na*nb/(na+nb)*0.1111111111111111*sp; - //if (a1>0 && b1>0){ - a1 += beta*delta; - a2 -= beta*delta; - b1 -= beta*delta; - b2 += beta*delta; - } - // Save the re-colored distributions - A_odd[N*idx+n] = a1; - A_even[N*(idx+1)+n] = a2; - B_odd[N*idx+n] = b1; - B_even[N*(idx+1)+n] = b2; - //............................................... - } - */ - } - } -} - -//************************************************************************* -extern "C" void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, - double beta, int Nx, int Ny, int Nz, bool pBC, int S) -{ - char id; - - int idx; - int in,jn,kn,n,nn,N; - int q,Cqx,Cqy,Cqz; - // int sendLoc; - - double na,nb; // density values - double ux,uy,uz; // flow velocity - double nx,ny,nz,C; // color gradient components - double a1,a2,b1,b2; - double sp,delta; - double feq[6]; // equilibrium distributions - // Set of Discrete velocities for the D3Q19 Model - int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}}; - N = Nx*Ny*Nz; - - for (n=0; n 0 && na+nb > 0.0){ - //.......Back out the 3-D indices for node n.............. - int k = n/(Nx*Ny); - int j = (n-Nx*Ny*k)/Nx; - int i = n-Nx*Ny*k-Nx*j; - //.....Load the Color gradient......... - nx = ColorGrad[n]; - ny = ColorGrad[N+n]; - nz = ColorGrad[2*N+n]; - C = sqrt(nx*nx+ny*ny+nz*nz); - nx = nx/C; - ny = ny/C; - nz = nz/C; - //....Load the flow velocity........... 
- ux = Velocity[n]; - uy = Velocity[N+n]; - uz = Velocity[2*N+n]; - //....Instantiate the density distributions - // Generate Equilibrium Distributions and stream - // Stationary value - distribution 0 - // Den[2*n] += 0.3333333333333333*na; - // Den[2*n+1] += 0.3333333333333333*nb; - Den[2*n] += 0.3333333333333333*na; - Den[2*n+1] += 0.3333333333333333*nb; - // Non-Stationary equilibrium distributions - feq[0] = 0.1111111111111111*(1+3*ux); - feq[1] = 0.1111111111111111*(1-3*ux); - feq[2] = 0.1111111111111111*(1+3*uy); - feq[3] = 0.1111111111111111*(1-3*uy); - feq[4] = 0.1111111111111111*(1+3*uz); - feq[5] = 0.1111111111111111*(1-3*uz); - // Construction and streaming for the components - for (idx=0; idx<3; idx++){ - // Distribution index - q = 2*idx; - // Associated discrete velocity - Cqx = D3Q7[idx][0]; - Cqy = D3Q7[idx][1]; - Cqz = D3Q7[idx][2]; - // Generate the Equilibrium Distribution - a1 = na*feq[q]; - b1 = nb*feq[q]; - a2 = na*feq[q+1]; - b2 = nb*feq[q+1]; - // Recolor the distributions - if (C > 0.0){ - sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); - //if (idx > 2) sp = 0.7071067811865475*sp; - //delta = sp*min( min(a1,a2), min(b1,b2) ); - delta = na*nb/(na+nb)*0.1111111111111111*sp; - //if (a1>0 && b1>0){ - a1 += beta*delta; - a2 -= beta*delta; - b1 -= beta*delta; - b2 += beta*delta; - } - - // .......Get the neighbor node.............. - //nn = n + Stride[idx]; - in = i+Cqx; - jn = j+Cqy; - kn = k+Cqz; - - // Adjust for periodic BC, if necessary - // if (in<0) in+= Nx; - // if (jn<0) jn+= Ny; - // if (kn<0) kn+= Nz; - // if (!(in 0 ){ - // Get the density value (Streaming already performed) - Na = Den[n]; - Nb = Den[N+n]; - Phi[n] = (Na-Nb)/(Na+Nb); - } - } - //................................................................... 
-} - -extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){ - int n; - for (n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){ - Phi[n] = value; - } -} - - -//extern "C" void ScaLBL_D3Q19_AAeven_Color(double *dist, double *Aq, double *Bq, double *Den, double *Velocity, -// double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, -// double Fx, double Fy, double Fz, int start, int finish, int Np){ -extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, - double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, - double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - - int ijk,nn,n; - double fq; - // conserved momemnts - double rho,jx,jy,jz; - // non-conserved moments - double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - double m3,m5,m7; - double nA,nB; // number density - double a1,b1,a2,b2,nAB,delta; - double C,nx,ny,nz; //color gradient magnitude and direction - double ux,uy,uz; - double phi,tau,rho0,rlx_setA,rlx_setB; - - const double mrt_V1=0.05263157894736842; - const double mrt_V2=0.012531328320802; - const double mrt_V3=0.04761904761904762; - const double mrt_V4=0.004594820384294068; - const double mrt_V5=0.01587301587301587; - const double mrt_V6=0.0555555555555555555555555; - const double mrt_V7=0.02777777777777778; - const double mrt_V8=0.08333333333333333; - const double mrt_V9=0.003341687552213868; - const double mrt_V10=0.003968253968253968; - const double mrt_V11=0.01388888888888889; - const double mrt_V12=0.04166666666666666; - - - for (int n=start; n0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; - - Aq[1*Np+n] = a1; - Bq[1*Np+n] = b1; - Aq[2*Np+n] = a2; - Bq[2*Np+n] = b2; - - 
//............................................... - // q = 2 - // Cq = {0,1,0} - delta = beta*nA*nB*nAB*0.1111111111111111*ny; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; - - Aq[3*Np+n] = a1; - Bq[3*Np+n] = b1; - Aq[4*Np+n] = a2; - Bq[4*Np+n] = b2; - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nz; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; - - Aq[5*Np+n] = a1; - Bq[5*Np+n] = b1; - Aq[6*Np+n] = a2; - Bq[6*Np+n] = b2; - //............................................... - - } - -} - -//extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Velocity, -// double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, -// double Fx, double Fy, double Fz, int start, int finish, int Np){ - - -extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi, - int start, int finish, int Np){ - int idx,n,nread; - double fq,nA,nB; - for (int n=start; n 1.f){ - nA = 1.0; nB = 0.f; - } - else if (phi < -1.f){ - nB = 1.0; nA = 0.f; - } - else{ - nA=0.5*(phi+1.f); - nB=0.5*(1.f-phi); - } - Den[idx] = nA; - Den[Np+idx] = nB; - - Aq[idx]=0.3333333333333333*nA; - Aq[Np+idx]=0.1111111111111111*nA; - Aq[2*Np+idx]=0.1111111111111111*nA; - Aq[3*Np+idx]=0.1111111111111111*nA; - Aq[4*Np+idx]=0.1111111111111111*nA; - Aq[5*Np+idx]=0.1111111111111111*nA; - Aq[6*Np+idx]=0.1111111111111111*nA; - - Bq[idx]=0.3333333333333333*nB; - Bq[Np+idx]=0.1111111111111111*nB; - Bq[2*Np+idx]=0.1111111111111111*nB; - 
Bq[3*Np+idx]=0.1111111111111111*nB; - Bq[4*Np+idx]=0.1111111111111111*nB; - Bq[5*Np+idx]=0.1111111111111111*nB; - Bq[6*Np+idx]=0.1111111111111111*nB; - } -} - -extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ - int n; double value; - for (n=0; nkeyExists( "tauB" )){ tauB = freelee_db->getScalar( "tauB" ); } + if (freelee_db->keyExists( "tauM" )){ + tauM = freelee_db->getScalar( "tauM" ); + } if (freelee_db->keyExists( "rhoA" )){ rhoA = freelee_db->getScalar( "rhoA" ); } @@ -282,8 +285,8 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() signed char VALUE=0; double AFFINITY=0.f; - auto LabelList = greyscaleColor_db->getVector( "ComponentLabels" ); - auto AffinityList = greyscaleColor_db->getVector( "ComponentAffinity" ); + auto LabelList = freelee_db->getVector( "ComponentLabels" ); + auto AffinityList = freelee_db->getVector( "ComponentAffinity" ); NLABELS=LabelList.size(); if (NLABELS != AffinityList.size()){ @@ -337,7 +340,7 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() for (int i=0; iid[i] = Mask->id[i]; for (size_t idx=0; idxComm, label_count[idx]); + label_count_global[idx] = Dm->Comm.sumReduce(label_count[idx]); if (rank==0){ printf("Number of component labels: %lu \n",NLABELS); @@ -350,7 +353,7 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() } //compute color gradient and laplacian of phase field - double *ColorGrad_host, mu_phi_host; + double *ColorGrad_host, *mu_phi_host; ColorGrad_host = new double[3*Np]; mu_phi_host = new double[Np]; @@ -461,6 +464,7 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() ScaLBL_CopyToDevice(Phi, phase, Nh*sizeof(double)); ScaLBL_CopyToDevice(ColorGrad, ColorGrad_host, 3*Np*sizeof(double)); ScaLBL_CopyToDevice(mu_phi, mu_phi_host, Np*sizeof(double)); + ScaLBL_Comm->Barrier(); comm.barrier(); delete [] phase; delete [] ColorGrad_host; @@ -536,14 +540,15 @@ void 
ScaLBL_FreeLeeModel::Initialize(){ // Copy the restart data to the GPU ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double)); - ScaLBL_CopyToDevice(fq,cDist,19*Np*sizeof(double)); + ScaLBL_CopyToDevice(gqbar,cDist,19*Np*sizeof(double)); ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); ScaLBL_Comm->Barrier(); comm.barrier(); if (rank==0) printf ("Initializing phase and density fields on device from Restart\n"); - ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + //TODO the following function is to be updated. + //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); + //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); } // establish reservoirs for external bC @@ -575,7 +580,7 @@ void ScaLBL_FreeLeeModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); comm.barrier(); starttime = MPI_Wtime(); //......................................... 
@@ -593,7 +598,7 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation @@ -606,28 +611,27 @@ void ScaLBL_FreeLeeModel::Run(){ // Halo exchange for phase field ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set BCs if (BoundaryCondition == 3){ - ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } if (BoundaryCondition == 4){ - din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 5){ - ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); - ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + 
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -635,7 +639,7 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation @@ -646,25 +650,25 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set boundary conditions if (BoundaryCondition == 3){ - ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + 
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 4){ - din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 5){ - ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); - ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } - ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); //************************************************************************ diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 75d2b413..1b78792a 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -62,7 +62,7 @@ public: signed char *id; int *NeighborList; int *dvcMap; - double *fq, *hq; + double *gqbar, *hq; double *mu_phi, *Den, *Phi; double *ColorGrad; double *Velocity; @@ -82,6 +82,7 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); + void AssignComponentLabels_ChemPotential_ColorGrad(); }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0b634f06..63086219 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,7 @@ ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator ) ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( 
lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) diff --git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp new file mode 100644 index 00000000..61de8c28 --- /dev/null +++ b/tests/lbpm_freelee_simulator.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "models/FreeLeeModel.h" +#include "common/Utilities.h" + +//#define WRE_SURFACES + +/* + * Simulator for two-phase flow in porous media + * James E. McClure 2013-2014 + */ + + +//************************************************************************* +// Implementation of Two-Phase Immiscible LBM using CUDA +//************************************************************************* + +int main(int argc, char **argv) +{ + + // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + // Initialize MPI and error handlers + auto multiple = db->getWithDefault( "MPI_THREAD_MULTIPLE", true ); + //Utilities::startup( argc, argv, multiple ); + //Utilities::MPI::changeProfileLevel( 1 ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Free Energy Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel(rank,nprocs,comm); + LeeModel.ReadParams(filename); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create(); // 
creating the model will create data structure to match the pore structure and allocate variables + LeeModel.Initialize(); // initializing the model will set initial conditions for variables + LeeModel.Run(); + LeeModel.WriteDebug(); + + PROFILE_STOP("Main"); + auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE(file,level); + // **************************************************** + + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); +} From ac06cd342888dcbb0e0bc5e2899b3c2d6f8f8dcb Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 1 Feb 2021 00:14:24 -0500 Subject: [PATCH 07/21] build pass; continue model debugging --- common/WideHalo.cpp | 4 ++-- common/WideHalo.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index b56e8b96..a39ab317 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -67,7 +67,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Date: Mon, 1 Feb 2021 00:39:21 -0500 Subject: [PATCH 08/21] code clean up --- tests/lbpm_freelee_simulator.cpp | 113 ++++++++++++++----------------- 1 file changed, 51 insertions(+), 62 deletions(-) diff --git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp index 61de8c28..3e9c372a 100644 --- a/tests/lbpm_freelee_simulator.cpp +++ b/tests/lbpm_freelee_simulator.cpp @@ -1,81 +1,70 @@ +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include "models/FreeLeeModel.h" #include "common/Utilities.h" +#include "models/FreeLeeModel.h" -//#define WRE_SURFACES +//******************************************************************* +// Implementation of Free-Energy Two-Phase LBM (Lee model) +//******************************************************************* -/* - * Simulator for two-phase flow in 
porous media - * James E. McClure 2013-2014 - */ - - -//************************************************************************* -// Implementation of Two-Phase Immiscible LBM using CUDA -//************************************************************************* - -int main(int argc, char **argv) +int main( int argc, char **argv ) { - // Initialize MPI - Utilities::startup( argc, argv ); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); - // Load the input database - auto db = std::make_shared( argv[1] ); + { // Limit scope so variables that contain communicators will free before MPI_Finialize - // Initialize MPI and error handlers - auto multiple = db->getWithDefault( "MPI_THREAD_MULTIPLE", true ); - //Utilities::startup( argc, argv, multiple ); - //Utilities::MPI::changeProfileLevel( 1 ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); - { // Limit scope so variables that contain communicators will free before MPI_Finialize + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Free Energy Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); - if (rank == 0){ - printf("********************************************************\n"); - printf("Running Free Energy Lee LBM \n"); - printf("********************************************************\n"); - } - // Initialize compute device - int device=ScaLBL_SetDevice(rank); - NULL_USE( device ); - ScaLBL_DeviceBarrier(); - comm.barrier(); + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + 
Utilities::setErrorHandlers(); - PROFILE_ENABLE(1); - //PROFILE_ENABLE_TRACE(); - //PROFILE_ENABLE_MEMORY(); - PROFILE_SYNCHRONIZE(); - PROFILE_START("Main"); - Utilities::setErrorHandlers(); + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create(); + LeeModel.Initialize(); + LeeModel.Run(); + LeeModel.WriteDebug(); - auto filename = argv[1]; - ScaLBL_FreeLeeModel LeeModel(rank,nprocs,comm); - LeeModel.ReadParams(filename); - LeeModel.SetDomain(); - LeeModel.ReadInput(); - LeeModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables - LeeModel.Initialize(); // initializing the model will set initial conditions for variables - LeeModel.Run(); - LeeModel.WriteDebug(); - - PROFILE_STOP("Main"); - auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); - auto level = db->getWithDefault( "TimerLevel", 1 ); - PROFILE_SAVE(file,level); - // **************************************************** + PROFILE_STOP("Main"); + auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file,level ); + // **************************************************** - } // Limit scope so variables that contain communicators will free before MPI_Finialize + } // Limit scope so variables that contain communicators will free before MPI_Finialize - Utilities::shutdown(); + Utilities::shutdown(); + return 0; } From 1f08c9a0b6c8d16d138444139e2fdcf48065b7f3 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 7 Feb 2021 20:08:38 -0500 Subject: [PATCH 09/21] save the work; add debugging output --- models/FreeLeeModel.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 7d530406..5a048d38 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp 
@@ -466,6 +466,16 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() ScaLBL_CopyToDevice(mu_phi, mu_phi_host, Np*sizeof(double)); ScaLBL_Comm->Barrier(); comm.barrier(); + + //debug + //save the phase field and check it + //FILE *OUTFILE; + //sprintf(LocalRankFilename,"Phase_Init.%05i.raw",rank); + //OUTFILE = fopen(LocalRankFilename,"wb"); + //fwrite(phase,8,Nh,OUTFILE); + //fclose(OUTFILE); + + delete [] phase; delete [] ColorGrad_host; delete [] mu_phi_host; From 9ddf949a9e1373bf636e353686467346f43f03c8 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 7 Feb 2021 23:37:26 -0500 Subject: [PATCH 10/21] save the work; to be compiled and tested --- common/ScaLBL.h | 10 +- cpu/FreeLee.cpp | 941 +++++++++++++++++++++++++++++++++------- models/FreeLeeModel.cpp | 269 +++++++++++- models/FreeLeeModel.h | 12 +- 4 files changed, 1052 insertions(+), 180 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 73b89f1d..42c51525 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -180,7 +180,9 @@ extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double // FREE ENERGY LEE MODEL -extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np); +extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np); + +extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np); extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, double rhonA, double rhoB, double tauM, double W, int start, int finish, int Np); @@ -199,6 +201,12 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, int 
strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np); + // BOUNDARY CONDITION ROUTINES diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index f28af185..bd6e744a 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -2,7 +2,7 @@ #define STOKES -extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) { int n; double p = 1.0;//NOTE: take initial pressure p=1.0 @@ -40,6 +40,38 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, do } } +extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + + for (n=0; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 
= neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. 
+ ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. + uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. 
+ uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. 
+ ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. + uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. 
+ uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n; + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + for (int n=start; n(new ScaLBL_Communicator(Mask)); - ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); + //ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); ScaLBL_Comm_WideHalo = std::shared_ptr(new ScaLBLWideHalo_Communicator(Mask,2)); // create the layout for the LBM @@ -276,6 +276,51 @@ void ScaLBL_FreeLeeModel::Create(){ delete [] neighborList; } +void ScaLBL_FreeLeeModel::Create_SingleFluid(){ + /* + * This function creates the variables needed to run single-fluid Lee model + */ + //......................................................... + // Initialize communication structures in averaging domain + for (int i=0; iid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... 
+ if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + // create the layout for the LBM + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1); + comm.barrier(); + + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + dist_mem_size = Np*sizeof(double); + neighborSize=18*(Np*sizeof(int)); + //........................................................................... + ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + //........................................................................... 
+ // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + comm.barrier(); + delete [] neighborList; +} + void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() { double *phase; @@ -482,15 +527,15 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() delete [] Dst; } -void ScaLBL_FreeLeeModel::Initialize(){ +void ScaLBL_FreeLeeModel::Initialize_TwoFluid(){ /* - * This function initializes model + * This function initializes two-fluid Lee model */ if (rank==0) printf ("Initializing phase field, chemical potential and color gradient\n"); AssignComponentLabels_ChemPotential_ColorGrad();//initialize phase field Phi if (rank==0) printf ("Initializing distributions for momentum transport\n"); - ScaLBL_D3Q19_FreeLeeModel_Init(gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); + ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); if (rank==0) printf ("Initializing density field and distributions for phase-field transport\n"); ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); @@ -578,7 +623,84 @@ void ScaLBL_FreeLeeModel::Initialize(){ //ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); } -void ScaLBL_FreeLeeModel::Run(){ +void ScaLBL_FreeLeeModel::Initialize_SingleFluid(){ + /* + * This function initializes single-fluid Lee model + */ + if (rank==0) printf ("Initializing distributions for momentum transport\n"); + ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(gqbar, Fx, Fy, Fz, Np); + + if (Restart == true){ + //TODO need to revise this function + //remove the phase-related part + + + +// if (rank==0){ +// printf("Reading restart file! 
\n"); +// } +// +// // Read in the restart file to CPU buffers +// int *TmpMap; +// TmpMap = new int[Np]; +// +// double *cPhi, *cDist, *cDen; +// cPhi = new double[N]; +// cDen = new double[2*Np]; +// cDist = new double[19*Np]; +// ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); +// //ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); +// +// ifstream File(LocalRestartFile,ios::binary); +// int idx; +// double value,va,vb; +// for (int n=0; nLastExterior(); n++){ +// va = cDen[n]; +// vb = cDen[Np + n]; +// value = (va-vb)/(va+vb); +// idx = TmpMap[n]; +// if (!(idx < 0) && idxFirstInterior(); nLastInterior(); n++){ +// va = cDen[n]; +// vb = cDen[Np + n]; +// value = (va-vb)/(va+vb); +// idx = TmpMap[n]; +// if (!(idx < 0) && idxBarrier(); +// comm.barrier(); +// +// if (rank==0) printf ("Initializing phase and density fields on device from Restart\n"); +// //TODO the following function is to be updated. +// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); +// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } +} + +void ScaLBL_FreeLeeModel::Run_TwoFluid(){ int nprocs=nprocx*nprocy*nprocz; const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); @@ -704,8 +826,105 @@ void ScaLBL_FreeLeeModel::Run(){ // ************************************************************************ } +void ScaLBL_FreeLeeModel::Run_SingleFluid(){ + int nprocs=nprocx*nprocy*nprocz; + const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); + + if (rank==0){ + printf("********************************************************\n"); + printf("No. of timesteps: %i \n", timestepMax); + fflush(stdout); + } -void ScaLBL_FreeLeeModel::WriteDebug(){ + //.......create and start timer............ + double starttime,stoptime,cputime; + ScaLBL_Comm->Barrier(); + comm.barrier(); + starttime = MPI_Wtime(); + //......................................... 
+ + //************ MAIN ITERATION LOOP ***************************************/ + PROFILE_START("Loop"); + while (timestep < timestepMax ) { + //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } + PROFILE_START("Update"); + // *************ODD TIMESTEP************* + timestep++; + //------------------------------------------------------------------------------------------------------------------- + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL + ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set boundary conditions + // TODO to be revised! + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); + } + ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + + // *************EVEN TIMESTEP************* + timestep++; + //------------------------------------------------------------------------------------------------------------------- + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL + ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + 
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set boundary conditions + // TODO to be revised! + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); + } + ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + //************************************************************************ + PROFILE_STOP("Update"); + } + PROFILE_STOP("Loop"); + PROFILE_SAVE("lbpm_color_simulator",1); + //************************************************************************ + stoptime = MPI_Wtime(); + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + cputime = (stoptime - starttime)/timestep; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + + // ************************************************************************ +} + +void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ // Copy back final phase indicator field and convert to regular layout DoubleArray PhaseData(Nxh,Nyh,Nzh); 
//ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); @@ -775,3 +994,37 @@ void ScaLBL_FreeLeeModel::WriteDebug(){ fclose(CGZ_FILE); */ } + +void ScaLBL_FreeLeeModel::WriteDebug_SingleFluid(){ + + DoubleArray PhaseData(Nxh,Nyh,Nzh); + + // Copy back final phase indicator field and convert to regular layout + ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); + FILE *PFILE; + sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); + PFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,PFILE); + fclose(PFILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + FILE *VELX_FILE; + sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); + VELX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELX_FILE); + fclose(VELX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + FILE *VELY_FILE; + sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); + VELY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELY_FILE); + fclose(VELY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + FILE *VELZ_FILE; + sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); + VELZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELZ_FILE); + fclose(VELZ_FILE); +} diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 1b78792a..5a7bf248 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -26,10 +26,14 @@ public: void ReadParams(std::shared_ptr db0); void SetDomain(); void ReadInput(); - void Create(); - void Initialize(); - void Run(); - void WriteDebug(); + void Create_TwoFluid(); + void Initialize_TwoFluid(); + void Run_TwoFluid(); + void WriteDebug_TwoFluid(); + void Create_SingleFluid(); + void Initialize_SingleFluid(); + void Run_SingleFluid(); + void WriteDebug_SingleFluid(); bool Restart,pBC; int timestep,timestepMax; From 98491ccd80cc9f8343d4080d353ea9ee09bd3a1f Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 7 Feb 2021 23:50:17 -0500 
Subject: [PATCH 11/21] save the work2; to be complied and tested --- models/FreeLeeModel.cpp | 9 +++ models/FreeLeeModel.h | 1 + tests/CMakeLists.txt | 1 + .../lbpm_freelee_SingleFluidBGK_simulator.cpp | 70 +++++++++++++++++++ tests/lbpm_freelee_simulator.cpp | 8 +-- 5 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 tests/lbpm_freelee_SingleFluidBGK_simulator.cpp diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index aca5d8d8..120d3ced 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -12,6 +12,7 @@ color lattice boltzmann model ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauM(0),rhoA(0),rhoB(0),W(0),gamma(0),kappa(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), +tau(0),rho0(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -32,6 +33,8 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ tauA = tauB = 1.0; tauM = 1.0;//relaxation time for phase field rhoA = rhoB = 1.0; + tau = 1.0;//only for single-fluid Lee model + rho0 = 1.0;//only for single-fluid Lee model Fx = Fy = Fz = 0.0; gamma=1e-3;//surface tension W=5.0;//interfacial thickness @@ -45,6 +48,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ if (freelee_db->keyExists( "timestepMax" )){ timestepMax = freelee_db->getScalar( "timestepMax" ); } + if (freelee_db->keyExists( "tau" )){//only for single-fluid Lee model + tau = freelee_db->getScalar( "tau" ); + } if (freelee_db->keyExists( "tauA" )){ tauA = freelee_db->getScalar( "tauA" ); } @@ -54,6 +60,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ if (freelee_db->keyExists( "tauM" )){ tauM = freelee_db->getScalar( "tauM" ); } + if (freelee_db->keyExists( "rho0" )){ + rho0 = freelee_db->getScalar( "rho0" ); + } if (freelee_db->keyExists( "rhoA" )){ 
rhoA = freelee_db->getScalar( "rhoA" ); } diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 5a7bf248..1e372f50 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -39,6 +39,7 @@ public: int timestep,timestepMax; int BoundaryCondition; double tauA,tauB,rhoA,rhoB; + double tau, rho0;//only for single-fluid Lee model double tauM;//relaxation time for phase field (or mass) double W,gamma,kappa,beta; double Fx,Fy,Fz,flux; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 63086219..8df4e6bd 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,6 +7,7 @@ ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator ) ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_freelee_SingleFluidBGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) diff --git a/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp new file mode 100644 index 00000000..dd3be8d9 --- /dev/null +++ b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Utilities.h" +#include "models/FreeLeeModel.h" + +//******************************************************************* +// Implementation of Free-Energy Two-Phase LBM (Lee model) +//******************************************************************* + +int main( int argc, char **argv ) +{ + + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + if (rank == 0){ + 
printf("********************************************************\n"); + printf("Running Single-Fluid Solver based on Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create_SingleFluid()(); + LeeModel.Initialize_SingleFluid()(); + LeeModel.Run_SingleFluid()(); + LeeModel.WriteDebug_SingleFluid()(); + + PROFILE_STOP("Main"); + auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_SingleFluidBGK_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file,level ); + // **************************************************** + + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); + return 0; +} diff --git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp index 3e9c372a..3663c4e9 100644 --- a/tests/lbpm_freelee_simulator.cpp +++ b/tests/lbpm_freelee_simulator.cpp @@ -51,10 +51,10 @@ int main( int argc, char **argv ) LeeModel.ReadParams( filename ); LeeModel.SetDomain(); LeeModel.ReadInput(); - LeeModel.Create(); - LeeModel.Initialize(); - LeeModel.Run(); - LeeModel.WriteDebug(); + LeeModel.Create_TwoFluid(); + LeeModel.Initialize_TwoFluid(); + LeeModel.Run_TwoFluid(); + LeeModel.WriteDebug_TwoFluid(); PROFILE_STOP("Main"); auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); From e34170d2325bfee43dc9682d67677898d067c7f8 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 8 Feb 2021 00:00:26 -0500 Subject: [PATCH 12/21] built passed 
--- cpu/FreeLee.cpp | 9 ++++----- models/FreeLeeModel.cpp | 2 +- tests/lbpm_freelee_SingleFluidBGK_simulator.cpp | 8 ++++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index bd6e744a..266b3a84 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -1738,12 +1738,11 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, d m18 = dist[17*Np+n]; //compute fluid velocity - ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)); - uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)); - uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)); + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)); //compute pressure - p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) - +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); //------------------------------------------------- BCK collison ------------------------------------------------------------// // q=0 diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 120d3ced..b0ee372d 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -1006,7 +1006,7 @@ void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ void ScaLBL_FreeLeeModel::WriteDebug_SingleFluid(){ - DoubleArray PhaseData(Nxh,Nyh,Nzh); + DoubleArray PhaseField(Nx,Ny,Nz); // Copy back final phase indicator field and convert to regular layout ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); diff --git a/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp index dd3be8d9..19d99b9c 100644 --- a/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp +++ b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp @@ -51,10 +51,10 @@ int main( int argc, char **argv ) 
LeeModel.ReadParams( filename ); LeeModel.SetDomain(); LeeModel.ReadInput(); - LeeModel.Create_SingleFluid()(); - LeeModel.Initialize_SingleFluid()(); - LeeModel.Run_SingleFluid()(); - LeeModel.WriteDebug_SingleFluid()(); + LeeModel.Create_SingleFluid(); + LeeModel.Initialize_SingleFluid(); + LeeModel.Run_SingleFluid(); + LeeModel.WriteDebug_SingleFluid(); PROFILE_STOP("Main"); auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_SingleFluidBGK_simulator" ); From d8f5b21436433e9fff89250d9b78268a7d60f21c Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 8 Feb 2021 20:31:45 -0500 Subject: [PATCH 13/21] add a correcting factor cs2 into velocity equation --- cpu/FreeLee.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 266b3a84..32a7b568 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -15,7 +15,7 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *m cg_y = ColorGrad[1*Np+n]; cg_z = ColorGrad[2*Np+n]; - gqbar[0*Np+n] = 0.3333333333333333; + gqbar[0*Np+n] = 0.3333333333333333*p; gqbar[1*Np+n] = 0.055555555555555555*(p - 0.5*(chem*cg_x+Fx)); //double(100*n)+1.f; gqbar[2*Np+n] = 0.055555555555555555*(p - 0.5*(-chem*cg_x-Fx)); //double(100*n)+2.f; gqbar[3*Np+n] = 0.055555555555555555*(p - 0.5*(chem*cg_y+Fy)); //double(100*n)+3.f; @@ -47,7 +47,7 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double for (n=0; n Date: Tue, 9 Feb 2021 15:25:20 -0500 Subject: [PATCH 14/21] fix wide halo bug in list memory --- common/WideHalo.cpp | 3 ++- common/WideHalo.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index a39ab317..0c8f1781 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -124,7 +124,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Date: Thu, 11 Feb 2021 14:31:29 -0500 Subject: [PATCH 15/21] added FlowAdapter 
class --- models/ColorModel.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++ models/ColorModel.h | 14 ++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 3b8edd6c..1769324e 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -9,6 +9,7 @@ color lattice boltzmann model #include #include + ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0), tauA(0), tauB(0), rhoA(0), rhoB(0), alpha(0), beta(0), @@ -1600,3 +1601,47 @@ void ScaLBL_ColorModel::WriteDebug(){ fclose(CGZ_FILE); */ } + +FlowAdaptor::FlowAdaptor(ScaLBL_ColorModel &M){ + Nx = M.Dm->Nx; + Ny = M.Dm->Ny; + Nz = M.Dm->Nz; + timestep=-1; + timestep_previous=-1; + + phi.resize(Nx,Ny,Nz); phi.fill(0); // phase indicator field + phi_t.resize(Nx,Ny,Nz); phi_t.fill(0); // time derivative for the phase indicator field +} + +FlowAdaptor::~FlowAdaptor(){ + +} + +double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){ + + double INTERFACE_CUTOFF = M.color_db->getWithDefault( "move_interface_cutoff", 0.975 ); + double MOVE_INTERFACE_FACTOR = M.color_db->getWithDefault( "move_interface_factor", 10.0 ); + + ScaLBL_CopyToHost( phi.data(), M.Phi, Nx*Ny*Nz* sizeof( double ) ); + /* compute the local derivative of phase indicator field */ + double beta = M.beta; + double factor = 0.5/beta; + for (int n=0; nPhi(n); + double dist1 = factor*log((1.0+value1)/(1.0-value1)); + double value2 = phi(n); + double dist2 = factor*log((1.0+value2)/(1.0-value2)); + phi_t(n) = value2; + if (value1 < INTERFACE_CUTOFF && value1 > -1*INTERFACE_CUTOFF && value2 < INTERFACE_CUTOFF && value2 > -1*INTERFACE_CUTOFF ){ + /* time derivative of distance */ + double dxdt = 0.125*(dist2-dist1); + /* extrapolate to move the distance further */ + double dist3 = dist2 + MOVE_INTERFACE_FACTOR*dxdt; + /* compute the new phase interface */ + phi_t(n) = 
(2.f*(exp(-2.f*beta*(dist3)))/(1.f+exp(-2.f*beta*(dist3))) - 1.f); + } + } + ScaLBL_CopyToDevice( M.Phi, phi_t.data(), Nx*Ny*Nz* sizeof( double ) ); +} + diff --git a/models/ColorModel.h b/models/ColorModel.h index f5667765..b2a9c1d1 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -30,6 +30,7 @@ public: void Initialize(); void Run(); void WriteDebug(); + void getPhaseField(DoubleArray &f); bool Restart,pBC; bool REVERSE_FLOW_DIRECTION; @@ -86,3 +87,16 @@ private: double MorphOpenConnected(double target_volume_change); }; +class FlowAdaptor{ +public: + FlowAdaptor(ScaLBL_ColorModel &M); + ~FlowAdaptor(); + double MoveInterface(ScaLBL_ColorModel &M); + DoubleArray phi; + DoubleArray phi_t; +private: + int Nx, Ny, Nz; + int timestep; + int timestep_previous; +}; + From da55748d30912f03b929bb5575b57117d219e219 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 12 Feb 2021 09:10:54 -0500 Subject: [PATCH 16/21] update to flow adapter --- models/ColorModel.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 1769324e..a9886337 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1238,6 +1238,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ } return(volume_change); } + double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ srand(time(NULL)); double mass_loss =0.f; @@ -1626,6 +1627,8 @@ double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){ /* compute the local derivative of phase indicator field */ double beta = M.beta; double factor = 0.5/beta; + double total_interface_displacement = 0.0; + double total_interface_sites = 0.0; for (int n=0; nPhi(n); @@ -1640,8 +1643,27 @@ double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){ double dist3 = dist2 + MOVE_INTERFACE_FACTOR*dxdt; /* compute the new phase interface */ phi_t(n) = (2.f*(exp(-2.f*beta*(dist3)))/(1.f+exp(-2.f*beta*(dist3))) - 1.f); + 
total_interface_displacement += fabs(MOVE_INTERFACE_FACTOR*dxdt); + total_interface_sites += 1.0; } } ScaLBL_CopyToDevice( M.Phi, phi_t.data(), Nx*Ny*Nz* sizeof( double ) ); + + +/* ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); + } + if (Dm->kproc() == nprocz-1){ + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); + } + } + */ } From 81d726030b35cff1b726f20a166ffff45c7979f2 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 12 Feb 2021 09:36:50 -0500 Subject: [PATCH 17/21] add freelee gpu skeleton --- cuda/FreeLee.cu | 2013 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2013 insertions(+) create mode 100644 cuda/FreeLee.cu diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu new file mode 100644 index 00000000..bc641ed9 --- /dev/null +++ b/cuda/FreeLee.cu @@ -0,0 +1,2013 @@ +#include + +#define STOKES + +__global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + double chem; + double cg_x,cg_y,cg_z; + + //for (n=0; n 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = 
nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); + + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np){ + + int idx,n,nread; + double fq,phi; + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 
3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + + 0.3333333333333333*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 
0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*uy + mgz*(-1 + uz))*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + 
ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - (m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + 
dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(-2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux 
+ Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + // ----------------------------- compute phase field evolution ---------------------------------------- + //Normalize the Color Gradient + C = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag = C; + if (C==0.0) ColorMag=1.0; + nx = nx/ColorMag; + ny = ny/ColorMag; + nz = nz/ColorMag; + //compute surface tension-related parameter + theta = M*4.5*(1-4.0*phi*phi)/W; + + //load distributions of phase field + //q=0 + h0 = hq[n]; + //q=1 + h1 = hq[nr1]; + + //q=2 + h2 = hq[nr2]; + + //q=3 + h3 = hq[nr3]; + + //q=4 + h4 = hq[nr4]; + + //q=5 + h5 = hq[nr5]; + + //q=6 + h6 = hq[nr6]; + + //-------------------------------- BGK collison for phase field ---------------------------------// + // q = 0 + hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; + + // q = 1 + hq[nr2] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + + // q = 2 + hq[nr1] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 
4*phi*phi))/W)/tauM; + + // q = 3 + hq[nr4] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 4 + hq[nr3] = h4 - (h4 - phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 5 + hq[nr6] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + + // q = 6 + hq[nr5] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + //........................................................................ + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,nn2x,ijk; + //int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double 
f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + double C,theta; + double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + // for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison 
------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n; + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + // for (int n=start; n Date: Fri, 12 Feb 2021 10:05:00 -0500 Subject: [PATCH 18/21] skeleton freelee build for gpu --- cuda/FreeLee.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu index bc641ed9..e37a92a3 100644 --- a/cuda/FreeLee.cu +++ b/cuda/FreeLee.cu @@ -2,6 +2,9 @@ #define STOKES +#define NBLOCKS 1024 +#define NTHREADS 256 + __global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) { int n; From e04abb922470f396244ac7a814daaf01e9ac11ab Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 12 Feb 2021 
10:23:19 -0500 Subject: [PATCH 19/21] hip versions for new physics --- cuda/BGK.cu | 414 +++--- hip/D3Q7BC.cu | 536 ++++++++ hip/FreeLee.cu | 2017 +++++++++++++++++++++++++++ hip/Greyscale.cu | 2745 +++++++++++++++++++++++++++++++++++++ hip/GreyscaleColor.cu | 3038 +++++++++++++++++++++++++++++++++++++++++ hip/Ion.cu | 392 ++++++ hip/MixedGradient.cu | 78 ++ hip/Poisson.cu | 330 +++++ hip/Stokes.cu | 996 ++++++++++++++ 9 files changed, 10339 insertions(+), 207 deletions(-) create mode 100644 hip/D3Q7BC.cu create mode 100644 hip/FreeLee.cu create mode 100644 hip/Greyscale.cu create mode 100644 hip/GreyscaleColor.cu create mode 100644 hip/Ion.cu create mode 100644 hip/MixedGradient.cu create mode 100644 hip/Poisson.cu create mode 100644 hip/Stokes.cu diff --git a/cuda/BGK.cu b/cuda/BGK.cu index b1da88bb..d9206a4f 100644 --- a/cuda/BGK.cu +++ b/cuda/BGK.cu @@ -12,111 +12,111 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s 10Np => odd part of dist) - f1 = dist[nr1]; // reading the f1 data into register fq + if ( n 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq - nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) - f2 = dist[nr2]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq - // q=3 - nr3 = neighborList[n+2*Np]; // neighbor 4 - f3 = dist[nr3]; + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; - // q = 4 - nr4 = neighborList[n+3*Np]; // neighbor 3 - f4 = dist[nr4]; + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; - // q=5 - nr5 = neighborList[n+4*Np]; - f5 = dist[nr5]; + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; - // q = 6 - nr6 = neighborList[n+5*Np]; - f6 = dist[nr6]; - - // q=7 - nr7 = neighborList[n+6*Np]; - f7 = dist[nr7]; + // q = 6 + nr6 = 
neighborList[n+5*Np]; + f6 = dist[nr6]; - // q = 8 - nr8 = neighborList[n+7*Np]; - f8 = dist[nr8]; + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; - // q=9 - nr9 = neighborList[n+8*Np]; - f9 = dist[nr9]; + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; - // q = 10 - nr10 = neighborList[n+9*Np]; - f10 = dist[nr10]; + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; - // q=11 - nr11 = neighborList[n+10*Np]; - f11 = dist[nr11]; + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; - // q=12 - nr12 = neighborList[n+11*Np]; - f12 = dist[nr12]; + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; - // q=13 - nr13 = neighborList[n+12*Np]; - f13 = dist[nr13]; + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; - // q=14 - nr14 = neighborList[n+13*Np]; - f14 = dist[nr14]; + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; - // q=15 - nr15 = neighborList[n+14*Np]; - f15 = dist[nr15]; + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; - // q=16 - nr16 = neighborList[n+15*Np]; - f16 = dist[nr16]; + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; - // q=17 - //fq = dist[18*Np+n]; - nr17 = neighborList[n+16*Np]; - f17 = dist[nr17]; + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; - // q=18 - nr18 = neighborList[n+17*Np]; - f18 = dist[nr18]; + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; - rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; - uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; - uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; - uu = 1.5*(ux*ux+uy*uy+uz*uz); + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; - // q=0 - dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + uz = 
f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + uu = 1.5*(ux*ux+uy*uy+uz*uz); - // q = 1 - dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); - // q=2 - dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; - // q = 3 - dist[nr4] = f3*(1.0-rlx) + - rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; - // q = 4 - dist[nr3] = f4*(1.0-rlx) + - rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + // q = 3 + dist[nr4] = f3*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; - // q = 5 - dist[nr6] = f5*(1.0-rlx) + - rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + // q = 4 + dist[nr3] = f4*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; - // q = 6 - dist[nr5] = f6*(1.0-rlx) + - rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + // q = 5 + dist[nr6] = f5*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; - // q = 7 - dist[nr8] = f7*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); + // q = 6 + dist[nr5] = f6*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; - // q = 8 - dist[nr7] = f8*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); + // q = 7 + dist[nr8] = f7*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); - // q = 9 - dist[nr10] = f9*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 
3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); + // q = 8 + dist[nr7] = f8*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); - // q = 10 - dist[nr9] = f10*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); + // q = 9 + dist[nr10] = f9*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); - // q = 11 - dist[nr12] = f11*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); + // q = 10 + dist[nr9] = f10*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); - // q = 12 - dist[nr11] = f12*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); + // q = 11 + dist[nr12] = f11*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); - // q = 13 - dist[nr14] = f13*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); + // q = 12 + dist[nr11] = f12*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); - // q= 14 - dist[nr13] = f14*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); + // q = 13 + dist[nr14] = f13*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); - // q = 15 - dist[nr16] = f15*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); + // q= 14 + dist[nr13] = f14*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); - // q = 16 - dist[nr15] = f16*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - 
uu) - 0.08333333333*(Fy+Fz); + // q = 15 + dist[nr16] = f15*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); - // q = 17 - dist[nr18] = f17*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + // q = 16 + dist[nr15] = f16*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz); - // q = 18 - dist[nr17] = f18*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); + // q = 17 + dist[nr18] = f17*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); } } } extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - - dvc_ScaLBL_D3Q19_AAeven_BGK<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); - cudaError_t err = cudaGetLastError(); + dvc_ScaLBL_D3Q19_AAeven_BGK<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); + + cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - dvc_ScaLBL_D3Q19_AAodd_BGK<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); + dvc_ScaLBL_D3Q19_AAodd_BGK<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); - cudaError_t err = cudaGetLastError(); + cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err)); } diff --git a/hip/D3Q7BC.cu b/hip/D3Q7BC.cu new file mode 100644 index 00000000..9413a68a --- /dev/null +++ b/hip/D3Q7BC.cu @@ -0,0 +1,536 @@ +#include 
+#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count) +{ + + int idx; + int iq,ib; + double value_b,value_q; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + iq = BounceBackDist_list[idx]; + ib = BounceBackSolid_list[idx]; + value_b = BoundaryValue[ib];//get boundary value from a solid site + value_q = dist[iq]; + dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice + } +} + +__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count) +{ + + int idx; + int iq,ib; + double value_b,value_q; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + iq = BounceBackDist_list[idx]; + ib = BounceBackSolid_list[idx]; + value_b = BoundaryValue[ib];//get boundary value from a solid site + value_q = dist[iq]; + dist[iq] = value_q + value_b; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + //................................................... + f5 = Vin - (f0+f1+f2+f3+f4+f6); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + //................................................... 
+ f6 = Vout - (f0+f1+f2+f3+f4+f5); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np) +{ + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + f5 = Vin - (f0+f1+f2+f3+f4+f6); + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) +{ + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + f6 = Vout - (f0+f1+f2+f3+f4+f5); + dist[nr6] = f6; + } +} + +__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count) +{ + int idx,n,nm; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + nm = Map[n]; + Psi[nm] = Vin; + } +} + + +__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count) +{ + int idx,n,nm; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + nm = Map[n]; + Psi[nm] = Vout; + } +} + +__global__ void 
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + //................................................... + f5 = Cin - (f0+f1+f2+f3+f4+f6); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + //................................................... + f6 = Cout - (f0+f1+f2+f3+f4+f5); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np) +{ + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + f5 = Cin - (f0+f1+f2+f3+f4+f6); + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np) +{ + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 
= dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + f6 = Cout - (f0+f1+f2+f3+f4+f5); + dist[nr6] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + fsum_partial = f0+f1+f2+f3+f4+f6; + uz = VelocityZ[n]; + //................................................... + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + fsum_partial = f0+f1+f2+f3+f4+f5; + uz = VelocityZ[n]; + //................................................... 
+ f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + fsum_partial = f0+f1+f2+f3+f4+f6; + uz = VelocityZ[n]; + //................................................... + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + fsum_partial = f0+f1+f2+f3+f4+f5; + uz = VelocityZ[n]; + //................................................... 
+ f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + dist[nr6] = f6; + } +} +//************************************************************************* + +extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Solid_Dirichlet_D3Q7<<>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Solid_Neumann_D3Q7<<>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<>>(list, dist, Vin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<>>(list, dist, Vout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void 
ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<>>(d_neighborList, list, dist, Vin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<>>(d_neighborList, list, dist, Vout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Poisson_D3Q7_BC_z<<>>(list, Map, Psi, Vin, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Poisson_D3Q7_BC_Z<<>>(list, Map, Psi, Vout, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<>>(list, dist, Cin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern 
"C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<>>(list, dist, Cout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<>>(d_neighborList, list, dist, Cin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<>>(d_neighborList, list, dist, Cout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<>>(list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<>>(list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = 
hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} diff --git a/hip/FreeLee.cu b/hip/FreeLee.cu new file mode 100644 index 00000000..558bd2f1 --- /dev/null +++ b/hip/FreeLee.cu @@ -0,0 +1,2017 @@ +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + +#define STOKES + +__global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + double chem; + double cg_x,cg_y,cg_z; + + //for (n=0; n 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = 
nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); + + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np){ + + int idx,n,nread; + double fq,phi; + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 
3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + + 0.3333333333333333*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 
0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*uy + mgz*(-1 + uz))*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + 
ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - (m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + 
dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(-2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux 
+ Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + // ----------------------------- compute phase field evolution ---------------------------------------- + //Normalize the Color Gradient + C = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag = C; + if (C==0.0) ColorMag=1.0; + nx = nx/ColorMag; + ny = ny/ColorMag; + nz = nz/ColorMag; + //compute surface tension-related parameter + theta = M*4.5*(1-4.0*phi*phi)/W; + + //load distributions of phase field + //q=0 + h0 = hq[n]; + //q=1 + h1 = hq[nr1]; + + //q=2 + h2 = hq[nr2]; + + //q=3 + h3 = hq[nr3]; + + //q=4 + h4 = hq[nr4]; + + //q=5 + h5 = hq[nr5]; + + //q=6 + h6 = hq[nr6]; + + //-------------------------------- BGK collison for phase field ---------------------------------// + // q = 0 + hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; + + // q = 1 + hq[nr2] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + + // q = 2 + hq[nr1] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 
4*phi*phi))/W)/tauM; + + // q = 3 + hq[nr4] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 4 + hq[nr3] = h4 - (h4 - phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 5 + hq[nr6] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + + // q = 6 + hq[nr5] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + //........................................................................ + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,nn2x,ijk; + //int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double 
f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + double C,theta; + double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + // for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison 
------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n; + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + // for (int n=start; n +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity, double *Pressure){ + int n; + // conserved momemnts + double rho,vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure; + //double uu; + // non-conserved moments + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double 
perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; + + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes + + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + pressure = rho/porosity/3.0; + vx = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14)/rho+0.5*porosity*Gx; + vy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18)/rho+0.5*porosity*Gy; + vz = (f5-f6+f11-f12-f13+f14+f15-f16-f17+f18)/rho+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the body force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = -porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx; + Fy = -porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy; + Fz = -porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz; + if (porosity==1.0){ + Fx=Gx; + Fy=Gy; + Fz=Gz; + } + + //------------------------ BGK collison where body force has higher-order terms ----------------------------------------------------------// +// // q=0 +// dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 1 +// dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q=2 +// dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 3 +// dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 4 +// dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 5 +// dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); +// +// // q = 6 +// dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); +// +// // q = 7 +// dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 8 +// dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 9 +// dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 10 +// dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 11 +// dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); +// +// // q = 12 +// dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + +// Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 13 +// dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + +// Fz*(-3. 
- (9.*(ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q= 14 +// dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); +// +// // q = 15 +// dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); +// +// // q = 16 +// dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + +// Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 17 +// dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + +// Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 18 +// dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + +// Fz*(3. 
- (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //------------------------ BGK collison where body force has NO higher-order terms ----------------------------------------------------------// + // q=0 + dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3.)); + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3.)); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(3.)); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(-3.)); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(3.)); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(-3.)); + + // q = 7 + dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) 
+ Fy*(3.)); + + // q = 8 + dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(-3.)); + + // q = 9 + dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(-3.)); + + // q = 10 + dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(3.)); + + // q = 11 + dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(3.)); + + // q = 12 + dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(-3.)); + + // q = 13 + dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(-3.)); + + // q= 14 + dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(3.)); + + // q = 15 + dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) 
+ Fz*(3.)); + + // q = 16 + dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(-3.)); + + // q = 17 + dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(-3.)); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(3.)); + //-------------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity, double Den, double *Pressure){ + + int n; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double 
rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + pressure = fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // q=2 + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + pressure += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + 
pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + pressure += 
fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + nread = neighborList[n+16*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + //---------------------------------------------------------------------// + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes + + vx = jx/Den+0.5*porosity*Gx; + vy = jy/Den+0.5*porosity*Gy; + vz = jz/Den+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = Den*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = Den*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = Den*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=Den*Gx; + Fy=Den*Gy; + Fz=Den*Gz; + } + + //Calculate pressure for Incompressible-MRT model + pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); + +// //..............carry out relaxation process............................................... +// m1 = m1 + rlx_setA*((-30*Den+19*Den*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) +// + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; +// m2 = m2 + rlx_setA*((12*Den - 5.5*Den*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) +// + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) +// + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; +// m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) +// + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; +// m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) +// + 
(1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; +// m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) +// + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; +// m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) +// + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; +// m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) +// + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; +// m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) +// + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// //....................................................................................................... + + //-------------------- IMRT collison where body force has NO higher-order terms -------------// + //..............carry out relaxation process............................................... + m1 = m1 + rlx_setA*((-30*Den+19*Den*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); + m2 = m2 + rlx_setA*((12*Den - 5.5*Den*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + 
//....................................................................................................... + + + //.................inverse transformation...................................................... + // q=0 + fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = 
mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + // q = 15 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + // q = 17 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ 
+ + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity,double rho0, double *Pressure){ + + int n, nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => 
even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + //nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; 
+ m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + //nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + 
jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + //---------------------------------------------------------------------// + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes + + vx = jx/rho0+0.5*porosity*Gx; + vy = jy/rho0+0.5*porosity*Gy; + vz = jz/rho0+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=rho0*Gx; + Fy=rho0*Gy; + Fz=rho0*Gz; + } + + //Calculate pressure for MRT model + //pressure=rho/3.f/porosity; + pressure=rho/3.f; + + //-------------------- MRT collison where body force has NO higher-order terms -------------// + m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) - m1); + m2 = m2 
+ rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity) - m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) - m9); + m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) - m11); + m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) - m13); + m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) - m14); + m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + + + //.................inverse transformation...................................................... 
+ // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = 
mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ 
+ + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity,double rho0, double *Pressure){ + + int n; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale: %s \n",hipGetErrorString(err)); + } +} + 
+/* Host-side C-linkage launcher: starts the dvc_ScaLBL_D3Q19_AAodd_Greyscale kernel, forwarding every argument unchanged, then prints the HIP error string if hipGetLastError() is not hipSuccess (the error is only reported, nothing is returned). NOTE(review): the launch configuration between the angle brackets is empty in this copy - presumably lost during extraction; confirm against the upstream file. */ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale: %s \n",hipGetErrorString(err)); + } +} + +/* Host-side C-linkage launcher for dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT: takes no neighbor list and additionally forwards the scalar Den; prints the HIP error string if hipGetLastError() is not hipSuccess. NOTE(review): launch configuration appears stripped in this copy - confirm upstream. */ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT<<>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale_IMRT: %s \n",hipGetErrorString(err)); + } +} + +/* Host-side C-linkage launcher for dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT: forwards neighborList plus the scalar Den; prints the HIP error string if hipGetLastError() is not hipSuccess. NOTE(review): launch configuration appears stripped in this copy - confirm upstream. */ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT<<>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale_IMRT: %s \n",hipGetErrorString(err)); + } +} + +/* Host-side C-linkage launcher for dvc_ScaLBL_D3Q19_AAodd_Greyscale_MRT: forwards neighborList plus the scalar rho0 (passed to the kernel's rho0 parameter); the wrapper's Fx,Fy,Fz are handed to the kernel's Gx,Gy,Gz parameters. NOTE(review): launch configuration appears stripped in this copy - confirm upstream. */ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double rho0,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAodd_Greyscale_MRT<<>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,rho0,Pressure); + + hipError_t err 
= hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale_MRT: %s \n",hipGetErrorString(err)); + } +} + +/* Host-side C-linkage launcher for dvc_ScaLBL_D3Q19_AAeven_Greyscale_MRT: takes no neighbor list, forwards all arguments including the scalar rho0, then prints the HIP error string if hipGetLastError() is not hipSuccess (reported only, nothing returned). NOTE(review): the launch configuration between the angle brackets is empty in this copy - presumably lost during extraction; confirm against the upstream file. */ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double rho0,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAeven_Greyscale_MRT<<>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,rho0,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale_MRT: %s \n",hipGetErrorString(err)); + } +} + +/* Host-side C-linkage launcher for dvc_ScaLBL_D3Q19_GreyIMRT_Init: forwards (dist, Np, Den) to the kernel and prints the HIP error string if hipGetLastError() is not hipSuccess. NOTE(review): launch configuration appears stripped in this copy - confirm upstream. */ extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *dist, int Np, double Den){ + dvc_ScaLBL_D3Q19_GreyIMRT_Init<<>>(dist, Np, Den); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_GreyIMRT_Init: %s \n",hipGetErrorString(err)); + } +} diff --git a/hip/GreyscaleColor.cu b/hip/GreyscaleColor.cu new file mode 100644 index 00000000..0ceb0522 --- /dev/null +++ b/hip/GreyscaleColor.cu @@ -0,0 +1,3038 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +//Model-1 & 4 +__global__ void dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff,double alpha, double beta, + double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,ijk,nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + //int nr15,nr16,nr17,nr18; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + // non-conserved moments + double 
m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double phi,tau,rho0,rlx_setA,rlx_setB; + + double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double tau_eff; + double mu_eff;//kinematic viscosity + double nx_gs,ny_gs,nz_gs;//grey-solid color gradient + double nx_phase,ny_phase,nz_phase,C_phase; + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + 
m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + //nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + //nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + 
m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + // Compute greyscale related parameters + c0 = 
0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes + + vx = jx/rho0+0.5*(porosity*Gx); + vy = jy/rho0+0.5*(porosity*Gy); + vz = jz/rho0+0.5*(porosity*Gz); + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=rho0*(Gx); + Fy=rho0*(Gy); + Fz=rho0*(Gz); + } + + // write the velocity + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + //Pressure[n] = rho/3.f/porosity; + Pressure[n] = rho/3.f; + + //........................................................................ + //..............carry out relaxation process.............................. + //..........Toelke, Fruediger et. al. 2006................................ 
+ //---------------- NO higher-order force -------------------------------// + if (C == 0.0) nx = ny = nz = 0.0; + m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //----------------------------------------------------------------------// + + //----------------With higher-order force ------------------------------// + //if (C == 0.0) nx = ny = nz = 0.0; + //m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1) + // + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; + //m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2) + // + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + //jx = jx + Fx; + //m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + //jy = jy + Fy; + //m6 = m6 + 
rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + //jz = jz + Fz; + //m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + //m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9) + // + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; + ////m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) + // + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; + //m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11) + // + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; + ////m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12) + // + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; + //m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13) + // + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; + //m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14) + // + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; + //m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15) + // + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; + //m16 = m16 + rlx_setB*( - m16); + //m17 = m17 + rlx_setB*( - m17); + //m18 = m18 + rlx_setB*( - m18); + //----------------------------------------------------------------------// + + //.................inverse transformation...................................................... 
+ // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = 
mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ + + // Instantiate mass transport distributions + // Stationary value - distribution 0 + nAB = 1.0/(nA+nB); + Aq[n] = 0.3333333333333333*nA; + Bq[n] = 0.3333333333333333*nB; + + //............................................... 
+ // q = 0,2,4 + // Cq = {1,0,0}, {0,1,0}, {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nx; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + //nread = neighborList[n+Np]; + Aq[nr2] = a1; + Bq[nr2] = b1; + // q=2 + //nread = neighborList[n]; + Aq[nr1] = a2; + Bq[nr1] = b2; + + //............................................... + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + //nread = neighborList[n+3*Np]; + Aq[nr4] = a1; + Bq[nr4] = b1; + // q = 4 + //nread = neighborList[n+2*Np]; + Aq[nr3] = a2; + Bq[nr3] = b2; + + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + //nread = neighborList[n+5*Np]; + Aq[nr6] = a1; + Bq[nr6] = b1; + // q = 6 + //nread = neighborList[n+4*Np]; + Aq[nr5] = a2; + Bq[nr5] = b2; + //............................................... 
+ } + } +} + +//Model-1 & 4 +__global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, + double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ + int ijk,nn,n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double phi,tau,rho0,rlx_setA,rlx_setB; + + double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double tau_eff; + double mu_eff;//kinematic viscosity + double nx_gs,ny_gs,nz_gs;//grey-solid color gradient + double nx_phase,ny_phase,nz_phase,C_phase; + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + 
+ Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... + + } + } +} + +__global__ void dvc_ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + int idx; + double nA,nB; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s1.0) t1 =((t1>0.0)-(t1<0.0))*(1.0-fabs(t1))+t1; +// //........................................................................ +// nn = ijk+1; // neighbor index (get convention) +// m2 = Phi[nn]; // get neighbor for phi - 2 +// t2 = m2+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t2)>1.0) t2 =((t2>0.0)-(t2<0.0))*(1.0-fabs(t2))+t2; +// //........................................................................ +// nn = ijk-strideY; // neighbor index (get convention) +// m3 = Phi[nn]; // get neighbor for phi - 3 +// t3 = m3+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t3)>1.0) t3 =((t3>0.0)-(t3<0.0))*(1.0-fabs(t3))+t3; +// //........................................................................ 
+// nn = ijk+strideY; // neighbor index (get convention) +// m4 = Phi[nn]; // get neighbor for phi - 4 +// t4 = m4+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t4)>1.0) t4 =((t4>0.0)-(t4<0.0))*(1.0-fabs(t4))+t4; +// //........................................................................ +// nn = ijk-strideZ; // neighbor index (get convention) +// m5 = Phi[nn]; // get neighbor for phi - 5 +// t5 = m5+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t5)>1.0) t5 =((t5>0.0)-(t5<0.0))*(1.0-fabs(t5))+t5; +// //........................................................................ +// nn = ijk+strideZ; // neighbor index (get convention) +// m6 = Phi[nn]; // get neighbor for phi - 6 +// t6 = m6+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t6)>1.0) t6 =((t6>0.0)-(t6<0.0))*(1.0-fabs(t6))+t6; +// //........................................................................ +// nn = ijk-strideY-1; // neighbor index (get convention) +// m7 = Phi[nn]; // get neighbor for phi - 7 +// t7 = m7+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t7)>1.0) t7 =((t7>0.0)-(t7<0.0))*(1.0-fabs(t7))+t7; +// //........................................................................ +// nn = ijk+strideY+1; // neighbor index (get convention) +// m8 = Phi[nn]; // get neighbor for phi - 8 +// t8 = m8+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t8)>1.0) t8 =((t8>0.0)-(t8<0.0))*(1.0-fabs(t8))+t8; +// //........................................................................ +// nn = ijk+strideY-1; // neighbor index (get convention) +// m9 = Phi[nn]; // get neighbor for phi - 9 +// t9 = m9+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t9)>1.0) t9 =((t9>0.0)-(t9<0.0))*(1.0-fabs(t9))+t9; +// //........................................................................ 
+// nn = ijk-strideY+1; // neighbor index (get convention) +// m10 = Phi[nn]; // get neighbor for phi - 10 +// t10 = m10+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t10)>1.0) t10 =((t10>0.0)-(t10<0.0))*(1.0-fabs(t10))+t10; +// //........................................................................ +// nn = ijk-strideZ-1; // neighbor index (get convention) +// m11 = Phi[nn]; // get neighbor for phi - 11 +// t11 = m11+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t11)>1.0) t11 =((t11>0.0)-(t11<0.0))*(1.0-fabs(t11))+t11; +// //........................................................................ +// nn = ijk+strideZ+1; // neighbor index (get convention) +// m12 = Phi[nn]; // get neighbor for phi - 12 +// t12 = m12+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t12)>1.0) t12 =((t12>0.0)-(t12<0.0))*(1.0-fabs(t12))+t12; +// //........................................................................ +// nn = ijk+strideZ-1; // neighbor index (get convention) +// m13 = Phi[nn]; // get neighbor for phi - 13 +// t13 = m13+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t13)>1.0) t13 =((t13>0.0)-(t13<0.0))*(1.0-fabs(t13))+t13; +// //........................................................................ +// nn = ijk-strideZ+1; // neighbor index (get convention) +// m14 = Phi[nn]; // get neighbor for phi - 14 +// t14 = m14+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t14)>1.0) t14 =((t14>0.0)-(t14<0.0))*(1.0-fabs(t14))+t14; +// //........................................................................ +// nn = ijk-strideZ-strideY; // neighbor index (get convention) +// m15 = Phi[nn]; // get neighbor for phi - 15 +// t15 = m15+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t15)>1.0) t15 =((t15>0.0)-(t15<0.0))*(1.0-fabs(t15))+t15; +// //........................................................................ 
+// nn = ijk+strideZ+strideY; // neighbor index (get convention) +// m16 = Phi[nn]; // get neighbor for phi - 16 +// t16 = m16+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t16)>1.0) t16 =((t16>0.0)-(t16<0.0))*(1.0-fabs(t16))+t16; +// //........................................................................ +// nn = ijk+strideZ-strideY; // neighbor index (get convention) +// m17 = Phi[nn]; // get neighbor for phi - 17 +// t17 = m17+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t17)>1.0) t17 =((t17>0.0)-(t17<0.0))*(1.0-fabs(t17))+t17; +// //........................................................................ +// nn = ijk-strideZ+strideY; // neighbor index (get convention) +// m18 = Phi[nn]; // get neighbor for phi - 18 +// t18 = m18+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t18)>1.0) t18 =((t18>0.0)-(t18<0.0))*(1.0-fabs(t18))+t18; +// //............Compute the Color Gradient................................... +// nx_phase = -(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); +// ny_phase = -(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); +// nz_phase = -(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); +// C_phase = sqrt(nx_phase*nx_phase+ny_phase*ny_phase+nz_phase*nz_phase); +// //correct the normal color gradient by considering the effect of grey solid +// nx = -(t1-t2+0.5*(t7-t8+t9-t10+t11-t12+t13-t14)); +// ny = -(t3-t4+0.5*(t7-t8-t9+t10+t15-t16+t17-t18)); +// nz = -(t5-t6+0.5*(t11-t12-t13+t14+t15-t16-t17+t18)); +// +// if (C_phase==0.0){//i.e. if in a bulk phase, there is no need for grey-solid correction +// nx = nx_phase; +// ny = ny_phase; +// nz = nz_phase; +// } +// +// //...........Normalize the Color Gradient................................. 
+// C = sqrt(nx*nx+ny*ny+nz*nz); +// double ColorMag = C; +// if (C==0.0) ColorMag=1.0; +// nx = nx/ColorMag; +// ny = ny/ColorMag; +// nz = nz/ColorMag; +// +// // q=0 +// fq = dist[n]; +// rho = fq; +// m1 = -30.0*fq; +// m2 = 12.0*fq; +// +// // q=1 +// //nread = neighborList[n]; // neighbor 2 +// //fq = dist[nread]; // reading the f1 data into register fq +// nr1 = neighborList[n]; +// fq = dist[nr1]; // reading the f1 data into register fq +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jx = fq; +// m4 = -4.0*fq; +// m9 = 2.0*fq; +// m10 = -4.0*fq; +// +// // f2 = dist[10*Np+n]; +// //nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) +// //fq = dist[nread]; // reading the f2 data into register fq +// nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) +// fq = dist[nr2]; // reading the f2 data into register fq +// rho += fq; +// m1 -= 11.0*(fq); +// m2 -= 4.0*(fq); +// jx -= fq; +// m4 += 4.0*(fq); +// m9 += 2.0*(fq); +// m10 -= 4.0*(fq); +// +// // q=3 +// //nread = neighborList[n+2*Np]; // neighbor 4 +// //fq = dist[nread]; +// nr3 = neighborList[n+2*Np]; // neighbor 4 +// fq = dist[nr3]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy = fq; +// m6 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 = fq; +// m12 = -2.0*fq; +// +// // q = 4 +// //nread = neighborList[n+3*Np]; // neighbor 3 +// //fq = dist[nread]; +// nr4 = neighborList[n+3*Np]; // neighbor 3 +// fq = dist[nr4]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy -= fq; +// m6 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 += fq; +// m12 -= 2.0*fq; +// +// // q=5 +// //nread = neighborList[n+4*Np]; +// //fq = dist[nread]; +// nr5 = neighborList[n+4*Np]; +// fq = dist[nr5]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz = fq; +// m8 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// +// // q = 6 +// //nread = neighborList[n+5*Np]; +// //fq = dist[nread]; +// nr6 = 
neighborList[n+5*Np]; +// fq = dist[nr6]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz -= fq; +// m8 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// // q=7 +// //nread = neighborList[n+6*Np]; +// //fq = dist[nread]; +// nr7 = neighborList[n+6*Np]; +// fq = dist[nr7]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 = fq; +// m16 = fq; +// m17 = -fq; +// +// // q = 8 +// //nread = neighborList[n+7*Np]; +// //fq = dist[nread]; +// nr8 = neighborList[n+7*Np]; +// fq = dist[nr8]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 += fq; +// m16 -= fq; +// m17 += fq; +// +// // q=9 +// //nread = neighborList[n+8*Np]; +// //fq = dist[nread]; +// nr9 = neighborList[n+8*Np]; +// fq = dist[nr9]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 += fq; +// m17 += fq; +// +// // q = 10 +// //nread = neighborList[n+9*Np]; +// //fq = dist[nread]; +// nr10 = neighborList[n+9*Np]; +// fq = dist[nr10]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 -= fq; +// m17 -= fq; +// +// // q=11 +// //nread = neighborList[n+10*Np]; +// //fq = dist[nread]; +// nr11 = neighborList[n+10*Np]; +// fq = dist[nr11]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 = fq; +// m16 -= fq; +// m18 = fq; +// +// // q=12 +// //nread = neighborList[n+11*Np]; +// //fq = dist[nread]; +// nr12 = neighborList[n+11*Np]; +// fq = 
dist[nr12]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 += fq; +// m16 += fq; +// m18 -= fq; +// +// // q=13 +// //nread = neighborList[n+12*Np]; +// //fq = dist[nread]; +// nr13 = neighborList[n+12*Np]; +// fq = dist[nr13]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 -= fq; +// m18 -= fq; +// +// // q=14 +// //nread = neighborList[n+13*Np]; +// //fq = dist[nread]; +// nr14 = neighborList[n+13*Np]; +// fq = dist[nr14]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 += fq; +// m18 += fq; +// +// // q=15 +// nread = neighborList[n+14*Np]; +// fq = dist[nread]; +// //fq = dist[17*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 = fq; +// m17 += fq; +// m18 -= fq; +// +// // q=16 +// nread = neighborList[n+15*Np]; +// fq = dist[nread]; +// //fq = dist[8*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 += fq; +// m17 -= fq; +// m18 += fq; +// +// // q=17 +// //fq = dist[18*Np+n]; +// nread = neighborList[n+16*Np]; +// fq = dist[nread]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 += fq; +// m18 += fq; +// +// // q=18 +// nread = neighborList[n+17*Np]; +// fq = dist[nread]; +// //fq = dist[9*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; 
+// m17 -= fq; +// m18 -= fq; +// +// // Compute greyscale related parameters +// c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); +// if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes +// //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); +// c1 = porosity*0.5*GeoFun/sqrt(perm); +// if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes +// +// vx = jx/rho0+0.5*(porosity*Gx); +// vy = jy/rho0+0.5*(porosity*Gy); +// vz = jz/rho0+0.5*(porosity*Gz); +// v_mag=sqrt(vx*vx+vy*vy+vz*vz); +// ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); +// uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); +// uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); +// u_mag=sqrt(ux*ux+uy*uy+uz*uz); +// +// //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium +// Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); +// Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); +// Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); +// if (porosity==1.0){ +// Fx=rho0*(Gx); +// Fy=rho0*(Gy); +// Fz=rho0*(Gz); +// } +// +// // write the velocity +// Velocity[n] = ux; +// Velocity[Np+n] = uy; +// Velocity[2*Np+n] = uz; +// +// //........................................................................ +// //..............carry out relaxation process.............................. +// //..........Toelke, Fruediger et. al. 2006................................ 
+// if (C == 0.0) nx = ny = nz = 0.0; +// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); +// m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); +// m10 = m10 + rlx_setA*( - m10); +// //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); +// m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); +// m12 = m12 + rlx_setA*( - m12); +// //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); +// m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); +// m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); +// m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// +// //.................inverse transformation...................................................... 
+// // q=0 +// fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; +// dist[n] = fq; +// +// // q = 1 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); +// //nread = neighborList[n+Np]; +// dist[nr2] = fq; +// +// // q=2 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); +// //nread = neighborList[n]; +// dist[nr1] = fq; +// +// // q = 3 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// //nread = neighborList[n+3*Np]; +// dist[nr4] = fq; +// +// // q = 4 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// //nread = neighborList[n+2*Np]; +// dist[nr3] = fq; +// +// // q = 5 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// //nread = neighborList[n+5*Np]; +// dist[nr6] = fq; +// +// // q = 6 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// //nread = neighborList[n+4*Np]; +// dist[nr5] = fq; +// +// // q = 7 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); +// //nread = neighborList[n+7*Np]; +// dist[nr8] = fq; +// +// // q = 8 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 +// +mrt_V12*m12+0.25*m13+0.125*(m17-m16); +// //nread = neighborList[n+6*Np]; +// dist[nr7] = fq; +// +// // q = 9 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); +// //nread = neighborList[n+9*Np]; +// dist[nr10] = fq; +// +// // q = 10 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); +// //nread = neighborList[n+8*Np]; +// dist[nr9] = fq; +// +// // q = 11 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// 
-mrt_V12*m12+0.25*m15+0.125*(m18-m16); +// //nread = neighborList[n+11*Np]; +// dist[nr12] = fq; +// +// // q = 12 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ +// mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); +// //nread = neighborList[n+10*Np]; +// dist[nr11]= fq; +// +// // q = 13 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15-0.125*(m16+m18); +// //nread = neighborList[n+13*Np]; +// dist[nr14] = fq; +// +// // q= 14 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15+0.125*(m16+m18); +// //nread = neighborList[n+12*Np]; +// dist[nr13] = fq; +// +// +// // q = 15 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); +// nread = neighborList[n+15*Np]; +// dist[nread] = fq; +// +// // q = 16 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); +// nread = neighborList[n+14*Np]; +// dist[nread] = fq; +// +// +// // q = 17 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); +// nread = neighborList[n+17*Np]; +// dist[nread] = fq; +// +// // q = 18 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); +// nread = neighborList[n+16*Np]; +// dist[nread] = fq; +// //........................................................................ +// +// // Instantiate mass transport distributions +// // Stationary value - distribution 0 +// nAB = 1.0/(nA+nB); +// Aq[n] = 0.3333333333333333*nA; +// Bq[n] = 0.3333333333333333*nB; +// +// //............................................... 
+// // q = 0,2,4 +// // Cq = {1,0,0}, {0,1,0}, {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nx; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; +// +// // q = 1 +// //nread = neighborList[n+Np]; +// Aq[nr2] = a1; +// Bq[nr2] = b1; +// // q=2 +// //nread = neighborList[n]; +// Aq[nr1] = a2; +// Bq[nr1] = b2; +// +// //............................................... +// // Cq = {0,1,0} +// delta = beta*nA*nB*nAB*0.1111111111111111*ny; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; +// +// // q = 3 +// //nread = neighborList[n+3*Np]; +// Aq[nr4] = a1; +// Bq[nr4] = b1; +// // q = 4 +// //nread = neighborList[n+2*Np]; +// Aq[nr3] = a2; +// Bq[nr3] = b2; +// +// //............................................... +// // q = 4 +// // Cq = {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nz; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; +// +// // q = 5 +// //nread = neighborList[n+5*Np]; +// Aq[nr6] = a1; +// Bq[nr6] = b1; +// // q = 6 +// //nread = neighborList[n+4*Np]; +// Aq[nr5] = a2; +// Bq[nr5] = b2; +// //............................................... 
+// } +// } +//} +// +////Model-2&3 +//__global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, +// double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, +// double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ +// int ijk,nn,n; +// double fq; +// // conserved momemnts +// double rho,jx,jy,jz; +// double vx,vy,vz,v_mag; +// double ux,uy,uz,u_mag; +// // non-conserved moments +// double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; +// double m3,m5,m7; +// double t1,t2,t4,t6,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18; +// double t3,t5,t7; +// double nA,nB; // number density +// double a1,b1,a2,b2,nAB,delta; +// double C,nx,ny,nz; //color gradient magnitude and direction +// double phi,tau,rho0,rlx_setA,rlx_setB; +// +// double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) +// double porosity; +// double perm;//voxel permeability +// double c0, c1; //Guo's model parameters +// double tau_eff; +// double mu_eff;//kinematic viscosity +// double nx_phase,ny_phase,nz_phase,C_phase; +// double Fx,Fy,Fz; +// +// const double mrt_V1=0.05263157894736842; +// const double mrt_V2=0.012531328320802; +// const double mrt_V3=0.04761904761904762; +// const double mrt_V4=0.004594820384294068; +// const double mrt_V5=0.01587301587301587; +// const double mrt_V6=0.0555555555555555555555555; +// const double mrt_V7=0.02777777777777778; +// const double mrt_V8=0.08333333333333333; +// const double mrt_V9=0.003341687552213868; +// const double mrt_V10=0.003968253968253968; +// const double mrt_V11=0.01388888888888889; +// const double mrt_V12=0.04166666666666666; +// +// int S = Np/NBLOCKS/NTHREADS + 1; +// for (int s=0; s1.0) t1 =((t1>0.0)-(t1<0.0))*(1.0-fabs(t1))+t1; +// 
//........................................................................ +// nn = ijk+1; // neighbor index (get convention) +// m2 = Phi[nn]; // get neighbor for phi - 2 +// t2 = m2+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t2)>1.0) t2 =((t2>0.0)-(t2<0.0))*(1.0-fabs(t2))+t2; +// //........................................................................ +// nn = ijk-strideY; // neighbor index (get convention) +// m3 = Phi[nn]; // get neighbor for phi - 3 +// t3 = m3+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t3)>1.0) t3 =((t3>0.0)-(t3<0.0))*(1.0-fabs(t3))+t3; +// //........................................................................ +// nn = ijk+strideY; // neighbor index (get convention) +// m4 = Phi[nn]; // get neighbor for phi - 4 +// t4 = m4+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t4)>1.0) t4 =((t4>0.0)-(t4<0.0))*(1.0-fabs(t4))+t4; +// //........................................................................ +// nn = ijk-strideZ; // neighbor index (get convention) +// m5 = Phi[nn]; // get neighbor for phi - 5 +// t5 = m5+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t5)>1.0) t5 =((t5>0.0)-(t5<0.0))*(1.0-fabs(t5))+t5; +// //........................................................................ +// nn = ijk+strideZ; // neighbor index (get convention) +// m6 = Phi[nn]; // get neighbor for phi - 6 +// t6 = m6+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t6)>1.0) t6 =((t6>0.0)-(t6<0.0))*(1.0-fabs(t6))+t6; +// //........................................................................ +// nn = ijk-strideY-1; // neighbor index (get convention) +// m7 = Phi[nn]; // get neighbor for phi - 7 +// t7 = m7+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t7)>1.0) t7 =((t7>0.0)-(t7<0.0))*(1.0-fabs(t7))+t7; +// //........................................................................ 
+// nn = ijk+strideY+1; // neighbor index (get convention) +// m8 = Phi[nn]; // get neighbor for phi - 8 +// t8 = m8+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t8)>1.0) t8 =((t8>0.0)-(t8<0.0))*(1.0-fabs(t8))+t8; +// //........................................................................ +// nn = ijk+strideY-1; // neighbor index (get convention) +// m9 = Phi[nn]; // get neighbor for phi - 9 +// t9 = m9+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t9)>1.0) t9 =((t9>0.0)-(t9<0.0))*(1.0-fabs(t9))+t9; +// //........................................................................ +// nn = ijk-strideY+1; // neighbor index (get convention) +// m10 = Phi[nn]; // get neighbor for phi - 10 +// t10 = m10+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t10)>1.0) t10 =((t10>0.0)-(t10<0.0))*(1.0-fabs(t10))+t10; +// //........................................................................ +// nn = ijk-strideZ-1; // neighbor index (get convention) +// m11 = Phi[nn]; // get neighbor for phi - 11 +// t11 = m11+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t11)>1.0) t11 =((t11>0.0)-(t11<0.0))*(1.0-fabs(t11))+t11; +// //........................................................................ +// nn = ijk+strideZ+1; // neighbor index (get convention) +// m12 = Phi[nn]; // get neighbor for phi - 12 +// t12 = m12+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t12)>1.0) t12 =((t12>0.0)-(t12<0.0))*(1.0-fabs(t12))+t12; +// //........................................................................ +// nn = ijk+strideZ-1; // neighbor index (get convention) +// m13 = Phi[nn]; // get neighbor for phi - 13 +// t13 = m13+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t13)>1.0) t13 =((t13>0.0)-(t13<0.0))*(1.0-fabs(t13))+t13; +// //........................................................................ 
+// nn = ijk-strideZ+1; // neighbor index (get convention) +// m14 = Phi[nn]; // get neighbor for phi - 14 +// t14 = m14+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t14)>1.0) t14 =((t14>0.0)-(t14<0.0))*(1.0-fabs(t14))+t14; +// //........................................................................ +// nn = ijk-strideZ-strideY; // neighbor index (get convention) +// m15 = Phi[nn]; // get neighbor for phi - 15 +// t15 = m15+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t15)>1.0) t15 =((t15>0.0)-(t15<0.0))*(1.0-fabs(t15))+t15; +// //........................................................................ +// nn = ijk+strideZ+strideY; // neighbor index (get convention) +// m16 = Phi[nn]; // get neighbor for phi - 16 +// t16 = m16+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t16)>1.0) t16 =((t16>0.0)-(t16<0.0))*(1.0-fabs(t16))+t16; +// //........................................................................ +// nn = ijk+strideZ-strideY; // neighbor index (get convention) +// m17 = Phi[nn]; // get neighbor for phi - 17 +// t17 = m17+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t17)>1.0) t17 =((t17>0.0)-(t17<0.0))*(1.0-fabs(t17))+t17; +// //........................................................................ +// nn = ijk-strideZ+strideY; // neighbor index (get convention) +// m18 = Phi[nn]; // get neighbor for phi - 18 +// t18 = m18+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t18)>1.0) t18 =((t18>0.0)-(t18<0.0))*(1.0-fabs(t18))+t18; +// //............Compute the Color Gradient................................... 
+// nx_phase = -(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); +// ny_phase = -(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); +// nz_phase = -(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); +// C_phase = sqrt(nx_phase*nx_phase+ny_phase*ny_phase+nz_phase*nz_phase); +// //correct the normal color gradient by considering the effect of grey solid +// nx = -(t1-t2+0.5*(t7-t8+t9-t10+t11-t12+t13-t14)); +// ny = -(t3-t4+0.5*(t7-t8-t9+t10+t15-t16+t17-t18)); +// nz = -(t5-t6+0.5*(t11-t12-t13+t14+t15-t16-t17+t18)); +// +// if (C_phase==0.0){ +// nx = nx_phase; +// ny = ny_phase; +// nz = nz_phase; +// } +// +// //...........Normalize the Color Gradient................................. +// C = sqrt(nx*nx+ny*ny+nz*nz); +// double ColorMag = C; +// if (C==0.0) ColorMag=1.0; +// nx = nx/ColorMag; +// ny = ny/ColorMag; +// nz = nz/ColorMag; +// +// // q=0 +// fq = dist[n]; +// rho = fq; +// m1 = -30.0*fq; +// m2 = 12.0*fq; +// +// // q=1 +// fq = dist[2*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jx = fq; +// m4 = -4.0*fq; +// m9 = 2.0*fq; +// m10 = -4.0*fq; +// +// // f2 = dist[10*Np+n]; +// fq = dist[1*Np+n]; +// rho += fq; +// m1 -= 11.0*(fq); +// m2 -= 4.0*(fq); +// jx -= fq; +// m4 += 4.0*(fq); +// m9 += 2.0*(fq); +// m10 -= 4.0*(fq); +// +// // q=3 +// fq = dist[4*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy = fq; +// m6 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 = fq; +// m12 = -2.0*fq; +// +// // q = 4 +// fq = dist[3*Np+n]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy -= fq; +// m6 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 += fq; +// m12 -= 2.0*fq; +// +// // q=5 +// fq = dist[6*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz = fq; +// m8 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// // q = 6 +// fq = dist[5*Np+n]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz -= fq; +// m8 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 
+= 2.0*fq; +// +// // q=7 +// fq = dist[8*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 = fq; +// m16 = fq; +// m17 = -fq; +// +// // q = 8 +// fq = dist[7*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 += fq; +// m16 -= fq; +// m17 += fq; +// +// // q=9 +// fq = dist[10*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 += fq; +// m17 += fq; +// +// // q = 10 +// fq = dist[9*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 -= fq; +// m17 -= fq; +// +// // q=11 +// fq = dist[12*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 = fq; +// m16 -= fq; +// m18 = fq; +// +// // q=12 +// fq = dist[11*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 += fq; +// m16 += fq; +// m18 -= fq; +// +// // q=13 +// fq = dist[14*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 -= fq; +// m18 -= fq; +// +// // q=14 +// fq = dist[13*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 += fq; +// m18 += fq; +// +// // q=15 
+// fq = dist[16*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 = fq; +// m17 += fq; +// m18 -= fq; +// +// // q=16 +// fq = dist[15*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 += fq; +// m17 -= fq; +// m18 += fq; +// +// // q=17 +// fq = dist[18*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 += fq; +// m18 += fq; +// +// // q=18 +// fq = dist[17*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 -= fq; +// m18 -= fq; +// +// // Compute greyscale related parameters +// c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); +// if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes +// //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); +// c1 = porosity*0.5*GeoFun/sqrt(perm); +// if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes +// +// vx = jx/rho0+0.5*(porosity*Gx); +// vy = jy/rho0+0.5*(porosity*Gy); +// vz = jz/rho0+0.5*(porosity*Gz); +// v_mag=sqrt(vx*vx+vy*vy+vz*vz); +// ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); +// uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); +// uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); +// u_mag=sqrt(ux*ux+uy*uy+uz*uz); +// +// //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium +// Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); +// Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); +// Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); +// if (porosity==1.0){ +// Fx=rho0*(Gx); +// Fy=rho0*(Gy); +// Fz=rho0*(Gz); +// } +// +// // write the velocity +// Velocity[n] = ux; +// Velocity[Np+n] = uy; +// Velocity[2*Np+n] = uz; +// +// //........................................................................ +// //..............carry out relaxation process.............................. +// //..........Toelke, Fruediger et. al. 2006................................ 
+// if (C == 0.0) nx = ny = nz = 0.0; +// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); +// m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); +// m10 = m10 + rlx_setA*( - m10); +// //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); +// m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); +// m12 = m12 + rlx_setA*( - m12); +// //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); +// m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); +// m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); +// m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// +// //.................inverse transformation...................................................... 
+// // q=0 +// fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; +// dist[n] = fq; +// +// // q = 1 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); +// dist[1*Np+n] = fq; +// +// // q=2 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); +// dist[2*Np+n] = fq; +// +// // q = 3 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// dist[3*Np+n] = fq; +// +// // q = 4 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// dist[4*Np+n] = fq; +// +// // q = 5 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// dist[5*Np+n] = fq; +// +// // q = 6 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// dist[6*Np+n] = fq; +// +// // q = 7 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); +// dist[7*Np+n] = fq; +// +// +// // q = 8 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 +// +mrt_V12*m12+0.25*m13+0.125*(m17-m16); +// dist[8*Np+n] = fq; +// +// // q = 9 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); +// dist[9*Np+n] = fq; +// +// // q = 10 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); +// dist[10*Np+n] = fq; +// +// +// // q = 11 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12+0.25*m15+0.125*(m18-m16); +// dist[11*Np+n] = fq; +// +// // q = 12 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ +// mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); +// dist[12*Np+n] = fq; +// +// // q = 13 +// fq = mrt_V1*rho+mrt_V9*m1 +// 
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15-0.125*(m16+m18); +// dist[13*Np+n] = fq; +// +// // q= 14 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15+0.125*(m16+m18); +// +// dist[14*Np+n] = fq; +// +// // q = 15 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); +// dist[15*Np+n] = fq; +// +// // q = 16 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); +// dist[16*Np+n] = fq; +// +// +// // q = 17 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); +// dist[17*Np+n] = fq; +// +// // q = 18 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); +// dist[18*Np+n] = fq; +// //........................................................................ +// +// // Instantiate mass transport distributions +// // Stationary value - distribution 0 +// nAB = 1.0/(nA+nB); +// Aq[n] = 0.3333333333333333*nA; +// Bq[n] = 0.3333333333333333*nB; +// +// //............................................... +// // q = 0,2,4 +// // Cq = {1,0,0}, {0,1,0}, {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nx; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; +// +// Aq[1*Np+n] = a1; +// Bq[1*Np+n] = b1; +// Aq[2*Np+n] = a2; +// Bq[2*Np+n] = b2; +// +// //............................................... 
+// // q = 2 +// // Cq = {0,1,0} +// delta = beta*nA*nB*nAB*0.1111111111111111*ny; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; +// +// Aq[3*Np+n] = a1; +// Bq[3*Np+n] = b1; +// Aq[4*Np+n] = a2; +// Bq[4*Np+n] = b2; +// //............................................... +// // q = 4 +// // Cq = {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nz; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; +// +// Aq[5*Np+n] = a1; +// Bq[5*Np+n] = b1; +// Aq[6*Np+n] = a2; +// Bq[6*Np+n] = b2; +// //............................................... +// +// } +// } +//} + +//__global__ void dvc_ScaLBL_D3Q19_GreyscaleColor_Init(double *dist, double *Porosity, int Np) +//{ +// int n; +// int S = Np/NBLOCKS/NTHREADS + 1; +// double porosity; +// for (int s=0; s>>(dist,Porosity,Np); +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_GreyscaleColor_Init: %s \n",hipGetErrorString(err)); +// } +//} + +//Model-1 & 4 +extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + //cudaProfilerStart(); + //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor, cudaFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor<<>>(Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm, Vel, Pressure, + 
rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_GreyscaleColor: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); + +} + +//Model-1 & 4 +extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel,double *Pressure, + double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + //cudaProfilerStart(); + //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor, cudaFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm,Vel,Pressure, + + rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff,alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_GreyscaleColor: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + dvc_ScaLBL_PhaseField_InitFromRestart<<>>(Den, Aq, Bq, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_PhaseField_InitFromRestart: %s \n",hipGetErrorString(err)); + } +} +////Model-2&3 +//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, +// double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, +// double Fx, double 
Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ +// +// //cudaProfilerStart(); +// //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor, cudaFuncCachePreferL1); +// +// dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor<<>>(Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm, Vel, +// rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_AAeven_GreyscaleColor: %s \n",hipGetErrorString(err)); +// } +// //cudaProfilerStop(); +// +//} +// +////Model-2&3 +//extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel, +// double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, +// double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ +// +// //cudaProfilerStart(); +// //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor, cudaFuncCachePreferL1); +// +// dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm,Vel, +// rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff,alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); +// +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_AAodd_GreyscaleColor: %s \n",hipGetErrorString(err)); +// } +// //cudaProfilerStop(); +//} diff --git a/hip/Ion.cu b/hip/Ion.cu new file mode 100644 index 00000000..2c48858d --- /dev/null +++ b/hip/Ion.cu @@ -0,0 +1,392 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ + 
int n,nread; + double fq,Ci; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + // q=2 + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + // q=4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + // q=6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + //dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + //dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + //dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + //dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + //dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + //dist[nr5] 
= f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + int n; + double Ci; + double ux,uy,uz; + double uEPx,uEPy,uEPz;//electrochemical induced velocity + double Ex,Ey,Ez;//electrical field + double f0,f1,f2,f3,f4,f5,f6; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0) + CD_tmp; + } + } +} + + +extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<>>(neighborList,dist,Den,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<>>(dist,Den,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_Ion<<>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, 
double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Ion<<>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_Init<<>>(dist,Den,DenInit,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<>>(dist,Den,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} diff --git a/hip/MixedGradient.cu b/hip/MixedGradient.cu new file mode 100644 index 00000000..f171f408 --- /dev/null +++ b/hip/MixedGradient.cu @@ -0,0 +1,78 @@ +/* Implement Mixed Gradient (Lee et al. 
JCP 2016)*/ +#include +#include +//#include +#include "hip/hip_runtime.h" + + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) +{ + static int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, + {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0}, + {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, + {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; + + int i,j,k,n,N,idx; + int np,np2,nm; // neighbors + double v,vp,vp2,vm; // values at neighbors + double grad; + N = Nx*Ny*Nz; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err)); + } + cudaProfilerStop(); +} + diff --git a/hip/Poisson.cu b/hip/Poisson.cu new file mode 100644 index 00000000..34975f58 --- /dev/null +++ b/hip/Poisson.cu @@ -0,0 +1,330 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){ + int n; + double psi;//electric potential + double fq; + int nread; + int idx; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu + Ey = 
(f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound + Ez = (f5-f6)*rlx*4.0; + ElectricField[n+0*Np] = Ex; + ElectricField[n+1*Np] = Ey; + ElectricField[n+2*Np] = Ez; + + // q = 0 + dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 2 + dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + //........................................................................ + } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + int n; + double psi;//electric potential + double Ex,Ey,Ez;//electric field + double rho_e;//local charge density + double f0,f1,f2,f3,f4,f5,f6; + double rlx=1.0/tau; + int idx; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(neighborList,Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<>>(Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double 
epsilon_LB,int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_Poisson<<>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Poisson<<>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Poisson_Init<<>>(Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} diff --git a/hip/Stokes.cu b/hip/Stokes.cu new file mode 100644 index 00000000..a6a05fba --- /dev/null +++ b/hip/Stokes.cu @@ -0,0 +1,996 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){ + + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + int nread; + // 
body force due to electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + //fp = dist[10*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // f2 = dist[10*Np+n]; + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + //fq = dist[Np+n]; + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + //fq = dist[11*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + //fq = dist[2*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + //fq = dist[12*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + //fq = dist[3*Np+n]; 
+ rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + //fq = dist[13*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + //fq = dist[4*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + //fq = dist[14*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + //fq = dist[5*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + //fq = dist[15*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + //fq = dist[6*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + //fq = dist[16*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + 
+ // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + //fq = dist[7*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + // write the velocity + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + + //..............incorporate external force................................................ + //..............carry out relaxation process............................................... 
+ m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9); + m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho0) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho0) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho0) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + //.................inverse transformation...................................................... + + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + 
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 
0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){ + + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + // body force due to electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; 
s>>(neighborList,dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<>>(dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + From 5c27e3830ae5d29005d3534dcb1e184bcb5803eb Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Fri, 12 Feb 2021 13:43:26 -0500 Subject: [PATCH 20/21] Fixing compile errors without MPI --- StackTrace/ErrorHandlers.h | 3 +-- common/ScaLBL.cpp | 12 +++++++----- common/WideHalo.cpp | 5 ++--- common/WideHalo.h | 3 +-- models/ColorModel.cpp | 13 ++++--------- models/DFHModel.cpp | 10 +++------- models/FreeLeeModel.cpp | 20 +++++++------------- models/GreyscaleColorModel.cpp | 7 +++---- models/GreyscaleModel.cpp | 7 +++---- models/IonModel.cpp | 6 +++--- models/MRTModel.cpp | 7 +++---- models/PoissonSolver.cpp | 8 ++++---- models/StokesModel.cpp | 8 +++----- 13 files changed, 44 insertions(+), 65 deletions(-) diff --git a/StackTrace/ErrorHandlers.h b/StackTrace/ErrorHandlers.h index 12b8d7de..6dd961d7 100644 --- a/StackTrace/ErrorHandlers.h +++ b/StackTrace/ErrorHandlers.h @@ -3,11 +3,10 @@ #include "StackTrace/StackTrace.h" +#include "common/MPI.h" #include -#include "mpi.h" - namespace StackTrace { diff --git 
a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 182004ff..dcadb08e 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1,5 +1,8 @@ #include "common/ScaLBL.h" +#include + + ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ //...................................................................................... Lock=false; // unlock the communicator @@ -411,20 +414,19 @@ double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np double FZ = 0.0; ScaLBL_D3Q19_Init(fq, Np); //.......create and start timer............ - double starttime,stoptime,cputime; Barrier(); - starttime = MPI_Wtime(); - //......................................... + auto t1 = std::chrono::system_clock::now(); for (int t=0; t( t2 - t1 ).count(); + double cputime = 0.5*diff/TIMESTEPS; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; return MLUPS; diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 0c8f1781..ee2b2ca5 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -298,11 +298,10 @@ ScaLBLWideHalo_Communicator::~ScaLBLWideHalo_Communicator() void ScaLBLWideHalo_Communicator::Recv(double *data){ //................................................................................... - MPI_Waitall(26,req1,stat1); - MPI_Waitall(26,req2,stat2); + Utilities::MPI::waitAll(26,req1); + Utilities::MPI::waitAll(26,req2); ScaLBL_DeviceBarrier(); //................................................................................... - //................................................................................... 
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, Nh); ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, Nh); ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, Nh); diff --git a/common/WideHalo.h b/common/WideHalo.h index 55c76e50..5c9fcedf 100644 --- a/common/WideHalo.h +++ b/common/WideHalo.h @@ -4,6 +4,7 @@ This class implements support for halo widths larger than 1 #ifndef WideHalo_H #define WideHalo_H #include "common/ScaLBL.h" +#include "common/MPI.h" class ScaLBLWideHalo_Communicator{ public: @@ -52,9 +53,7 @@ private: int sendtag,recvtag; // Give the object it's own MPI communicator RankInfoStruct rank_info; - MPI_Group Group; // Group of processors associated with this domain MPI_Request req1[26],req2[26]; - MPI_Status stat1[26],stat2[26]; //...................................................................................... // MPI ranks for all 18 neighbors //...................................................................................... diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index a9886337..a46ca337 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -688,20 +688,15 @@ void ScaLBL_ColorModel::Run(){ fflush(stdout); } - //.......create and start timer............ - double starttime,stoptime,cputime; - ScaLBL_Comm->Barrier(); - comm.barrier(); - starttime = MPI_Wtime(); - //......................................... 
- //************ MAIN ITERATION LOOP ***************************************/ + comm.barrier(); PROFILE_START("Loop"); //std::shared_ptr analysis_db; bool Regular = false; auto current_db = db->cloneDatabase(); runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); //analysis.createThreads( analysis_method, 4 ); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -1034,10 +1029,10 @@ void ScaLBL_ColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_Comm->Barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 7fd61271..24639d3e 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -490,14 +490,10 @@ void ScaLBL_DFHModel::Run(){ if (rank==0) printf("********************************************************\n"); if (rank==0) printf("No. of timesteps: %i \n", timestepMax); - //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); - //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ - + auto t1 = std::chrono::system_clock::now(); bool Regular = true; PROFILE_START("Loop"); runAnalysis analysis( analysis_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); @@ -589,10 +585,10 @@ void ScaLBL_DFHModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; if (rank==0) printf("********************************************************\n"); diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index b0ee372d..5a6805b6 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -719,14 +719,9 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ fflush(stdout); } - //.......create and start timer............ - double starttime,stoptime,cputime; - ScaLBL_Comm->Barrier(); - comm.barrier(); - starttime = MPI_Wtime(); - //......................................... 
- //************ MAIN ITERATION LOOP ***************************************/ + comm.barrier(); + auto t1 = std::chrono::system_clock::now(); PROFILE_START("Loop"); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } @@ -818,10 +813,10 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; @@ -846,14 +841,13 @@ void ScaLBL_FreeLeeModel::Run_SingleFluid(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ PROFILE_START("Loop"); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -916,10 +910,10 @@ void ScaLBL_FreeLeeModel::Run_SingleFluid(){ PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index dc1e12f9..5d2b4d07 100644 --- a/models/GreyscaleColorModel.cpp +++ b/models/GreyscaleColorModel.cpp @@ -910,10 +910,8 @@ void ScaLBL_GreyscaleColorModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ @@ -923,6 +921,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ auto current_db = db->cloneDatabase(); //runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); //analysis.createThreads( analysis_method, 4 ); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -1319,10 +1318,10 @@ void ScaLBL_GreyscaleColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_Comm->Barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 6c580cc5..308cc1e6 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -485,10 +485,8 @@ void ScaLBL_GreyscaleModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... 
Minkowski Morphology(Mask); @@ -500,6 +498,7 @@ void ScaLBL_GreyscaleModel::Run(){ double rlx_eff = 1.0/tau_eff; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ // *************ODD TIMESTEP*************// @@ -744,10 +743,10 @@ void ScaLBL_GreyscaleModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/IonModel.cpp b/models/IonModel.cpp index bdd07473..67887811 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -784,7 +784,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ //.......create and start timer............ 
//double starttime,stoptime,cputime; //ScaLBL_Comm->Barrier(); comm.barrier(); - //starttime = MPI_Wtime(); + //auto t1 = std::chrono::system_clock::now(); for (int ic=0; icLastExterior(), Np); } //************************************************************************/ - //stoptime = MPI_Wtime(); //if (rank==0) printf("-------------------------------------------------------------------\n"); //// Compute the walltime per timestep - //cputime = (stoptime - starttime)/timestep; + //auto t2 = std::chrono::system_clock::now(); + //double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; //// Performance obtained from each node //double MLUPS = double(Np)/cputime/1000000; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 01d13762..e1a451e2 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -230,14 +230,13 @@ void ScaLBL_MRTModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); if (rank==0) printf("********************************************************\n"); timestep=0; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ timestep++; @@ -354,10 +353,10 @@ void ScaLBL_MRTModel::Run(){ } } //************************************************************************/ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git 
a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 58c0deac..25a31600 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -522,8 +522,8 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, int timestep_from_Study){ //.......create and start timer............ //double starttime,stoptime,cputime; - //ScaLBL_Comm->Barrier(); comm.barrier(); - //starttime = MPI_Wtime(); + //comm.barrier(); + //auto t1 = std::chrono::system_clock::now(); timestep=0; double error = 1.0; @@ -579,11 +579,11 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, int timestep_from_Study){ } //************************************************************************/ - //stoptime = MPI_Wtime(); ////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n"); ////if (rank==0) printf("---------------------------------------------------------------------------\n"); //// Compute the walltime per timestep - //cputime = (stoptime - starttime)/timestep; + //auto t2 = std::chrono::system_clock::now(); + //double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; //// Performance obtained from each node //double MLUPS = double(Np)/cputime/1000000; diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 50b7fa39..fe6b0c92 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -573,16 +573,14 @@ void ScaLBL_StokesModel::Run(){ } } - //.......create and start timer............ 
- double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); if (rank==0) printf("****************************************************************\n"); if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax); if (rank==0) printf("****************************************************************\n"); timestep=0; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ timestep++; @@ -700,10 +698,10 @@ void ScaLBL_StokesModel::Run(){ } } //************************************************************************/ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; From 97517f648266ec1bb40f6217e9443917475c0a5b Mon Sep 17 00:00:00 2001 From: Mark Allen Berrill Date: Fri, 12 Feb 2021 13:19:37 -0600 Subject: [PATCH 21/21] Fixing compile errors with HIP --- hip/CMakeLists.txt | 5 +-- hip/D3Q19.cu | 72 ++++++++++++++++++++++++++++++++++++++++++++ hip/MixedGradient.cu | 7 ++--- 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index f63cf035..9e613960 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,6 +1,7 @@ SET( HIP_SEPERABLE_COMPILATION ON ) -SET_SOURCE_FILES_PROPERTIES( BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) -HIP_ADD_LIBRARY( lbpm-hip BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.cu SHARED HIPCC_OPTIONS 
${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) +FILE( GLOB HIP_SOURCES "*.cu" ) +SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) +HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) #TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so ) #TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip ) #ADD_DEPENDENCIES( lbpm-hip copy-include ) diff --git a/hip/D3Q19.cu b/hip/D3Q19.cu index 13d4ab75..fe06820b 100644 --- a/hip/D3Q19.cu +++ b/hip/D3Q19.cu @@ -89,9 +89,25 @@ __global__ void sum_kernel_block(double *sum, double *input, int n) __inline__ __device__ double warpReduceSum(double val) { +#if 0 for (int offset = warpSize/2; offset > 0; offset /= 2) val += __shfl_down_sync(0xFFFFFFFF, val, offset, 32); return val; +#else + short int id = threadIdx.x % warpSize; + __shared__ double tmp[64]; + tmp[id] = val; + __syncthreads(); + if ( warpSize == 64) { + tmp[id] += tmp[id+32]; __syncthreads(); + } + tmp[id] += tmp[id+16]; __syncthreads(); + tmp[id] += tmp[id+8]; __syncthreads(); + tmp[id] += tmp[id+4]; __syncthreads(); + tmp[id] += tmp[id+2]; __syncthreads(); + tmp[id] += tmp[id+1]; __syncthreads(); + return tmp[0]; +#endif } __inline__ __device__ @@ -1730,6 +1746,44 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, } } +__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int idx, n; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + double f5 = 0.111111111111111111111111 - dist[6*Np+n]; + double f11 = 0.05555555555555555555556 - dist[12*Np+n]; + double f14 = 0.05555555555555555555556 - dist[13*Np+n]; + double f15 = 0.05555555555555555555556 - dist[16*Np+n]; + double f18 = 0.05555555555555555555556 - dist[17*Np+n]; + + dist[6*Np+n] = f5; + dist[12*Np+n] = 
f11; + dist[13*Np+n] = f14; + dist[16*Np+n] = f15; + dist[17*Np+n] = f18; + } +} + +__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int idx, n; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + double f6 = 0.111111111111111111111111 - dist[5*Np+n]; + double f12 = 0.05555555555555555555556 - dist[11*Np+n]; + double f13 = 0.05555555555555555555556 - dist[14*Np+n] ; + double f16 = 0.05555555555555555555556 - dist[15*Np+n]; + double f17 = 0.05555555555555555555556 - dist[18*Np+n]; + + dist[5*Np+n] = f6; + dist[11*Np+n] = f12; + dist[14*Np+n] = f13; + dist[15*Np+n] = f16; + dist[18*Np+n] = f17; + } +} + __global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np) { int idx, n; @@ -2605,6 +2659,24 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub } +extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_z<<>>(list, dist, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("HIP error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_Z<<>>(list, dist, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("HIP error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + extern "C" double deviceReduce(double *in, double* out, int N) { int threads = 512; int blocks = min((N + threads - 1) / threads, 1024); diff --git a/hip/MixedGradient.cu b/hip/MixedGradient.cu index f171f408..31518ee5 100644 --- a/hip/MixedGradient.cu +++ b/hip/MixedGradient.cu @@ -1,5 +1,4 @@ /* Implement Mixed Gradient (Lee et al. 
JCP 2016)*/ -#include #include //#include #include "hip/hip_runtime.h" @@ -10,7 +9,7 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) { - static int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, + static const int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0}, {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; @@ -66,13 +65,13 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gr extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) { - cudaProfilerStart(); + hipProfilerStart(); dvc_ScaLBL_D3Q19_MixedGradient<<>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz); hipError_t err = hipGetLastError(); if (hipSuccess != err){ printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err)); } - cudaProfilerStop(); + hipProfilerStop(); }