Merge branch 'FOM' of github.com:JamesEMcClure/LBPM-WIA into FOM

This commit is contained in:
James McClure
2021-02-12 14:27:55 -05:00
39 changed files with 15444 additions and 3255 deletions

View File

@@ -3,11 +3,10 @@
#include "StackTrace/StackTrace.h"
#include "common/MPI.h"
#include <functional>
#include "mpi.h"
namespace StackTrace
{

View File

@@ -1,5 +1,8 @@
#include "common/ScaLBL.h"
#include <chrono>
ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
//......................................................................................
Lock=false; // unlock the communicator
@@ -411,20 +414,19 @@ double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np
double FZ = 0.0;
ScaLBL_D3Q19_Init(fq, Np);
//.......create and start timer............
double starttime,stoptime,cputime;
Barrier();
starttime = MPI_Wtime();
//.........................................
auto t1 = std::chrono::system_clock::now();
for (int t=0; t<TIMESTEPS; t++){
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, FirstInterior(), LastInterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, LastExterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
ScaLBL_D3Q19_AAeven_MRT(fq, FirstInterior(), LastInterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
ScaLBL_D3Q19_AAeven_MRT(fq, 0, LastExterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
}
stoptime = MPI_Wtime();
auto t2 = std::chrono::system_clock::now();
Barrier();
// Compute the walltime per timestep
cputime = 0.5*(stoptime - starttime)/TIMESTEPS;
double diff = std::chrono::duration<double>( t2 - t1 ).count();
double cputime = 0.5*diff/TIMESTEPS;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
return MLUPS;

View File

@@ -70,49 +70,6 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int
extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,
double *Poros,double *Perm, double *Velocity,double Den,double *Pressure);
// ION TRANSPORT MODEL
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np);
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np);
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np);
// LBM Poisson solver
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList,int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,
int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,
int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
//maybe deprecated
//extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC,
// int strideY, int strideZ,int start, int finish, int Np);
// LBM Stokes Model (adapted from MRT model)
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB,
double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB,
double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,
double *Poros,double *Perm, double *Velocity,double Den,double *Pressure);
@@ -129,6 +86,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map,
double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel,double *Pressure,
double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np);
// ION TRANSPORT MODEL
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np);
@@ -220,6 +178,36 @@ extern "C" void ScaLBL_D3Q7_AAeven_DFH(double *Aq, double *Bq, double *Den, doub
extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double *ColorGrad, int start, int finish, int Np);
// FREE ENERGY LEE MODEL
extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np);
extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np);
extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad,
double rhonA, double rhoB, double tauM, double W, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
double rhoA, double rhoB, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi,
double rhoA, double rhoB, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad,
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad,
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure,
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure,
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np);
// BOUNDARY CONDITION ROUTINES
extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int Np);

View File

@@ -67,7 +67,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr <Domain
sendCount_y =getHaloBlock(width,Nxh-width,width,2*width,width,Nzh-width,dvcSendList_y);
sendCount_Y =getHaloBlock(width,Nxh-width,Nyh-2*width,Nyh-width,width,Nzh-width,dvcSendList_Y);
sendCount_z =getHaloBlock(width,Nxh-width,width,Nyh-width,width,2*width,dvcSendList_z);
sendCount_X =getHaloBlock(width,Nxh-width,width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_Z);
sendCount_Z =getHaloBlock(width,Nxh-width,width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_Z);
// xy
sendCount_xy =getHaloBlock(width,2*width,width,2*width,width,Nzh-width,dvcSendList_xy);
sendCount_xY =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,width,Nzh-width,dvcSendList_xY);
@@ -87,7 +87,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr <Domain
sendCount_xyz =getHaloBlock(width,2*width,width,2*width,width,2*width,dvcSendList_xyz);
sendCount_xyZ =getHaloBlock(width,2*width,width,2*width,Nzh-2*width,Nzh-width,dvcSendList_xyZ);
sendCount_xYz =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,width,2*width,dvcSendList_xYz);
sendCount_xYz =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_xYZ);
sendCount_xYZ =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_xYZ);
sendCount_Xyz =getHaloBlock(Nxh-2*width,Nxh-width,width,2*width,width,2*width,dvcSendList_Xyz);
sendCount_XyZ =getHaloBlock(Nxh-2*width,Nxh-width,width,2*width,Nzh-2*width,Nzh-width,dvcSendList_XyZ);
sendCount_XYz =getHaloBlock(Nxh-2*width,Nxh-width,Nyh-2*width,Nyh-width,width,2*width,dvcSendList_XYz);
@@ -124,7 +124,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr <Domain
recvCount_XyZ =getHaloBlock(Nxh-width,Nxh,0,width,Nzh-width,Nzh,dvcRecvList_XyZ);
recvCount_XYz =getHaloBlock(Nxh-width,Nxh,Nyh-width,Nyh,0,width,dvcRecvList_XYz);
recvCount_XYZ =getHaloBlock(Nxh-width,Nxh,Nyh-width,Nyh,Nzh-width,Nzh,dvcRecvList_XYZ);
//......................................................................................
ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, sendCount_x*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, sendCount_X*sizeof(double)); // Allocate device memory
@@ -288,6 +288,7 @@ void ScaLBLWideHalo_Communicator::Send(double *data){
req1[25] = MPI_COMM_SCALBL.Isend(&sendCount_xYZ,1,rank_xYZ,sendtag+25);
req2[25] = MPI_COMM_SCALBL.Irecv(&recvCount_Xyz,1,rank_Xyz,recvtag+25);
//...................................................................................
}
@@ -297,11 +298,10 @@ ScaLBLWideHalo_Communicator::~ScaLBLWideHalo_Communicator()
void ScaLBLWideHalo_Communicator::Recv(double *data){
//...................................................................................
MPI_Waitall(26,req1,stat1);
MPI_Waitall(26,req2,stat2);
Utilities::MPI::waitAll(26,req1);
Utilities::MPI::waitAll(26,req2);
ScaLBL_DeviceBarrier();
//...................................................................................
//...................................................................................
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, Nh);
ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, Nh);

View File

@@ -4,6 +4,7 @@ This class implements support for halo widths larger than 1
#ifndef WideHalo_H
#define WideHalo_H
#include "common/ScaLBL.h"
#include "common/MPI.h"
class ScaLBLWideHalo_Communicator{
public:
@@ -52,9 +53,7 @@ private:
int sendtag,recvtag;
// Give the object it's own MPI communicator
RankInfoStruct rank_info;
MPI_Group Group; // Group of processors associated with this domain
MPI_Request req1[26],req2[26];
MPI_Status stat1[26],stat2[26];
//......................................................................................
// MPI ranks for all 18 neighbors
//......................................................................................
@@ -95,11 +94,11 @@ private:
int *dvcRecvList_xyZ,*dvcRecvList_XyZ,*dvcRecvList_xYZ,*dvcRecvList_XYZ;
//......................................................................................
inline int getHaloBlock(int imin, int imax, int jmin, int jmax, int kmin, int kmax, int *dvcList){
inline int getHaloBlock(int imin, int imax, int jmin, int jmax, int kmin, int kmax, int *& dvcList){
int count = 0;
int *List;
List = new int [(imax-imin)*(jmax-jmin)*(kmax-kmin)];
for (int k=kmin; k<kmax; k++){
for (k=kmin; k<kmax; k++){
for (j=jmin; j<jmax; j++){
for (i=imin; i<imax; i++){
List[count++] = k*Nxh*Nyh + j*Nxh + i;

File diff suppressed because it is too large Load Diff

View File

@@ -12,111 +12,111 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish,
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if ( n<finish ){
// q=0
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
f7 = dist[8*Np+n];
f8 = dist[7*Np+n];
f9 = dist[10*Np+n];
f10 = dist[9*Np+n];
f11 = dist[12*Np+n];
f12 = dist[11*Np+n];
f13 = dist[14*Np+n];
f14 = dist[13*Np+n];
f15 = dist[16*Np+n];
f16 = dist[15*Np+n];
f17 = dist[18*Np+n];
f18 = dist[17*Np+n];
if ( n<finish ){
// q=0
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
f7 = dist[8*Np+n];
f8 = dist[7*Np+n];
f9 = dist[10*Np+n];
f10 = dist[9*Np+n];
f11 = dist[12*Np+n];
f12 = dist[11*Np+n];
f13 = dist[14*Np+n];
f14 = dist[13*Np+n];
f15 = dist[16*Np+n];
f16 = dist[15*Np+n];
f17 = dist[18*Np+n];
f18 = dist[17*Np+n];
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
uu = 1.5*(ux*ux+uy*uy+uz*uz);
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
uu = 1.5*(ux*ux+uy*uy+uz*uz);
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
// q=2
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
// q=2
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) +
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) +
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) +
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) +
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) +
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) +
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) +
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) +
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
// q = 7
dist[7*Np+n] = f7*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
// q = 7
dist[7*Np+n] = f7*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
// q = 8
dist[8*Np+n] = f8*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
// q = 8
dist[8*Np+n] = f8*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
// q = 9
dist[9*Np+n] = f9*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
// q = 9
dist[9*Np+n] = f9*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
// q = 10
dist[10*Np+n] = f10*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
// q = 10
dist[10*Np+n] = f10*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
// q = 11
dist[11*Np+n] = f11*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
// q = 11
dist[11*Np+n] = f11*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
// q = 12
dist[12*Np+n] = f12*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
// q = 12
dist[12*Np+n] = f12*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
// q = 13
dist[13*Np+n] = f13*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
// q = 13
dist[13*Np+n] = f13*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
// q= 14
dist[14*Np+n] = f14*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
// q= 14
dist[14*Np+n] = f14*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
// q = 15
dist[15*Np+n] = f15*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
// q = 15
dist[15*Np+n] = f15*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
// q = 16
dist[16*Np+n] = f16*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
// q = 16
dist[16*Np+n] = f16*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
// q = 17
dist[17*Np+n] = f17*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
// q = 17
dist[17*Np+n] = f17*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
// q = 18
dist[18*Np+n] = f18*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
// q = 18
dist[18*Np+n] = f18*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
//........................................................................
//........................................................................
}
}
}
@@ -131,180 +131,180 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if ( n<finish ){
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
if ( n<finish ){
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q = 4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
// q=7
nr7 = neighborList[n+6*Np];
f7 = dist[nr7];
// q = 6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
// q = 8
nr8 = neighborList[n+7*Np];
f8 = dist[nr8];
// q=7
nr7 = neighborList[n+6*Np];
f7 = dist[nr7];
// q=9
nr9 = neighborList[n+8*Np];
f9 = dist[nr9];
// q = 8
nr8 = neighborList[n+7*Np];
f8 = dist[nr8];
// q = 10
nr10 = neighborList[n+9*Np];
f10 = dist[nr10];
// q=9
nr9 = neighborList[n+8*Np];
f9 = dist[nr9];
// q=11
nr11 = neighborList[n+10*Np];
f11 = dist[nr11];
// q = 10
nr10 = neighborList[n+9*Np];
f10 = dist[nr10];
// q=12
nr12 = neighborList[n+11*Np];
f12 = dist[nr12];
// q=11
nr11 = neighborList[n+10*Np];
f11 = dist[nr11];
// q=13
nr13 = neighborList[n+12*Np];
f13 = dist[nr13];
// q=12
nr12 = neighborList[n+11*Np];
f12 = dist[nr12];
// q=14
nr14 = neighborList[n+13*Np];
f14 = dist[nr14];
// q=13
nr13 = neighborList[n+12*Np];
f13 = dist[nr13];
// q=15
nr15 = neighborList[n+14*Np];
f15 = dist[nr15];
// q=14
nr14 = neighborList[n+13*Np];
f14 = dist[nr14];
// q=16
nr16 = neighborList[n+15*Np];
f16 = dist[nr16];
// q=15
nr15 = neighborList[n+14*Np];
f15 = dist[nr15];
// q=17
//fq = dist[18*Np+n];
nr17 = neighborList[n+16*Np];
f17 = dist[nr17];
// q=16
nr16 = neighborList[n+15*Np];
f16 = dist[nr16];
// q=18
nr18 = neighborList[n+17*Np];
f18 = dist[nr18];
// q=17
//fq = dist[18*Np+n];
nr17 = neighborList[n+16*Np];
f17 = dist[nr17];
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
uu = 1.5*(ux*ux+uy*uy+uz*uz);
// q=18
nr18 = neighborList[n+17*Np];
f18 = dist[nr18];
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
uu = 1.5*(ux*ux+uy*uy+uz*uz);
// q = 1
dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
// q=2
dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
// q = 1
dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
// q = 3
dist[nr4] = f3*(1.0-rlx) +
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
// q=2
dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
// q = 4
dist[nr3] = f4*(1.0-rlx) +
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
// q = 3
dist[nr4] = f3*(1.0-rlx) +
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
// q = 5
dist[nr6] = f5*(1.0-rlx) +
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
// q = 4
dist[nr3] = f4*(1.0-rlx) +
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
// q = 6
dist[nr5] = f6*(1.0-rlx) +
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
// q = 5
dist[nr6] = f5*(1.0-rlx) +
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
// q = 7
dist[nr8] = f7*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
// q = 6
dist[nr5] = f6*(1.0-rlx) +
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
// q = 8
dist[nr7] = f8*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
// q = 7
dist[nr8] = f7*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
// q = 9
dist[nr10] = f9*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
// q = 8
dist[nr7] = f8*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
// q = 10
dist[nr9] = f10*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
// q = 9
dist[nr10] = f9*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
// q = 11
dist[nr12] = f11*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
// q = 10
dist[nr9] = f10*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
// q = 12
dist[nr11] = f12*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
// q = 11
dist[nr12] = f11*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
// q = 13
dist[nr14] = f13*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
// q = 12
dist[nr11] = f12*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
// q= 14
dist[nr13] = f14*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
// q = 13
dist[nr14] = f13*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
// q = 15
dist[nr16] = f15*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
// q= 14
dist[nr13] = f14*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
// q = 16
dist[nr15] = f16*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
// q = 15
dist[nr16] = f15*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
// q = 17
dist[nr18] = f17*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
// q = 16
dist[nr15] = f16*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
// q = 18
dist[nr17] = f18*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
// q = 17
dist[nr18] = f17*(1.0-rlx) +
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
// q = 18
dist[nr17] = f18*(1.0-rlx) +
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
}
}
}
extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
dvc_ScaLBL_D3Q19_AAeven_BGK<<<NBLOCKS,NTHREADS >>>(dist,start,finish,Np,rlx,Fx,Fy,Fz);
cudaError_t err = cudaGetLastError();
dvc_ScaLBL_D3Q19_AAeven_BGK<<<NBLOCKS,NTHREADS >>>(dist,start,finish,Np,rlx,Fx,Fy,Fz);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
dvc_ScaLBL_D3Q19_AAodd_BGK<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz);
dvc_ScaLBL_D3Q19_AAodd_BGK<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz);
cudaError_t err = cudaGetLastError();
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err));
}

2016
cuda/FreeLee.cu Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,7 @@
SET( HIP_SEPERABLE_COMPILATION ON )
SET_SOURCE_FILES_PROPERTIES( BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
HIP_ADD_LIBRARY( lbpm-hip BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.cu SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
FILE( GLOB HIP_SOURCES "*.cu" )
SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
#TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so )
#TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip )
#ADD_DEPENDENCIES( lbpm-hip copy-include )

View File

@@ -89,9 +89,25 @@ __global__ void sum_kernel_block(double *sum, double *input, int n)
__inline__ __device__
double warpReduceSum(double val) {
#if 0
for (int offset = warpSize/2; offset > 0; offset /= 2)
val += __shfl_down_sync(0xFFFFFFFF, val, offset, 32);
return val;
#else
short int id = threadIdx.x % warpSize;
__shared__ double tmp[64];
tmp[id] = val;
__syncthreads();
if ( warpSize == 64) {
tmp[id] += tmp[id+32]; __syncthreads();
}
tmp[id] += tmp[id+16]; __syncthreads();
tmp[id] += tmp[id+8]; __syncthreads();
tmp[id] += tmp[id+4]; __syncthreads();
tmp[id] += tmp[id+2]; __syncthreads();
tmp[id] += tmp[id+1]; __syncthreads();
return tmp[0];
#endif
}
__inline__ __device__
@@ -1730,6 +1746,44 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist,
}
}
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
int idx, n;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
double f5 = 0.111111111111111111111111 - dist[6*Np+n];
double f11 = 0.05555555555555555555556 - dist[12*Np+n];
double f14 = 0.05555555555555555555556 - dist[13*Np+n];
double f15 = 0.05555555555555555555556 - dist[16*Np+n];
double f18 = 0.05555555555555555555556 - dist[17*Np+n];
dist[6*Np+n] = f5;
dist[12*Np+n] = f11;
dist[13*Np+n] = f14;
dist[16*Np+n] = f15;
dist[17*Np+n] = f18;
}
}
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
int idx, n;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
double f6 = 0.111111111111111111111111 - dist[5*Np+n];
double f12 = 0.05555555555555555555556 - dist[11*Np+n];
double f13 = 0.05555555555555555555556 - dist[14*Np+n] ;
double f16 = 0.05555555555555555555556 - dist[15*Np+n];
double f17 = 0.05555555555555555555556 - dist[18*Np+n];
dist[5*Np+n] = f6;
dist[11*Np+n] = f12;
dist[14*Np+n] = f13;
dist[15*Np+n] = f16;
dist[18*Np+n] = f17;
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np)
{
int idx, n;
@@ -2605,6 +2659,24 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub
}
extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q19_Reflection_BC_z<<<GRID,512>>>(list, dist, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("HIP error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q19_Reflection_BC_Z<<<GRID,512>>>(list, dist, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("HIP error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" double deviceReduce(double *in, double* out, int N) {
int threads = 512;
int blocks = min((N + threads - 1) / threads, 1024);

536
hip/D3Q7BC.cu Normal file
View File

@@ -0,0 +1,536 @@
#include <math.h>
#include <stdio.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 560
#define NTHREADS 128
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
int idx;
int iq,ib;
double value_b,value_q;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
value_b = BoundaryValue[ib];//get boundary value from a solid site
value_q = dist[iq];
dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice
}
}
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
int idx;
int iq,ib;
double value_b,value_q;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
value_b = BoundaryValue[ib];//get boundary value from a solid site
value_q = dist[iq];
dist[iq] = value_q + value_b;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
//...................................................
f5 = Vin - (f0+f1+f2+f3+f4+f6);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
//...................................................
f6 = Vout - (f0+f1+f2+f3+f4+f5);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
{
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
f5 = Vin - (f0+f1+f2+f3+f4+f6);
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
{
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
// unknown distributions
nr6 = d_neighborList[n+5*Np];
f6 = Vout - (f0+f1+f2+f3+f4+f5);
dist[nr6] = f6;
}
}
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
int idx,n,nm;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
nm = Map[n];
Psi[nm] = Vin;
}
}
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
int idx,n,nm;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
nm = Map[n];
Psi[nm] = Vout;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
//...................................................
f5 = Cin - (f0+f1+f2+f3+f4+f6);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
//...................................................
f6 = Cout - (f0+f1+f2+f3+f4+f5);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
{
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
f5 = Cin - (f0+f1+f2+f3+f4+f6);
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
{
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
// unknown distributions
nr6 = d_neighborList[n+5*Np];
f6 = Cout - (f0+f1+f2+f3+f4+f5);
dist[nr6] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
// unknown distributions
nr6 = d_neighborList[n+5*Np];
dist[nr6] = f6;
}
}
//*************************************************************************
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}

2017
hip/FreeLee.cu Normal file

File diff suppressed because it is too large Load Diff

2745
hip/Greyscale.cu Normal file

File diff suppressed because it is too large Load Diff

3038
hip/GreyscaleColor.cu Normal file

File diff suppressed because it is too large Load Diff

392
hip/Ion.cu Normal file
View File

@@ -0,0 +1,392 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
int n,nread;
double fq,Ci;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
Ci = fq;
// q=1
nread = neighborList[n];
fq = dist[nread];
Ci += fq;
// q=2
nread = neighborList[n+Np];
fq = dist[nread];
Ci += fq;
// q=3
nread = neighborList[n+2*Np];
fq = dist[nread];
Ci += fq;
// q=4
nread = neighborList[n+3*Np];
fq = dist[nread];
Ci += fq;
// q=5
nread = neighborList[n+4*Np];
fq = dist[nread];
Ci += fq;
// q=6
nread = neighborList[n+5*Np];
fq = dist[nread];
Ci += fq;
Den[n]=Ci;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
int n;
double fq,Ci;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
Ci = fq;
// q=1
fq = dist[2*Np+n];
Ci += fq;
// q=2
fq = dist[1*Np+n];
Ci += fq;
// q=3
fq = dist[4*Np+n];
Ci += fq;
// q=4
fq = dist[3*Np+n];
Ci += fq;
// q=5
fq = dist[6*Np+n];
Ci += fq;
// q=6
fq = dist[5*Np+n];
Ci += fq;
Den[n]=Ci;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
int n;
double Ci;
double ux,uy,uz;
double uEPx,uEPy,uEPz;//electrochemical induced velocity
double Ex,Ey,Ez;//electrical field
double f0,f1,f2,f3,f4,f5,f6;
int nr1,nr2,nr3,nr4,nr5,nr6;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci=Den[n];
Ex=ElectricField[n+0*Np];
Ey=ElectricField[n+1*Np];
Ez=ElectricField[n+2*Np];
ux=Velocity[n+0*Np];
uy=Velocity[n+1*Np];
uz=Velocity[n+2*Np];
uEPx=zi*Di/Vt*Ex;
uEPy=zi*Di/Vt*Ey;
uEPz=zi*Di/Vt*Ez;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 1
dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
//dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=2
dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
//dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 3
dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
//dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 4
dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
//dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 5
dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
//dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 6
dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
//dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
int n;
double Ci;
double ux,uy,uz;
double uEPx,uEPy,uEPz;//electrochemical induced velocity
double Ex,Ey,Ez;//electrical field
double f0,f1,f2,f3,f4,f5,f6;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci=Den[n];
Ex=ElectricField[n+0*Np];
Ey=ElectricField[n+1*Np];
Ez=ElectricField[n+2*Np];
ux=Velocity[n+0*Np];
uy=Velocity[n+1*Np];
uz=Velocity[n+2*Np];
uEPx=zi*Di/Vt*Ex;
uEPy=zi*Di/Vt*Ey;
uEPz=zi*Di/Vt*Ez;
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
//dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=2
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
//dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
//dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
//dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
//dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
//dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
int n;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (n<Np) {
dist[0*Np+n] = 0.25*DenInit;
dist[1*Np+n] = 0.125*DenInit;
dist[2*Np+n] = 0.125*DenInit;
dist[3*Np+n] = 0.125*DenInit;
dist[4*Np+n] = 0.125*DenInit;
dist[5*Np+n] = 0.125*DenInit;
dist[6*Np+n] = 0.125*DenInit;
Den[n] = DenInit;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
int n;
double DenInit;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (n<Np) {
DenInit = Den[n];
dist[0*Np+n] = 0.25*DenInit;
dist[1*Np+n] = 0.125*DenInit;
dist[2*Np+n] = 0.125*DenInit;
dist[3*Np+n] = 0.125*DenInit;
dist[4*Np+n] = 0.125*DenInit;
dist[5*Np+n] = 0.125*DenInit;
dist[6*Np+n] = 0.125*DenInit;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
int n;
double Ci;//ion concentration of species i
double CD;//charge density
double CD_tmp;
double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
Ci = Den[n+ion_component*Np];
CD = ChargeDensity[n];
CD_tmp = F*IonValence*Ci;
ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
}
}
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}

77
hip/MixedGradient.cu Normal file
View File

@@ -0,0 +1,77 @@
/* Implement Mixed Gradient (Lee et al. JCP 2016)*/
#include <stdio.h>
//#include <cuda_profiler_api.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 560
#define NTHREADS 128
__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
{
static const int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1},
{1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0},
{1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1},
{0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}};
int i,j,k,n,N,idx;
int np,np2,nm; // neighbors
double v,vp,vp2,vm; // values at neighbors
double grad;
N = Nx*Ny*Nz;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
idx = start + S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (idx<finish){
n = Map[idx]; // layout in regular array
//.......Back out the 3-D indices for node n..............
k = n/(Nx*Ny);
j = (n-Nx*Ny*k)/Nx;
i = n-Nx*Ny*k-Nx*j;
v = Phi[n];
grad = 0.0;
for (int q=0; q<6; q++){
int iqx = D3Q19[q][0];
int iqy = D3Q19[q][1];
int iqz = D3Q19[q][2];
np = (k+iqz)*Nx*Ny + (j+iqy)*Nx + i + iqx;
np2 = (k+2*iqz)*Nx*Ny + (j+2*iqy)*Nx + i + 2*iqx;
nm = (k-iqz)*Nx*Ny + (j-iqy)*Nx + i - iqx;
vp = Phi[np];
vp2 = Phi[np2];
vm = Phi[nm];
grad += 0.25*(5.0*vp-vp2-3.0*v-vm);
}
for (int q=6; q<18; q++){
int iqx = D3Q19[q][0];
int iqy = D3Q19[q][1];
int iqz = D3Q19[q][2];
np = (k+iqz)*Nx*Ny + (j+iqy)*Nx + i + iqx;
np2 = (k+2*iqz)*Nx*Ny + (j+2*iqy)*Nx + i + 2*iqx;
nm = (k-iqz)*Nx*Ny + (j-iqy)*Nx + i - iqx;
vp = Phi[np];
vp2 = Phi[np2];
vm = Phi[nm];
grad += 0.125*(5.0*vp-vp2-3.0*v-vm);
}
Gradient[n] = grad;
}
}
}
extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
{
hipProfilerStart();
dvc_ScaLBL_D3Q19_MixedGradient<<<NBLOCKS,NTHREADS >>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err));
}
hipProfilerStop();
}

330
hip/Poisson.cu Normal file
View File

@@ -0,0 +1,330 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
double psi;//electric potential
double fq;
int nread;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
psi = fq;
// q=1
nread = neighborList[n];
fq = dist[nread];
psi += fq;
// q=2
nread = neighborList[n+Np];
fq = dist[nread];
psi += fq;
// q=3
nread = neighborList[n+2*Np];
fq = dist[nread];
psi += fq;
// q = 4
nread = neighborList[n+3*Np];
fq = dist[nread];
psi += fq;
// q=5
nread = neighborList[n+4*Np];
fq = dist[nread];
psi += fq;
// q = 6
nread = neighborList[n+5*Np];
fq = dist[nread];
psi += fq;
idx=Map[n];
Psi[idx] = psi;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
double psi;//electric potential
double fq;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
psi = fq;
// q=1
fq = dist[2*Np+n];
psi += fq;
// q=2
fq = dist[1*Np+n];
psi += fq;
// q=3
fq = dist[4*Np+n];
psi += fq;
// q=4
fq = dist[3*Np+n];
psi += fq;
// q=5
fq = dist[6*Np+n];
psi += fq;
// q=6
fq = dist[5*Np+n];
psi += fq;
idx=Map[n];
Psi[idx] = psi;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
int n;
double psi;//electric potential
double Ex,Ey,Ez;//electric field
double rho_e;//local charge density
double f0,f1,f2,f3,f4,f5,f6;
int nr1,nr2,nr3,nr4,nr5,nr6;
double rlx=1.0/tau;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
rho_e = Den_charge[n];
rho_e = rho_e/epsilon_LB;
idx=Map[n];
psi = Psi[idx];
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
Ez = (f5-f6)*rlx*4.0;
ElectricField[n+0*Np] = Ex;
ElectricField[n+1*Np] = Ey;
ElectricField[n+2*Np] = Ez;
// q = 0
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
// q = 1
dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 2
dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 3
dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 4
dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 5
dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 6
dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
//........................................................................
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
int n;
double psi;//electric potential
double Ex,Ey,Ez;//electric field
double rho_e;//local charge density
double f0,f1,f2,f3,f4,f5,f6;
double rlx=1.0/tau;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
rho_e = Den_charge[n];
rho_e = rho_e/epsilon_LB;
idx=Map[n];
psi = Psi[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
Ez = (f5-f6)*rlx*4.0;
ElectricField[n+0*Np] = Ex;
ElectricField[n+1*Np] = Ey;
ElectricField[n+2*Np] = Ez;
// q = 0
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 2
dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
//........................................................................
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
int ijk;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
ijk = Map[n];
dist[0*Np+n] = 0.25*Psi[ijk];
dist[1*Np+n] = 0.125*Psi[ijk];
dist[2*Np+n] = 0.125*Psi[ijk];
dist[3*Np+n] = 0.125*Psi[ijk];
dist[4*Np+n] = 0.125*Psi[ijk];
dist[5*Np+n] = 0.125*Psi[ijk];
dist[6*Np+n] = 0.125*Psi[ijk];
}
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}

996
hip/Stokes.cu Normal file
View File

@@ -0,0 +1,996 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){
int n;
double fq;
// conserved momemnts
double rho,jx,jy,jz;
double ux,uy,uz;
// non-conserved moments
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
int nread;
// body force due to electric field
double rhoE;//charge density
double Ex,Ey,Ez;
// total body force
double Fx,Fy,Fz;
const double mrt_V1=0.05263157894736842;
const double mrt_V2=0.012531328320802;
const double mrt_V3=0.04761904761904762;
const double mrt_V4=0.004594820384294068;
const double mrt_V5=0.01587301587301587;
const double mrt_V6=0.0555555555555555555555555;
const double mrt_V7=0.02777777777777778;
const double mrt_V8=0.08333333333333333;
const double mrt_V9=0.003341687552213868;
const double mrt_V10=0.003968253968253968;
const double mrt_V11=0.01388888888888889;
const double mrt_V12=0.04166666666666666;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
rhoE = ChargeDensity[n];
Ex = ElectricField[n+0*Np];
Ey = ElectricField[n+1*Np];
Ez = ElectricField[n+2*Np];
//compute total body force, including input body force (Gx,Gy,Gz)
Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
// q=0
fq = dist[n];
rho = fq;
m1 = -30.0*fq;
m2 = 12.0*fq;
// q=1
nread = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
fq = dist[nread]; // reading the f1 data into register fq
//fp = dist[10*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jx = fq;
m4 = -4.0*fq;
m9 = 2.0*fq;
m10 = -4.0*fq;
// f2 = dist[10*Np+n];
nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
fq = dist[nread]; // reading the f2 data into register fq
//fq = dist[Np+n];
rho += fq;
m1 -= 11.0*(fq);
m2 -= 4.0*(fq);
jx -= fq;
m4 += 4.0*(fq);
m9 += 2.0*(fq);
m10 -= 4.0*(fq);
// q=3
nread = neighborList[n+2*Np]; // neighbor 4
fq = dist[nread];
//fq = dist[11*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jy = fq;
m6 = -4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 = fq;
m12 = -2.0*fq;
// q = 4
nread = neighborList[n+3*Np]; // neighbor 3
fq = dist[nread];
//fq = dist[2*Np+n];
rho+= fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jy -= fq;
m6 += 4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 += fq;
m12 -= 2.0*fq;
// q=5
nread = neighborList[n+4*Np];
fq = dist[nread];
//fq = dist[12*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jz = fq;
m8 = -4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 -= fq;
m12 += 2.0*fq;
// q = 6
nread = neighborList[n+5*Np];
fq = dist[nread];
//fq = dist[3*Np+n];
rho+= fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jz -= fq;
m8 += 4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 -= fq;
m12 += 2.0*fq;
// q=7
nread = neighborList[n+6*Np];
fq = dist[nread];
//fq = dist[13*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jy += fq;
m6 += fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 = fq;
m16 = fq;
m17 = -fq;
// q = 8
nread = neighborList[n+7*Np];
fq = dist[nread];
//fq = dist[4*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jy -= fq;
m6 -= fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 += fq;
m16 -= fq;
m17 += fq;
// q=9
nread = neighborList[n+8*Np];
fq = dist[nread];
//fq = dist[14*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jy -= fq;
m6 -= fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 -= fq;
m16 += fq;
m17 += fq;
// q = 10
nread = neighborList[n+9*Np];
fq = dist[nread];
//fq = dist[5*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jy += fq;
m6 += fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 -= fq;
m16 -= fq;
m17 -= fq;
// q=11
nread = neighborList[n+10*Np];
fq = dist[nread];
//fq = dist[15*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jz += fq;
m8 += fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 = fq;
m16 -= fq;
m18 = fq;
// q=12
nread = neighborList[n+11*Np];
fq = dist[nread];
//fq = dist[6*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jz -= fq;
m8 -= fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 += fq;
m16 += fq;
m18 -= fq;
// q=13
nread = neighborList[n+12*Np];
fq = dist[nread];
//fq = dist[16*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jz -= fq;
m8 -= fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 -= fq;
m16 -= fq;
m18 -= fq;
// q=14
nread = neighborList[n+13*Np];
fq = dist[nread];
//fq = dist[7*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jz += fq;
m8 += fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 -= fq;
m16 += fq;
m18 += fq;
// q=15
nread = neighborList[n+14*Np];
fq = dist[nread];
//fq = dist[17*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy += fq;
m6 += fq;
jz += fq;
m8 += fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 = fq;
m17 += fq;
m18 -= fq;
// q=16
nread = neighborList[n+15*Np];
fq = dist[nread];
//fq = dist[8*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy -= fq;
m6 -= fq;
jz -= fq;
m8 -= fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 += fq;
m17 -= fq;
m18 += fq;
// q=17
//fq = dist[18*Np+n];
nread = neighborList[n+16*Np];
fq = dist[nread];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy += fq;
m6 += fq;
jz -= fq;
m8 -= fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 -= fq;
m17 += fq;
m18 += fq;
// q=18
nread = neighborList[n+17*Np];
fq = dist[nread];
//fq = dist[9*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy -= fq;
m6 -= fq;
jz += fq;
m8 += fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 -= fq;
m17 -= fq;
m18 -= fq;
// write the velocity
ux = jx / rho0;
uy = jy / rho0;
uz = jz / rho0;
Velocity[n] = ux;
Velocity[Np+n] = uy;
Velocity[2*Np+n] = uz;
//..............incorporate external force................................................
//..............carry out relaxation process...............................................
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
//.......................................................................................................
//.................inverse transformation......................................................
// q=0
fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
dist[n] = fq;
// q = 1
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx;
nread = neighborList[n+Np];
dist[nread] = fq;
// q=2
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
nread = neighborList[n];
dist[nread] = fq;
// q = 3
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
nread = neighborList[n+3*Np];
dist[nread] = fq;
// q = 4
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
nread = neighborList[n+2*Np];
dist[nread] = fq;
// q = 5
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
nread = neighborList[n+5*Np];
dist[nread] = fq;
// q = 6
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
nread = neighborList[n+4*Np];
dist[nread] = fq;
// q = 7
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
nread = neighborList[n+7*Np];
dist[nread] = fq;
// q = 8
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
nread = neighborList[n+6*Np];
dist[nread] = fq;
// q = 9
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
nread = neighborList[n+9*Np];
dist[nread] = fq;
// q = 10
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
nread = neighborList[n+8*Np];
dist[nread] = fq;
// q = 11
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
nread = neighborList[n+11*Np];
dist[nread] = fq;
// q = 12
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
nread = neighborList[n+10*Np];
dist[nread]= fq;
// q = 13
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
nread = neighborList[n+13*Np];
dist[nread] = fq;
// q= 14
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
nread = neighborList[n+12*Np];
dist[nread] = fq;
// q = 15
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
nread = neighborList[n+15*Np];
dist[nread] = fq;
// q = 16
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
nread = neighborList[n+14*Np];
dist[nread] = fq;
// q = 17
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
nread = neighborList[n+17*Np];
dist[nread] = fq;
// q = 18
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
nread = neighborList[n+16*Np];
dist[nread] = fq;
}
}
}
__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
int n;
double fq;
// conserved momemnts
double rho,jx,jy,jz;
double ux,uy,uz;
// non-conserved moments
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
// body force due to electric field
double rhoE;//charge density
double Ex,Ey,Ez;
// total body force
double Fx,Fy,Fz;
const double mrt_V1=0.05263157894736842;
const double mrt_V2=0.012531328320802;
const double mrt_V3=0.04761904761904762;
const double mrt_V4=0.004594820384294068;
const double mrt_V5=0.01587301587301587;
const double mrt_V6=0.0555555555555555555555555;
const double mrt_V7=0.02777777777777778;
const double mrt_V8=0.08333333333333333;
const double mrt_V9=0.003341687552213868;
const double mrt_V10=0.003968253968253968;
const double mrt_V11=0.01388888888888889;
const double mrt_V12=0.04166666666666666;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
rhoE = ChargeDensity[n];
Ex = ElectricField[n+0*Np];
Ey = ElectricField[n+1*Np];
Ez = ElectricField[n+2*Np];
//compute total body force, including input body force (Gx,Gy,Gz)
Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;//the extra factors at the end necessarily convert unit from phys to LB
Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
// q=0
fq = dist[n];
rho = fq;
m1 = -30.0*fq;
m2 = 12.0*fq;
// q=1
fq = dist[2*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jx = fq;
m4 = -4.0*fq;
m9 = 2.0*fq;
m10 = -4.0*fq;
// f2 = dist[10*Np+n];
fq = dist[1*Np+n];
rho += fq;
m1 -= 11.0*(fq);
m2 -= 4.0*(fq);
jx -= fq;
m4 += 4.0*(fq);
m9 += 2.0*(fq);
m10 -= 4.0*(fq);
// q=3
fq = dist[4*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jy = fq;
m6 = -4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 = fq;
m12 = -2.0*fq;
// q = 4
fq = dist[3*Np+n];
rho+= fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jy -= fq;
m6 += 4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 += fq;
m12 -= 2.0*fq;
// q=5
fq = dist[6*Np+n];
rho += fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jz = fq;
m8 = -4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 -= fq;
m12 += 2.0*fq;
// q = 6
fq = dist[5*Np+n];
rho+= fq;
m1 -= 11.0*fq;
m2 -= 4.0*fq;
jz -= fq;
m8 += 4.0*fq;
m9 -= fq;
m10 += 2.0*fq;
m11 -= fq;
m12 += 2.0*fq;
// q=7
fq = dist[8*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jy += fq;
m6 += fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 = fq;
m16 = fq;
m17 = -fq;
// q = 8
fq = dist[7*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jy -= fq;
m6 -= fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 += fq;
m16 -= fq;
m17 += fq;
// q=9
fq = dist[10*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jy -= fq;
m6 -= fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 -= fq;
m16 += fq;
m17 += fq;
// q = 10
fq = dist[9*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jy += fq;
m6 += fq;
m9 += fq;
m10 += fq;
m11 += fq;
m12 += fq;
m13 -= fq;
m16 -= fq;
m17 -= fq;
// q=11
fq = dist[12*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jz += fq;
m8 += fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 = fq;
m16 -= fq;
m18 = fq;
// q=12
fq = dist[11*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jz -= fq;
m8 -= fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 += fq;
m16 += fq;
m18 -= fq;
// q=13
fq = dist[14*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx += fq;
m4 += fq;
jz -= fq;
m8 -= fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 -= fq;
m16 -= fq;
m18 -= fq;
// q=14
fq = dist[13*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jx -= fq;
m4 -= fq;
jz += fq;
m8 += fq;
m9 += fq;
m10 += fq;
m11 -= fq;
m12 -= fq;
m15 -= fq;
m16 += fq;
m18 += fq;
// q=15
fq = dist[16*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy += fq;
m6 += fq;
jz += fq;
m8 += fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 = fq;
m17 += fq;
m18 -= fq;
// q=16
fq = dist[15*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy -= fq;
m6 -= fq;
jz -= fq;
m8 -= fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 += fq;
m17 -= fq;
m18 += fq;
// q=17
fq = dist[18*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy += fq;
m6 += fq;
jz -= fq;
m8 -= fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 -= fq;
m17 += fq;
m18 += fq;
// q=18
fq = dist[17*Np+n];
rho += fq;
m1 += 8.0*fq;
m2 += fq;
jy -= fq;
m6 -= fq;
jz += fq;
m8 += fq;
m9 -= 2.0*fq;
m10 -= 2.0*fq;
m14 -= fq;
m17 -= fq;
m18 -= fq;
// write the velocity
ux = jx / rho0;
uy = jy / rho0;
uz = jz / rho0;
Velocity[n] = ux;
Velocity[Np+n] = uy;
Velocity[2*Np+n] = uz;
//........................................................................
// READ THE DISTRIBUTIONS
// (read from opposite array due to previous swap operation)
//........................................................................
//..............incorporate external force................................................
//..............carry out relaxation process...............................................
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
//.......................................................................................................
//.................inverse transformation......................................................
// q=0
fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
dist[n] = fq;
// q = 1
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx;
dist[1*Np+n] = fq;
// q=2
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
dist[2*Np+n] = fq;
// q = 3
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
dist[3*Np+n] = fq;
// q = 4
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
dist[4*Np+n] = fq;
// q = 5
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
dist[5*Np+n] = fq;
// q = 6
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
dist[6*Np+n] = fq;
// q = 7
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
dist[7*Np+n] = fq;
// q = 8
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
dist[8*Np+n] = fq;
// q = 9
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
dist[9*Np+n] = fq;
// q = 10
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
dist[10*Np+n] = fq;
// q = 11
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
dist[11*Np+n] = fq;
// q = 12
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
dist[12*Np+n] = fq;
// q = 13
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
dist[13*Np+n] = fq;
// q= 14
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
dist[14*Np+n] = fq;
// q = 15
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
dist[15*Np+n] = fq;
// q = 16
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
dist[16*Np+n] = fq;
// q = 17
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
dist[17*Np+n] = fq;
// q = 18
fq = mrt_V1*rho+mrt_V9*m1
+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
dist[18*Np+n] = fq;
//........................................................................
}
}
}
extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q19_AAodd_StokesMRT<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<<NBLOCKS,NTHREADS >>>(dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}

View File

@@ -9,6 +9,7 @@ color lattice boltzmann model
#include <stdlib.h>
#include <time.h>
ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM):
rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0),
tauA(0), tauB(0), rhoA(0), rhoB(0), alpha(0), beta(0),
@@ -692,20 +693,15 @@ void ScaLBL_ColorModel::Run(){
fflush(stdout);
}
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_Comm->Barrier();
comm.barrier();
starttime = MPI_Wtime();
//.........................................
//************ MAIN ITERATION LOOP ***************************************/
comm.barrier();
PROFILE_START("Loop");
//std::shared_ptr<Database> analysis_db;
bool Regular = false;
auto current_db = db->cloneDatabase();
runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
//analysis.createThreads( analysis_method, 4 );
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax ) {
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
PROFILE_START("Update");
@@ -1038,10 +1034,10 @@ void ScaLBL_ColorModel::Run(){
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
ScaLBL_Comm->Barrier();
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
@@ -1242,6 +1238,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
}
return(volume_change);
}
double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){
srand(time(NULL));
double mass_loss =0.f;
@@ -1605,3 +1602,68 @@ void ScaLBL_ColorModel::WriteDebug(){
fclose(CGZ_FILE);
*/
}
FlowAdaptor::FlowAdaptor(ScaLBL_ColorModel &M){
Nx = M.Dm->Nx;
Ny = M.Dm->Ny;
Nz = M.Dm->Nz;
timestep=-1;
timestep_previous=-1;
phi.resize(Nx,Ny,Nz); phi.fill(0); // phase indicator field
phi_t.resize(Nx,Ny,Nz); phi_t.fill(0); // time derivative for the phase indicator field
}
FlowAdaptor::~FlowAdaptor(){
}
double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){
double INTERFACE_CUTOFF = M.color_db->getWithDefault<double>( "move_interface_cutoff", 0.975 );
double MOVE_INTERFACE_FACTOR = M.color_db->getWithDefault<double>( "move_interface_factor", 10.0 );
ScaLBL_CopyToHost( phi.data(), M.Phi, Nx*Ny*Nz* sizeof( double ) );
/* compute the local derivative of phase indicator field */
double beta = M.beta;
double factor = 0.5/beta;
double total_interface_displacement = 0.0;
double total_interface_sites = 0.0;
for (int n=0; n<Nx*Ny*Nz; n++){
/* compute the distance to the interface */
double value1 = M.Averages->Phi(n);
double dist1 = factor*log((1.0+value1)/(1.0-value1));
double value2 = phi(n);
double dist2 = factor*log((1.0+value2)/(1.0-value2));
phi_t(n) = value2;
if (value1 < INTERFACE_CUTOFF && value1 > -1*INTERFACE_CUTOFF && value2 < INTERFACE_CUTOFF && value2 > -1*INTERFACE_CUTOFF ){
/* time derivative of distance */
double dxdt = 0.125*(dist2-dist1);
/* extrapolate to move the distance further */
double dist3 = dist2 + MOVE_INTERFACE_FACTOR*dxdt;
/* compute the new phase interface */
phi_t(n) = (2.f*(exp(-2.f*beta*(dist3)))/(1.f+exp(-2.f*beta*(dist3))) - 1.f);
total_interface_displacement += fabs(MOVE_INTERFACE_FACTOR*dxdt);
total_interface_sites += 1.0;
}
}
ScaLBL_CopyToDevice( M.Phi, phi_t.data(), Nx*Ny*Nz* sizeof( double ) );
/* ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){
if (Dm->kproc()==0){
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0);
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1);
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2);
}
if (Dm->kproc() == nprocz-1){
ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1);
ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2);
ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3);
}
}
*/
}

View File

@@ -30,6 +30,7 @@ public:
void Initialize();
void Run();
void WriteDebug();
void getPhaseField(DoubleArray &f);
bool Restart,pBC;
bool REVERSE_FLOW_DIRECTION;
@@ -86,3 +87,16 @@ private:
double MorphOpenConnected(double target_volume_change);
};
class FlowAdaptor{
public:
FlowAdaptor(ScaLBL_ColorModel &M);
~FlowAdaptor();
double MoveInterface(ScaLBL_ColorModel &M);
DoubleArray phi;
DoubleArray phi_t;
private:
int Nx, Ny, Nz;
int timestep;
int timestep_previous;
};

View File

@@ -490,14 +490,10 @@ void ScaLBL_DFHModel::Run(){
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("No. of timesteps: %i \n", timestepMax);
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
starttime = MPI_Wtime();
//.........................................
//************ MAIN ITERATION LOOP ***************************************/
auto t1 = std::chrono::system_clock::now();
bool Regular = true;
PROFILE_START("Loop");
runAnalysis analysis( analysis_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
@@ -589,10 +585,10 @@ void ScaLBL_DFHModel::Run(){
//************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
if (rank==0) printf("********************************************************\n");

View File

@@ -10,8 +10,9 @@ color lattice boltzmann model
#include <time.h>
ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM):
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),W(0),gamma(0),
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauM(0),rhoA(0),rhoB(0),W(0),gamma(0),kappa(0),beta(0),
Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0),
tau(0),rho0(0),
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM)
{
@@ -30,10 +31,15 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){
// set defaults
timestepMax = 100000;
tauA = tauB = 1.0;
tauM = 1.0;//relaxation time for phase field
rhoA = rhoB = 1.0;
tau = 1.0;//only for single-fluid Lee model
rho0 = 1.0;//only for single-fluid Lee model
Fx = Fy = Fz = 0.0;
gamma=1e-3;
W=5;
gamma=1e-3;//surface tension
W=5.0;//interfacial thickness
beta = 12.0*gamma/W;
kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma
Restart=false;
din=dout=1.0;
flux=0.0;
@@ -42,12 +48,21 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){
if (freelee_db->keyExists( "timestepMax" )){
timestepMax = freelee_db->getScalar<int>( "timestepMax" );
}
if (freelee_db->keyExists( "tau" )){//only for single-fluid Lee model
tau = freelee_db->getScalar<double>( "tau" );
}
if (freelee_db->keyExists( "tauA" )){
tauA = freelee_db->getScalar<double>( "tauA" );
}
if (freelee_db->keyExists( "tauB" )){
tauB = freelee_db->getScalar<double>( "tauB" );
}
if (freelee_db->keyExists( "tauM" )){
tauM = freelee_db->getScalar<double>( "tauM" );
}
if (freelee_db->keyExists( "rho0" )){
rho0 = freelee_db->getScalar<double>( "rho0" );
}
if (freelee_db->keyExists( "rhoA" )){
rhoA = freelee_db->getScalar<double>( "rhoA" );
}
@@ -81,6 +96,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){
inletB=0.f;
outletA=0.f;
outletB=1.f;
//update secondary parameters
beta = 12.0*gamma/W;
kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma
//if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details)
BoundaryCondition = 0;
@@ -184,9 +202,9 @@ void ScaLBL_FreeLeeModel::ReadInput(){
}
void ScaLBL_FreeLeeModel::Create(){
void ScaLBL_FreeLeeModel::Create_TwoFluid(){
/*
* This function creates the variables needed to run a LBM
* This function creates the variables needed to run two-fluid Lee model
*/
//.........................................................
// Initialize communication structures in averaging domain
@@ -198,7 +216,7 @@ void ScaLBL_FreeLeeModel::Create(){
// Create a communicator for the device (will use optimized layout)
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
ScaLBL_Comm = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
ScaLBL_Comm_Regular = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
//ScaLBL_Comm_Regular = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
ScaLBL_Comm_WideHalo = std::shared_ptr<ScaLBLWideHalo_Communicator>(new ScaLBLWideHalo_Communicator(Mask,2));
// create the layout for the LBM
@@ -220,7 +238,7 @@ void ScaLBL_FreeLeeModel::Create(){
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np);
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &hq, 7*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &mu_phi, dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &Den, dist_mem_size);
@@ -239,46 +257,301 @@ void ScaLBL_FreeLeeModel::Create(){
for (int i=1; i<Nx-1; i++){
int idx=Map(i,j,k);
if (!(idx < 0))
TmpMap[idx] = k*Nx*Ny+j*Nx+i;
TmpMap[idx] = ScaLBL_Comm_WideHalo->Map(i,j,k);
}
}
}
// check that TmpMap is valid
for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
auto n = TmpMap[idx];
if (n > Nx*Ny*Nz){
if (n > Nxh*Nyh*Nzh){
printf("Bad value! idx=%i \n", n);
TmpMap[idx] = Nx*Ny*Nz-1;
TmpMap[idx] = Nxh*Nyh*Nzh-1;
}
}
for (int idx=ScaLBL_Comm->FirstInterior(); idx<ScaLBL_Comm->LastInterior(); idx++){
auto n = TmpMap[idx];
if ( n > Nx*Ny*Nz ){
if ( n > Nxh*Nyh*Nzh ){
printf("Bad value! idx=%i \n",n);
TmpMap[idx] = Nx*Ny*Nz-1;
TmpMap[idx] = Nxh*Nyh*Nzh-1;
}
}
// copy the device map
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
ScaLBL_DeviceBarrier();
delete [] TmpMap;
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
// initialize phi based on PhaseLabel (include solid component labels)
comm.barrier();
delete [] TmpMap;
delete [] neighborList;
}
/********************************************************
* AssignComponentLabels *
********************************************************/
void ScaLBL_FreeLeeModel::Initialize(){
if (rank==0) printf ("Initializing distributions \n");
ScaLBL_D3Q19_Init(fq, Np);
void ScaLBL_FreeLeeModel::Create_SingleFluid(){
/*
* This function initializes model
* This function creates the variables needed to run single-fluid Lee model
*/
//.........................................................
// Initialize communication structures in averaging domain
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
Mask->CommInit();
Np=Mask->PoreCount();
//...........................................................................
if (rank==0) printf ("Create ScaLBL_Communicator \n");
// Create a communicator for the device (will use optimized layout)
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
ScaLBL_Comm = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
// create the layout for the LBM
int Npad=(Np/16 + 2)*16;
if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N);
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1);
comm.barrier();
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
// LBM variables
if (rank==0) printf ("Allocating distributions \n");
//......................device distributions.................................
dist_mem_size = Np*sizeof(double);
neighborSize=18*(Np*sizeof(int));
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np);
ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np);
//...........................................................................
// Update GPU data structures
if (rank==0) printf ("Setting up device map and neighbor list \n");
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
comm.barrier();
delete [] neighborList;
}
void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad()
{
double *phase;
phase = new double[Nh];
size_t NLABELS=0;
signed char VALUE=0;
double AFFINITY=0.f;
auto LabelList = freelee_db->getVector<int>( "ComponentLabels" );
auto AffinityList = freelee_db->getVector<double>( "ComponentAffinity" );
NLABELS=LabelList.size();
if (NLABELS != AffinityList.size()){
ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! \n");
}
double label_count[NLABELS];
double label_count_global[NLABELS];
// Assign the labels
for (size_t idx=0; idx<NLABELS; idx++) label_count[idx]=0;
for (int k=0;k<Nzh;k++){
for (int j=0;j<Nyh;j++){
for (int i=0;i<Nxh;i++){
//idx for double-halo array 'phase'
int nh = k*Nxh*Nyh+j*Nxh+i;
//idx for single-halo array Mask->id[n]
int x=i-1;
int y=j-1;
int z=k-1;
if (x<0) x=0;
if (y<0) y=0;
if (z<0) z=0;
if (x>=Nx) x=Nx-1;
if (y>=Ny) y=Ny-1;
if (z>=Nz) z=Nz-1;
int n = z*Nx*Ny+y*Nx+x;
VALUE=id[n];
// Assign the affinity from the paired list
for (unsigned int idx=0; idx < NLABELS; idx++){
//printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]);
if (VALUE == LabelList[idx]){
AFFINITY=AffinityList[idx];
label_count[idx] += 1.0;
idx = NLABELS;
//Mask->id[n] = 0; // set mask to zero since this is an immobile component
}
}
// fluid labels are reserved
if (VALUE == 1) AFFINITY=1.0;
else if (VALUE == 2) AFFINITY=-1.0;
phase[nh] = AFFINITY;
}
}
}
// Set Dm to match Mask
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
for (size_t idx=0; idx<NLABELS; idx++)
label_count_global[idx] = Dm->Comm.sumReduce(label_count[idx]);
if (rank==0){
printf("Number of component labels: %lu \n",NLABELS);
for (unsigned int idx=0; idx<NLABELS; idx++){
VALUE=LabelList[idx];
AFFINITY=AffinityList[idx];
double volume_fraction = double(label_count_global[idx])/double((Nx-2)*(Ny-2)*(Nz-2)*nprocs);
printf(" label=%d, affinity=%f, volume fraction==%f\n",VALUE,AFFINITY,volume_fraction);
}
}
//compute color gradient and laplacian of phase field
double *ColorGrad_host, *mu_phi_host;
ColorGrad_host = new double[3*Np];
mu_phi_host = new double[Np];
double *Dst;
Dst = new double [3*3*3];
for (int kk=0; kk<3; kk++){
for (int jj=0; jj<3; jj++){
for (int ii=0; ii<3; ii++){
int index = kk*9+jj*3+ii;
Dst[index] = sqrt(double(ii-1)*double(ii-1) + double(jj-1)*double(jj-1)+ double(kk-1)*double(kk-1));
}
}
}
double w_face = 1.0/18.0;
double w_edge = 1.0/36.0;
double w_corner = 0.f;
//local
Dst[13] = 0.f;
//faces
Dst[4] = w_face;
Dst[10] = w_face;
Dst[12] = w_face;
Dst[14] = w_face;
Dst[16] = w_face;
Dst[22] = w_face;
// corners
Dst[0] = w_corner;
Dst[2] = w_corner;
Dst[6] = w_corner;
Dst[8] = w_corner;
Dst[18] = w_corner;
Dst[20] = w_corner;
Dst[24] = w_corner;
Dst[26] = w_corner;
// edges
Dst[1] = w_edge;
Dst[3] = w_edge;
Dst[5] = w_edge;
Dst[7] = w_edge;
Dst[9] = w_edge;
Dst[11] = w_edge;
Dst[15] = w_edge;
Dst[17] = w_edge;
Dst[19] = w_edge;
Dst[21] = w_edge;
Dst[23] = w_edge;
Dst[25] = w_edge;
double cs2_inv = 3.0;//inverse of c_s^2 for D3Q19 lattice
int width = 2;//For better readability: make halo width explicity wherever possible
for (int k=width; k<Nzh-width; k++){
for (int j=width; j<Nyh-width; j++){
for (int i=width; i<Nxh-width; i++){
//idx for double-halo array 'phase'
int nh = k*Nxh*Nyh+j*Nxh+i;
int idx=Map(i-width+1,j-width+1,k-width+1);
if (!(idx < 0)){
double phi_x = 0.f;
double phi_y = 0.f;
double phi_z = 0.f;
double phi_Lap = 0.f;//Laplacian of the phase field
for (int kk=0; kk<3; kk++){
for (int jj=0; jj<3; jj++){
for (int ii=0; ii<3; ii++){
int index = kk*9+jj*3+ii;
double weight= Dst[index];
int idi=i+ii-1;
int idj=j+jj-1;
int idk=k+kk-1;
if (idi < 0) idi=0;
if (idj < 0) idj=0;
if (idk < 0) idk=0;
if (!(idi < Nxh)) idi=Nxh-1;
if (!(idj < Nyh)) idj=Nyh-1;
if (!(idk < Nzh)) idk=Nzh-1;
int nn = idk*Nxh*Nyh + idj*Nxh + idi;
double vec_x = double(ii-1);
double vec_y = double(jj-1);
double vec_z = double(kk-1);
double GWNS=phase[nn];
double GWNS_local=phase[nh];
phi_x += GWNS*weight*vec_x;
phi_y += GWNS*weight*vec_y;
phi_z += GWNS*weight*vec_z;
phi_Lap += weight*(GWNS-GWNS_local);//Laplacian of the phase field
}
}
}
//store color gradient
ColorGrad_host[idx+0*Np] = cs2_inv*phi_x;
ColorGrad_host[idx+1*Np] = cs2_inv*phi_y;
ColorGrad_host[idx+2*Np] = cs2_inv*phi_z;
//compute chemical potential
phi_Lap = 2.0*cs2_inv*phi_Lap;
mu_phi_host[idx] = 4.0*beta*phase[nh]*(phase[nh]+1.0)*(phase[nh]-1.0) - kappa*phi_Lap;
}
}
}
}
//copy all data to device
ScaLBL_CopyToDevice(Phi, phase, Nh*sizeof(double));
ScaLBL_CopyToDevice(ColorGrad, ColorGrad_host, 3*Np*sizeof(double));
ScaLBL_CopyToDevice(mu_phi, mu_phi_host, Np*sizeof(double));
ScaLBL_Comm->Barrier();
comm.barrier();
//debug
//save the phase field and check it
//FILE *OUTFILE;
//sprintf(LocalRankFilename,"Phase_Init.%05i.raw",rank);
//OUTFILE = fopen(LocalRankFilename,"wb");
//fwrite(phase,8,Nh,OUTFILE);
//fclose(OUTFILE);
delete [] phase;
delete [] ColorGrad_host;
delete [] mu_phi_host;
delete [] Dst;
}
void ScaLBL_FreeLeeModel::Initialize_TwoFluid(){
/*
* This function initializes two-fluid Lee model
*/
if (rank==0) printf ("Initializing phase field, chemical potential and color gradient\n");
AssignComponentLabels_ChemPotential_ColorGrad();//initialize phase field Phi
if (rank==0) printf ("Initializing distributions for momentum transport\n");
ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np);
if (rank==0) printf ("Initializing density field and distributions for phase-field transport\n");
ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
if (Restart == true){
//TODO need to revise this function
if (rank==0){
printf("Reading restart file! \n");
}
@@ -292,7 +565,7 @@ void ScaLBL_FreeLeeModel::Initialize(){
cDen = new double[2*Np];
cDist = new double[19*Np];
ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int));
ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double));
//ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double));
ifstream File(LocalRestartFile,ios::binary);
int idx;
@@ -331,18 +604,19 @@ void ScaLBL_FreeLeeModel::Initialize(){
// Copy the restart data to the GPU
ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double));
ScaLBL_CopyToDevice(fq,cDist,19*Np*sizeof(double));
ScaLBL_CopyToDevice(gqbar,cDist,19*Np*sizeof(double));
ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double));
ScaLBL_DeviceBarrier();
ScaLBL_Comm->Barrier();
comm.barrier();
if (rank==0) printf ("Initializing phase and density fields on device from Restart\n");
//TODO the following function is to be updated.
//ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np);
//ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
}
if (rank==0) printf ("Initializing phase field \n");
//ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
//ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
// establish reservoirs for external bC
// TODO to be revised
if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){
if (Dm->kproc()==0){
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0);
@@ -358,7 +632,84 @@ void ScaLBL_FreeLeeModel::Initialize(){
//ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double));
}
void ScaLBL_FreeLeeModel::Run(){
void ScaLBL_FreeLeeModel::Initialize_SingleFluid(){
/*
* This function initializes single-fluid Lee model
*/
if (rank==0) printf ("Initializing distributions for momentum transport\n");
ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(gqbar, Fx, Fy, Fz, Np);
if (Restart == true){
//TODO need to revise this function
//remove the phase-related part
// if (rank==0){
// printf("Reading restart file! \n");
// }
//
// // Read in the restart file to CPU buffers
// int *TmpMap;
// TmpMap = new int[Np];
//
// double *cPhi, *cDist, *cDen;
// cPhi = new double[N];
// cDen = new double[2*Np];
// cDist = new double[19*Np];
// ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int));
// //ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double));
//
// ifstream File(LocalRestartFile,ios::binary);
// int idx;
// double value,va,vb;
// for (int n=0; n<Np; n++){
// File.read((char*) &va, sizeof(va));
// File.read((char*) &vb, sizeof(vb));
// cDen[n] = va;
// cDen[Np+n] = vb;
// }
// for (int n=0; n<Np; n++){
// // Read the distributions
// for (int q=0; q<19; q++){
// File.read((char*) &value, sizeof(value));
// cDist[q*Np+n] = value;
// }
// }
// File.close();
//
// for (int n=0; n<ScaLBL_Comm->LastExterior(); n++){
// va = cDen[n];
// vb = cDen[Np + n];
// value = (va-vb)/(va+vb);
// idx = TmpMap[n];
// if (!(idx < 0) && idx<N)
// cPhi[idx] = value;
// }
// for (int n=ScaLBL_Comm->FirstInterior(); n<ScaLBL_Comm->LastInterior(); n++){
// va = cDen[n];
// vb = cDen[Np + n];
// value = (va-vb)/(va+vb);
// idx = TmpMap[n];
// if (!(idx < 0) && idx<N)
// cPhi[idx] = value;
// }
//
// // Copy the restart data to the GPU
// ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double));
// ScaLBL_CopyToDevice(gqbar,cDist,19*Np*sizeof(double));
// ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double));
// ScaLBL_Comm->Barrier();
// comm.barrier();
//
// if (rank==0) printf ("Initializing phase and density fields on device from Restart\n");
// //TODO the following function is to be updated.
// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np);
// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
}
}
void ScaLBL_FreeLeeModel::Run_TwoFluid(){
int nprocs=nprocx*nprocy*nprocz;
const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz);
@@ -368,98 +719,93 @@ void ScaLBL_FreeLeeModel::Run(){
fflush(stdout);
}
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
starttime = MPI_Wtime();
//.........................................
//************ MAIN ITERATION LOOP ***************************************/
comm.barrier();
auto t1 = std::chrono::system_clock::now();
PROFILE_START("Loop");
while (timestep < timestepMax ) {
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
PROFILE_START("Update");
// *************ODD TIMESTEP*************
timestep++;
/* // Compute the Phase indicator field
// Read for hq, Bq happens in this routine (requires communication)
ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL
ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE
ScaLBL_DeviceBarrier();
ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np);
//-------------------------------------------------------------------------------------------------------------------
// Compute the Phase indicator field
// Read for hq happens in this routine (requires communication)
ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL
ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np);
// Perform the collision operation
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL
if (BoundaryCondition > 0 && BoundaryCondition < 5){
//TODO to be revised
ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB);
ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB);
}
// Halo exchange for phase field
ScaLBL_Comm_Regular->SendHalo(Phi);
ScaLBL_Comm_WideHalo->Send(Phi);
ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm_Regular->RecvHalo(Phi);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_DeviceBarrier();
ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM,
kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm_WideHalo->Recv(Phi);
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set BCs
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
}
if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar);
}
ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_DeviceBarrier();
MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL);
ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM,
kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->Barrier();
// *************EVEN TIMESTEP*************
timestep++;
// Compute the Phase indicator field
ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL
ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE
ScaLBL_DeviceBarrier();
ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL
ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np);
// Perform the collision operation
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL
// Halo exchange for phase field
if (BoundaryCondition > 0 && BoundaryCondition < 5){
ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB);
ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB);
}
ScaLBL_Comm_Regular->SendHalo(Phi);
ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm_Regular->RecvHalo(Phi);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_DeviceBarrier();
ScaLBL_Comm_WideHalo->Send(Phi);
ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM,
kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm_WideHalo->Recv(Phi);
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
}
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar);
}
ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
*/
ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM,
kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->Barrier();
//************************************************************************
PROFILE_STOP("Update");
@@ -467,10 +813,10 @@ void ScaLBL_FreeLeeModel::Run(){
PROFILE_STOP("Loop");
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
@@ -484,33 +830,123 @@ void ScaLBL_FreeLeeModel::Run(){
// ************************************************************************
}
void ScaLBL_FreeLeeModel::Run_SingleFluid(){
int nprocs=nprocx*nprocy*nprocz;
const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz);
if (rank==0){
printf("********************************************************\n");
printf("No. of timesteps: %i \n", timestepMax);
fflush(stdout);
}
void ScaLBL_FreeLeeModel::WriteDebug(){
//.......create and start timer............
ScaLBL_Comm->Barrier();
comm.barrier();
//.........................................
//************ MAIN ITERATION LOOP ***************************************/
PROFILE_START("Loop");
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax ) {
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
PROFILE_START("Update");
// *************ODD TIMESTEP*************
timestep++;
//-------------------------------------------------------------------------------------------------------------------
// Perform the collision operation
ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz,
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
// TODO to be revised!
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
}
if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar);
}
ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz,
0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->Barrier();
// *************EVEN TIMESTEP*************
timestep++;
//-------------------------------------------------------------------------------------------------------------------
// Perform the collision operation
ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz,
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
// TODO to be revised!
if (BoundaryCondition == 3){
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
}
else if (BoundaryCondition == 4){
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
}
else if (BoundaryCondition == 5){
ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar);
}
ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz,
0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->Barrier();
//************************************************************************
PROFILE_STOP("Update");
}
PROFILE_STOP("Loop");
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank==0) printf("********************************************************\n");
// ************************************************************************
}
void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){
// Copy back final phase indicator field and convert to regular layout
DoubleArray PhaseField(Nx,Ny,Nz);
DoubleArray PhaseData(Nxh,Nyh,Nzh);
//ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField);
ScaLBL_CopyToHost(PhaseField.data(), Phi, sizeof(double)*N);
ScaLBL_CopyToHost(PhaseData.data(), Phi, sizeof(double)*Nh);
FILE *OUTFILE;
sprintf(LocalRankFilename,"Phase.%05i.raw",rank);
OUTFILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,OUTFILE);
fwrite(PhaseData.data(),8,Nh,OUTFILE);
fclose(OUTFILE);
ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField);
DoubleArray PhaseField(Nx,Ny,Nz);
ScaLBL_Comm->RegularLayout(Map,Den,PhaseField);
FILE *AFILE;
sprintf(LocalRankFilename,"A.%05i.raw",rank);
sprintf(LocalRankFilename,"Density.%05i.raw",rank);
AFILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,AFILE);
fclose(AFILE);
ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField);
FILE *BFILE;
sprintf(LocalRankFilename,"B.%05i.raw",rank);
BFILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,BFILE);
fclose(BFILE);
ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField);
FILE *PFILE;
sprintf(LocalRankFilename,"Pressure.%05i.raw",rank);
@@ -561,3 +997,37 @@ void ScaLBL_FreeLeeModel::WriteDebug(){
fclose(CGZ_FILE);
*/
}
void ScaLBL_FreeLeeModel::WriteDebug_SingleFluid(){
DoubleArray PhaseField(Nx,Ny,Nz);
// Copy back final phase indicator field and convert to regular layout
ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField);
FILE *PFILE;
sprintf(LocalRankFilename,"Pressure.%05i.raw",rank);
PFILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,PFILE);
fclose(PFILE);
ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField);
FILE *VELX_FILE;
sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank);
VELX_FILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,VELX_FILE);
fclose(VELX_FILE);
ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField);
FILE *VELY_FILE;
sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank);
VELY_FILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,VELY_FILE);
fclose(VELY_FILE);
ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField);
FILE *VELZ_FILE;
sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank);
VELZ_FILE = fopen(LocalRankFilename,"wb");
fwrite(PhaseField.data(),8,N,VELZ_FILE);
fclose(VELZ_FILE);
}

View File

@@ -26,16 +26,22 @@ public:
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run();
void WriteDebug();
void Create_TwoFluid();
void Initialize_TwoFluid();
void Run_TwoFluid();
void WriteDebug_TwoFluid();
void Create_SingleFluid();
void Initialize_SingleFluid();
void Run_SingleFluid();
void WriteDebug_SingleFluid();
bool Restart,pBC;
int timestep,timestepMax;
int BoundaryCondition;
double tauA,tauB,rhoA,rhoB;
double W,gamma;
double tau, rho0;//only for single-fluid Lee model
double tauM;//relaxation time for phase field (or mass)
double W,gamma,kappa,beta;
double Fx,Fy,Fz,flux;
double din,dout,inletA,inletB,outletA,outletB;
@@ -61,7 +67,7 @@ public:
signed char *id;
int *NeighborList;
int *dvcMap;
double *fq, *hq;
double *gqbar, *hq;
double *mu_phi, *Den, *Phi;
double *ColorGrad;
double *Velocity;
@@ -81,6 +87,7 @@ private:
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
void AssignComponentLabels_ChemPotential_ColorGrad();
};

View File

@@ -910,10 +910,8 @@ void ScaLBL_GreyscaleColorModel::Run(){
}
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_Comm->Barrier();
comm.barrier();
starttime = MPI_Wtime();
//.........................................
//************ MAIN ITERATION LOOP ***************************************/
@@ -923,6 +921,7 @@ void ScaLBL_GreyscaleColorModel::Run(){
auto current_db = db->cloneDatabase();
//runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
//analysis.createThreads( analysis_method, 4 );
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax ) {
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
PROFILE_START("Update");
@@ -1319,10 +1318,10 @@ void ScaLBL_GreyscaleColorModel::Run(){
PROFILE_SAVE("lbpm_color_simulator",1);
//************************************************************************
ScaLBL_Comm->Barrier();
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;

View File

@@ -485,10 +485,8 @@ void ScaLBL_GreyscaleModel::Run(){
}
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier();
comm.barrier();
starttime = MPI_Wtime();
//.........................................
Minkowski Morphology(Mask);
@@ -500,6 +498,7 @@ void ScaLBL_GreyscaleModel::Run(){
double rlx_eff = 1.0/tau_eff;
double error = 1.0;
double flow_rate_previous = 0.0;
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
// *************ODD TIMESTEP*************//
@@ -744,10 +743,10 @@ void ScaLBL_GreyscaleModel::Run(){
//************************************************************************
ScaLBL_DeviceBarrier();
comm.barrier();
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;

View File

@@ -784,7 +784,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){
//.......create and start timer............
//double starttime,stoptime,cputime;
//ScaLBL_Comm->Barrier(); comm.barrier();
//starttime = MPI_Wtime();
//auto t1 = std::chrono::system_clock::now();
for (int ic=0; ic<number_ion_species; ic++){
timestep=0;
@@ -886,10 +886,10 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np);
}
//************************************************************************/
//stoptime = MPI_Wtime();
//if (rank==0) printf("-------------------------------------------------------------------\n");
//// Compute the walltime per timestep
//cputime = (stoptime - starttime)/timestep;
//auto t2 = std::chrono::system_clock::now();
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
//// Performance obtained from each node
//double MLUPS = double(Np)/cputime/1000000;

View File

@@ -230,14 +230,13 @@ void ScaLBL_MRTModel::Run(){
}
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_DeviceBarrier(); comm.barrier();
starttime = MPI_Wtime();
if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
if (rank==0) printf("********************************************************\n");
timestep=0;
double error = 1.0;
double flow_rate_previous = 0.0;
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
timestep++;
@@ -354,10 +353,10 @@ void ScaLBL_MRTModel::Run(){
}
}
//************************************************************************/
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;

View File

@@ -2,7 +2,7 @@
ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, const Utilities::MPI& COMM):
rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0),
analysis_interval(0),visualization_interval(0),tolerance(0),comm(COMM)
analysis_interval(0),visualization_interval(0),tolerance(0),time_conv_max(0),comm(COMM)
{
}
@@ -25,6 +25,7 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){
analysis_interval = 500;
visualization_interval = 10000;
tolerance = 1.0e-6;
time_conv_max = 0.0;
// load input parameters
if (study_db->keyExists( "timestepMax" )){
@@ -135,3 +136,12 @@ vector<int> ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(double Stoke
}
return num_iter_ion;
}
void ScaLBL_Multiphys_Controller::getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv){
//Return maximum of the time converting factor from Stokes and ion solvers
vector<double> TimeConv;
TimeConv.assign(IonTimeConv.begin(),IonTimeConv.end());
TimeConv.insert(TimeConv.begin(),StokesTimeConv);
time_conv_max = *max_element(TimeConv.begin(),TimeConv.end());
}

View File

@@ -27,6 +27,7 @@ public:
int getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
vector<int> getIonNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
//void getIonNumIter_PNP_coupling(double StokesTimeConv,vector<double> &IonTimeConv,vector<int> &IonTimeMax);
void getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
bool Restart;
int timestepMax;
@@ -35,6 +36,7 @@ public:
int analysis_interval;
int visualization_interval;
double tolerance;
double time_conv_max;
//double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity
int rank,nprocs;

View File

@@ -8,8 +8,11 @@
ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, const Utilities::MPI& COMM):
rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),tolerance(0),h(0),
epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0),
chargeDen_dummy(0),WriteLog(0),
nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM)
chargeDen_dummy(0),WriteLog(0),nprocx(0),nprocy(0),nprocz(0),
BoundaryConditionInlet(0),BoundaryConditionOutlet(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),
Vin0(0),freqIn(0),t0_In(0),Vin_Type(0),Vout0(0),freqOut(0),t0_Out(0),Vout_Type(0),
TestPeriodic(0),TestPeriodicTime(0),TestPeriodicTimeConv(0),TestPeriodicSaveInterval(0),
comm(COMM)
{
}
@@ -33,10 +36,12 @@ void ScaLBL_Poisson::ReadParams(string filename){
epsilonR = 78.4;//default dielectric constant of water
epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity
analysis_interval = 1000;
Vin = 1.0; //Boundary-z (inlet) electric potential
Vout = 1.0; //Boundary-Z (outlet) electric potential
chargeDen_dummy = 1.0e-3;//For debugging;unit=[C/m^3]
WriteLog = false;
TestPeriodic = false;
TestPeriodicTime = 1.0;//unit: [sec]
TestPeriodicTimeConv = 0.01; //unit [sec/lt]
TestPeriodicSaveInterval = 0.1; //unit [sec]
// LB-Poisson Model parameters
if (electric_db->keyExists( "timestepMax" )){
@@ -57,6 +62,18 @@ void ScaLBL_Poisson::ReadParams(string filename){
if (electric_db->keyExists( "WriteLog" )){
WriteLog = electric_db->getScalar<bool>( "WriteLog" );
}
if (electric_db->keyExists( "TestPeriodic" )){
TestPeriodic = electric_db->getScalar<bool>( "TestPeriodic" );
}
if (electric_db->keyExists( "TestPeriodicTime" )){
TestPeriodicTime = electric_db->getScalar<double>( "TestPeriodicTime" );
}
if (electric_db->keyExists( "TestPeriodicTimeConv" )){
TestPeriodicTimeConv = electric_db->getScalar<double>( "TestPeriodicTimeConv" );
}
if (electric_db->keyExists( "TestPeriodicSaveInterval" )){
TestPeriodicSaveInterval = electric_db->getScalar<double>( "TestPeriodicSaveInterval" );
}
// Read solid boundary condition specific to Poisson equation
BoundaryConditionSolid = 1;
@@ -65,10 +82,15 @@ void ScaLBL_Poisson::ReadParams(string filename){
}
// Read boundary condition for electric potential
// BC = 0: normal periodic BC
// BC = 1: fixed inlet and outlet potential
BoundaryCondition = 0;
if (electric_db->keyExists( "BC" )){
BoundaryCondition = electric_db->getScalar<int>( "BC" );
// BC = 1: fixed electric potential
// BC = 2: sine/cosine periodic electric potential (need extra input parameters)
BoundaryConditionInlet = 0;
BoundaryConditionOutlet = 0;
if (electric_db->keyExists( "BC_Inlet" )){
BoundaryConditionInlet = electric_db->getScalar<int>( "BC_Inlet" );
}
if (electric_db->keyExists( "BC_Outlet" )){
BoundaryConditionOutlet = electric_db->getScalar<int>( "BC_Outlet" );
}
// Read domain parameters
@@ -117,8 +139,17 @@ void ScaLBL_Poisson::SetDomain(){
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
comm.barrier();
Dm->BoundaryCondition = BoundaryCondition;
Mask->BoundaryCondition = BoundaryCondition;
if (BoundaryConditionInlet==0 && BoundaryConditionOutlet==0){
Dm->BoundaryCondition = 0;
Mask->BoundaryCondition = 0;
}
else if (BoundaryConditionInlet>0 && BoundaryConditionOutlet>0){
Dm->BoundaryCondition = 1;
Mask->BoundaryCondition = 1;
}
else {//i.e. non-periodic and periodic BCs are mixed
ERROR("Error: check the type of inlet and outlet boundary condition! Mixed periodic and non-periodic BCs are found!\n");
}
Dm->CommInit();
comm.barrier();
@@ -343,15 +374,91 @@ void ScaLBL_Poisson::Create(){
void ScaLBL_Poisson::Potential_Init(double *psi_init){
if (BoundaryCondition==1){
if (electric_db->keyExists( "Vin" )){
Vin = electric_db->getScalar<double>( "Vin" );
}
if (electric_db->keyExists( "Vout" )){
Vout = electric_db->getScalar<double>( "Vout" );
}
//set up default boundary input parameters
Vin0 = Vout0 = 1.0; //unit: [V]
freqIn = freqOut = 50.0; //unit: [Hz]
t0_In = t0_Out = 0.0; //unit: [sec]
Vin_Type = Vout_Type = 1; //1->sin; 2->cos
Vin = 1.0; //Boundary-z (inlet) electric potential
Vout = 1.0; //Boundary-Z (outlet) electric potential
if (BoundaryConditionInlet>0){
switch (BoundaryConditionInlet){
case 1:
if (electric_db->keyExists( "Vin" )){
Vin = electric_db->getScalar<double>( "Vin" );
}
if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g [V]\n",Vin);
break;
case 2:
if (electric_db->keyExists( "Vin0" )){//voltage amplitude; unit: Volt
Vin0 = electric_db->getScalar<double>( "Vin0" );
}
if (electric_db->keyExists( "freqIn" )){//unit: Hz
freqIn = electric_db->getScalar<double>( "freqIn" );
}
if (electric_db->keyExists( "t0_In" )){//timestep shift, unit: lt
t0_In = electric_db->getScalar<double>( "t0_In" );
}
if (electric_db->keyExists( "Vin_Type" )){
//type=1 -> sine
//tyep=2 -> cosine
Vin_Type = electric_db->getScalar<int>( "Vin_Type" );
if (Vin_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vin_Type is currently not supported! \n");
}
if (rank==0){
if (Vin_Type==1){
printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vin0,freqIn,t0_In);
printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In);
}
else if (Vin_Type==2){
printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V] \n",Vin0,freqIn,t0_In);
printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In);
}
}
break;
}
}
if (BoundaryConditionOutlet>0){
switch (BoundaryConditionOutlet){
case 1:
if (electric_db->keyExists( "Vout" )){
Vout = electric_db->getScalar<double>( "Vout" );
}
if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vout = %.3g [V] \n",Vout);
break;
case 2:
if (electric_db->keyExists( "Vout0" )){//voltage amplitude; unit: Volt
Vout0 = electric_db->getScalar<double>( "Vout0" );
}
if (electric_db->keyExists( "freqOut" )){//unit: Hz
freqOut = electric_db->getScalar<double>( "freqOut" );
}
if (electric_db->keyExists( "t0_Out" )){//timestep shift, unit: lt
t0_Out = electric_db->getScalar<double>( "t0_Out" );
}
if (electric_db->keyExists( "Vout_Type" )){
//type=1 -> sine
//tyep=2 -> cosine
Vout_Type = electric_db->getScalar<int>( "Vout_Type" );
if (Vout_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vout_Type is currently not supported! \n");
}
if (rank==0){
if (Vout_Type==1){
printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out);
printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out);
}
else if (Vout_Type==2){
printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out);
printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out);
}
}
break;
}
}
//By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis
if (BoundaryConditionInlet==2) Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,0);
if (BoundaryConditionOutlet==2) Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,0);
double slope = (Vout-Vin)/(Nz-2);
double psi_linearized;
for (int k=0;k<Nz;k++){
@@ -375,10 +482,15 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){
}
}
double ScaLBL_Poisson::getBoundaryVoltagefromPeriodicBC(double V0, double freq, double t0, int V_type, int time_step){
return V0*(V_type==1)*sin(2.0*M_PI*freq*time_conv*(time_step+t0/time_conv))+V0*(V_type==2)*cos(2.0*M_PI*freq*time_conv*(time_step+t0/time_conv));
}
void ScaLBL_Poisson::Initialize(){
void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
/*
* This function initializes model
* "time_conv_from_Study" is the phys to LB time conversion factor, unit=[sec/lt]
* which is used for periodic voltage input for inlet and outlet boundaries
*/
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q7 distributions\n");
//NOTE the initialization involves two steps:
@@ -386,6 +498,7 @@ void ScaLBL_Poisson::Initialize(){
//2. Initialize electric potential for pore nodes
double *psi_host;
psi_host = new double [Nx*Ny*Nz];
time_conv = time_conv_from_Study;
AssignSolidBoundary(psi_host);//step1
Potential_Init(psi_host);//step2
ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double));
@@ -405,12 +518,12 @@ void ScaLBL_Poisson::Initialize(){
//}
}
void ScaLBL_Poisson::Run(double *ChargeDensity){
void ScaLBL_Poisson::Run(double *ChargeDensity, int timestep_from_Study){
//.......create and start timer............
//double starttime,stoptime,cputime;
//ScaLBL_Comm->Barrier(); comm.barrier();
//starttime = MPI_Wtime();
//comm.barrier();
//auto t1 = std::chrono::system_clock::now();
timestep=0;
double error = 1.0;
@@ -420,13 +533,13 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){
// *************ODD TIMESTEP*************//
timestep++;
SolveElectricPotentialAAodd();//update electric potential
SolveElectricPotentialAAodd(timestep_from_Study);//update electric potential
SolvePoissonAAodd(ChargeDensity);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
// *************EVEN TIMESTEP*************//
timestep++;
SolveElectricPotentialAAeven();//update electric potential
SolveElectricPotentialAAeven(timestep_from_Study);//update electric potential
SolvePoissonAAeven(ChargeDensity);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
//************************************************************************/
@@ -466,11 +579,11 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){
}
//************************************************************************/
//stoptime = MPI_Wtime();
////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n");
////if (rank==0) printf("---------------------------------------------------------------------------\n");
//// Compute the walltime per timestep
//cputime = (stoptime - starttime)/timestep;
//auto t2 = std::chrono::system_clock::now();
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
//// Performance obtained from each node
//double MLUPS = double(Np)/cputime/1000000;
@@ -506,29 +619,65 @@ void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){
}
}
void ScaLBL_Poisson::SolveElectricPotentialAAodd(){
void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study){
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryCondition == 1){
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
case 2:
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
}
}
if (BoundaryConditionOutlet > 0){
switch (BoundaryConditionOutlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
case 2:
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
}
}
//-------------------------//
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
}
void ScaLBL_Poisson::SolveElectricPotentialAAeven(){
void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryCondition == 1){
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
case 2:
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
}
}
if (BoundaryConditionOutlet > 0){
switch (BoundaryConditionOutlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
case 2:
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
}
}
//-------------------------//
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);

View File

@@ -9,6 +9,7 @@
#include <exception>
#include <stdexcept>
#include <fstream>
#include <cmath>
#include "common/ScaLBL.h"
#include "common/Communication.h"
@@ -16,6 +17,7 @@
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
#define _USE_MATH_DEFINES
#ifndef ScaLBL_POISSON_INC
#define ScaLBL_POISSON_INC
@@ -30,8 +32,8 @@ public:
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run(double *ChargeDensity);
void Initialize(double time_conv_from_Study);
void Run(double *ChargeDensity,int timestep_from_Study);
void getElectricPotential(DoubleArray &ReturnValues);
void getElectricPotential_debug(int timestep);
void getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z);
@@ -41,7 +43,8 @@ public:
//bool Restart,pBC;
int timestep,timestepMax;
int analysis_interval;
int BoundaryCondition;
int BoundaryConditionInlet;
int BoundaryConditionOutlet;
int BoundaryConditionSolid;
double tau;
double tolerance;
@@ -50,11 +53,18 @@ public:
double Vin, Vout;
double chargeDen_dummy;//for debugging
bool WriteLog;
double Vin0,freqIn,t0_In,Vin_Type;
double Vout0,freqOut,t0_Out,Vout_Type;
bool TestPeriodic;
double TestPeriodicTime;//unit: [sec]
double TestPeriodicTimeConv; //unit [sec/lt]
double TestPeriodicSaveInterval; //unit [sec]
int Nx,Ny,Nz,N,Np;
int rank,nprocx,nprocy,nprocz,nprocs;
double Lx,Ly,Lz;
double h;//image resolution
double time_conv;//phys to LB time converting factor; unit=[sec/lt]
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
@@ -91,12 +101,13 @@ private:
void AssignSolidBoundary(double *poisson_solid);
void Potential_Init(double *psi_init);
void ElectricField_LB_to_Phys(DoubleArray &Efield_reg);
void SolveElectricPotentialAAodd();
void SolveElectricPotentialAAeven();
void SolveElectricPotentialAAodd(int timestep_from_Study);
void SolveElectricPotentialAAeven(int timestep_from_Study);
//void SolveElectricField();
void SolvePoissonAAodd(double *ChargeDensity);
void SolvePoissonAAeven(double *ChargeDensity);
void getConvergenceLog(int timestep,double error);
double getBoundaryVoltagefromPeriodicBC(double V0,double freq,double t0,int V_type,int time_step);
};
#endif

View File

@@ -573,16 +573,14 @@ void ScaLBL_StokesModel::Run(){
}
}
//.......create and start timer............
double starttime,stoptime,cputime;
ScaLBL_Comm->Barrier(); comm.barrier();
starttime = MPI_Wtime();
if (rank==0) printf("****************************************************************\n");
if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax);
if (rank==0) printf("****************************************************************\n");
timestep=0;
double error = 1.0;
double flow_rate_previous = 0.0;
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
timestep++;
@@ -700,10 +698,10 @@ void ScaLBL_StokesModel::Run(){
}
}
//************************************************************************/
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;

View File

@@ -4,8 +4,10 @@
ADD_LBPM_EXECUTABLE( lbpm_color_simulator )
ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator )
ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator )
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator )
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator )
ADD_LBPM_EXECUTABLE( lbpm_freelee_SingleFluidBGK_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator )
ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator )

View File

@@ -66,7 +66,7 @@ int main(int argc, char **argv)
PoissonSolver.SetDomain();
PoissonSolver.ReadInput();
PoissonSolver.Create();
PoissonSolver.Initialize();
PoissonSolver.Initialize(0);
int timestep=0;
double error = 1.0;
@@ -74,7 +74,7 @@ int main(int argc, char **argv)
while (timestep < Study.timestepMax && error > Study.tolerance){
timestep++;
PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental
PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental
IonModel.Run(IonModel.FluidVelocityDummy,PoissonSolver.ElectricField); //solve for ion transport and electric potential
timestep++;//AA operations

View File

@@ -82,7 +82,7 @@ int main(int argc, char **argv)
PoissonSolver.SetDomain();
PoissonSolver.ReadInput();
PoissonSolver.Create();
PoissonSolver.Initialize();
PoissonSolver.Initialize(0);
int timestep=0;
@@ -94,7 +94,7 @@ int main(int argc, char **argv)
while (timestep < Study.timestepMax && error > Study.tolerance){
timestep++;
PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental
PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity
IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential

View File

@@ -51,14 +51,37 @@ int main(int argc, char **argv)
PoissonSolver.SetDomain();
PoissonSolver.ReadInput();
PoissonSolver.Create();
PoissonSolver.Initialize();
if (PoissonSolver.TestPeriodic==true){
PoissonSolver.Initialize(PoissonSolver.TestPeriodicTimeConv);
}
else {
PoissonSolver.Initialize(0);
}
//Initialize dummy charge density for test
PoissonSolver.DummyChargeDensity();
PoissonSolver.Run(PoissonSolver.ChargeDensityDummy);
PoissonSolver.getElectricPotential_debug(1);
PoissonSolver.getElectricField_debug(1);
if (PoissonSolver.TestPeriodic==true){
if (rank==0) printf("Testing periodic voltage input is enabled. Total test time is %.3g[s], saving data every %.3g[s]; user-specified time resolution is %.3g[s/lt]\n",
PoissonSolver.TestPeriodicTime,PoissonSolver.TestPeriodicSaveInterval,PoissonSolver.TestPeriodicTimeConv);
int timestep = 0;
int timeMax = int(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv);
int timeSave = int(PoissonSolver.TestPeriodicSaveInterval/PoissonSolver.TestPeriodicTimeConv);
while (timestep<timeMax){
timestep++;
PoissonSolver.Run(PoissonSolver.ChargeDensityDummy,timestep);
if (timestep%timeSave==0){
if (rank==0) printf(" Time = %.3g[s]; saving electric potential and field\n",timestep*PoissonSolver.TestPeriodicTimeConv);
PoissonSolver.getElectricPotential_debug(timestep);
PoissonSolver.getElectricField_debug(timestep);
}
}
}
else {
PoissonSolver.Run(PoissonSolver.ChargeDensityDummy,1);
PoissonSolver.getElectricPotential_debug(1);
PoissonSolver.getElectricField_debug(1);
}
if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n");
if (rank==0) printf("*************************************************************\n");

View File

@@ -79,20 +79,22 @@ int main(int argc, char **argv)
IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
IonModel.Initialize();
// Get maximal time converting factor based on Sotkes and Ion solvers
Study.getTimeConvMax_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
// Initialize LB-Poisson model
PoissonSolver.ReadParams(filename);
PoissonSolver.SetDomain();
PoissonSolver.ReadInput();
PoissonSolver.Create();
PoissonSolver.Initialize();
PoissonSolver.Initialize(Study.time_conv_max);
int timestep=0;
while (timestep < Study.timestepMax){
timestep++;
PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental
PoissonSolver.Run(IonModel.ChargeDensity,timestep);//solve Poisson equtaion to get steady-state electrical potental
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity
IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential

View File

@@ -0,0 +1,70 @@
#include <exception>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include "common/Utilities.h"
#include "models/FreeLeeModel.h"
//*******************************************************************
// Implementation of Free-Energy Two-Phase LBM (Lee model)
//*******************************************************************
int main( int argc, char **argv )
{
// Initialize
Utilities::startup( argc, argv );
// Load the input database
auto db = std::make_shared<Database>( argv[1] );
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
if (rank == 0){
printf("********************************************************\n");
printf("Running Single-Fluid Solver based on Lee LBM \n");
printf("********************************************************\n");
}
// Initialize compute device
int device=ScaLBL_SetDevice(rank);
NULL_USE( device );
ScaLBL_DeviceBarrier();
comm.barrier();
PROFILE_ENABLE(1);
//PROFILE_ENABLE_TRACE();
//PROFILE_ENABLE_MEMORY();
PROFILE_SYNCHRONIZE();
PROFILE_START("Main");
Utilities::setErrorHandlers();
auto filename = argv[1];
ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm );
LeeModel.ReadParams( filename );
LeeModel.SetDomain();
LeeModel.ReadInput();
LeeModel.Create_SingleFluid();
LeeModel.Initialize_SingleFluid();
LeeModel.Run_SingleFluid();
LeeModel.WriteDebug_SingleFluid();
PROFILE_STOP("Main");
auto file = db->getWithDefault<std::string>( "TimerFile", "lbpm_freelee_SingleFluidBGK_simulator" );
auto level = db->getWithDefault<int>( "TimerLevel", 1 );
PROFILE_SAVE( file,level );
// ****************************************************
} // Limit scope so variables that contain communicators will free before MPI_Finialize
Utilities::shutdown();
return 0;
}

View File

@@ -0,0 +1,70 @@
#include <exception>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include "common/Utilities.h"
#include "models/FreeLeeModel.h"
//*******************************************************************
// Implementation of Free-Energy Two-Phase LBM (Lee model)
//*******************************************************************
int main( int argc, char **argv )
{
// Initialize
Utilities::startup( argc, argv );
// Load the input database
auto db = std::make_shared<Database>( argv[1] );
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
if (rank == 0){
printf("********************************************************\n");
printf("Running Free Energy Lee LBM \n");
printf("********************************************************\n");
}
// Initialize compute device
int device=ScaLBL_SetDevice(rank);
NULL_USE( device );
ScaLBL_DeviceBarrier();
comm.barrier();
PROFILE_ENABLE(1);
//PROFILE_ENABLE_TRACE();
//PROFILE_ENABLE_MEMORY();
PROFILE_SYNCHRONIZE();
PROFILE_START("Main");
Utilities::setErrorHandlers();
auto filename = argv[1];
ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm );
LeeModel.ReadParams( filename );
LeeModel.SetDomain();
LeeModel.ReadInput();
LeeModel.Create_TwoFluid();
LeeModel.Initialize_TwoFluid();
LeeModel.Run_TwoFluid();
LeeModel.WriteDebug_TwoFluid();
PROFILE_STOP("Main");
auto file = db->getWithDefault<std::string>( "TimerFile", "lbpm_freelee_simulator" );
auto level = db->getWithDefault<int>( "TimerLevel", 1 );
PROFILE_SAVE( file,level );
// ****************************************************
} // Limit scope so variables that contain communicators will free before MPI_Finialize
Utilities::shutdown();
return 0;
}