Merge branch 'FOM' of github.com:JamesEMcClure/LBPM-WIA into FOM
This commit is contained in:
@@ -3,11 +3,10 @@
|
||||
|
||||
|
||||
#include "StackTrace/StackTrace.h"
|
||||
#include "common/MPI.h"
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
|
||||
namespace StackTrace
|
||||
{
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#include "common/ScaLBL.h"
|
||||
|
||||
#include <chrono>
|
||||
|
||||
|
||||
ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
|
||||
//......................................................................................
|
||||
Lock=false; // unlock the communicator
|
||||
@@ -411,20 +414,19 @@ double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np
|
||||
double FZ = 0.0;
|
||||
ScaLBL_D3Q19_Init(fq, Np);
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
Barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
for (int t=0; t<TIMESTEPS; t++){
|
||||
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, FirstInterior(), LastInterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
|
||||
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, LastExterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
|
||||
ScaLBL_D3Q19_AAeven_MRT(fq, FirstInterior(), LastInterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
|
||||
ScaLBL_D3Q19_AAeven_MRT(fq, 0, LastExterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
|
||||
}
|
||||
stoptime = MPI_Wtime();
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
Barrier();
|
||||
// Compute the walltime per timestep
|
||||
cputime = 0.5*(stoptime - starttime)/TIMESTEPS;
|
||||
double diff = std::chrono::duration<double>( t2 - t1 ).count();
|
||||
double cputime = 0.5*diff/TIMESTEPS;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
return MLUPS;
|
||||
|
||||
@@ -70,49 +70,6 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,
|
||||
double *Poros,double *Perm, double *Velocity,double Den,double *Pressure);
|
||||
// ION TRANSPORT MODEL
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np);
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np);
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np);
|
||||
|
||||
// LBM Poisson solver
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList,int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,
|
||||
int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,
|
||||
int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
|
||||
|
||||
//maybe deprecated
|
||||
//extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC,
|
||||
// int strideY, int strideZ,int start, int finish, int Np);
|
||||
|
||||
// LBM Stokes Model (adapted from MRT model)
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB,
|
||||
double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB,
|
||||
double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,
|
||||
double *Poros,double *Perm, double *Velocity,double Den,double *Pressure);
|
||||
@@ -129,6 +86,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map,
|
||||
double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel,double *Pressure,
|
||||
double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta,
|
||||
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np);
|
||||
|
||||
// ION TRANSPORT MODEL
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np);
|
||||
@@ -220,6 +178,36 @@ extern "C" void ScaLBL_D3Q7_AAeven_DFH(double *Aq, double *Bq, double *Den, doub
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double *ColorGrad, int start, int finish, int Np);
|
||||
|
||||
// FREE ENERGY LEE MODEL
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np);
|
||||
|
||||
extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad,
|
||||
double rhonA, double rhoB, double tauM, double W, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
|
||||
double rhoA, double rhoB, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi,
|
||||
double rhoA, double rhoB, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad,
|
||||
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
|
||||
int strideY, int strideZ, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad,
|
||||
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
|
||||
int strideY, int strideZ, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure,
|
||||
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure,
|
||||
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np);
|
||||
|
||||
|
||||
// BOUNDARY CONDITION ROUTINES
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int Np);
|
||||
|
||||
@@ -67,7 +67,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr <Domain
|
||||
sendCount_y =getHaloBlock(width,Nxh-width,width,2*width,width,Nzh-width,dvcSendList_y);
|
||||
sendCount_Y =getHaloBlock(width,Nxh-width,Nyh-2*width,Nyh-width,width,Nzh-width,dvcSendList_Y);
|
||||
sendCount_z =getHaloBlock(width,Nxh-width,width,Nyh-width,width,2*width,dvcSendList_z);
|
||||
sendCount_X =getHaloBlock(width,Nxh-width,width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_Z);
|
||||
sendCount_Z =getHaloBlock(width,Nxh-width,width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_Z);
|
||||
// xy
|
||||
sendCount_xy =getHaloBlock(width,2*width,width,2*width,width,Nzh-width,dvcSendList_xy);
|
||||
sendCount_xY =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,width,Nzh-width,dvcSendList_xY);
|
||||
@@ -87,7 +87,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr <Domain
|
||||
sendCount_xyz =getHaloBlock(width,2*width,width,2*width,width,2*width,dvcSendList_xyz);
|
||||
sendCount_xyZ =getHaloBlock(width,2*width,width,2*width,Nzh-2*width,Nzh-width,dvcSendList_xyZ);
|
||||
sendCount_xYz =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,width,2*width,dvcSendList_xYz);
|
||||
sendCount_xYz =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_xYZ);
|
||||
sendCount_xYZ =getHaloBlock(width,2*width,Nyh-2*width,Nyh-width,Nzh-2*width,Nzh-width,dvcSendList_xYZ);
|
||||
sendCount_Xyz =getHaloBlock(Nxh-2*width,Nxh-width,width,2*width,width,2*width,dvcSendList_Xyz);
|
||||
sendCount_XyZ =getHaloBlock(Nxh-2*width,Nxh-width,width,2*width,Nzh-2*width,Nzh-width,dvcSendList_XyZ);
|
||||
sendCount_XYz =getHaloBlock(Nxh-2*width,Nxh-width,Nyh-2*width,Nyh-width,width,2*width,dvcSendList_XYz);
|
||||
@@ -124,7 +124,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr <Domain
|
||||
recvCount_XyZ =getHaloBlock(Nxh-width,Nxh,0,width,Nzh-width,Nzh,dvcRecvList_XyZ);
|
||||
recvCount_XYz =getHaloBlock(Nxh-width,Nxh,Nyh-width,Nyh,0,width,dvcRecvList_XYz);
|
||||
recvCount_XYZ =getHaloBlock(Nxh-width,Nxh,Nyh-width,Nyh,Nzh-width,Nzh,dvcRecvList_XYZ);
|
||||
|
||||
|
||||
//......................................................................................
|
||||
ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, sendCount_x*sizeof(double)); // Allocate device memory
|
||||
ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, sendCount_X*sizeof(double)); // Allocate device memory
|
||||
@@ -288,6 +288,7 @@ void ScaLBLWideHalo_Communicator::Send(double *data){
|
||||
req1[25] = MPI_COMM_SCALBL.Isend(&sendCount_xYZ,1,rank_xYZ,sendtag+25);
|
||||
req2[25] = MPI_COMM_SCALBL.Irecv(&recvCount_Xyz,1,rank_Xyz,recvtag+25);
|
||||
//...................................................................................
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -297,11 +298,10 @@ ScaLBLWideHalo_Communicator::~ScaLBLWideHalo_Communicator()
|
||||
void ScaLBLWideHalo_Communicator::Recv(double *data){
|
||||
|
||||
//...................................................................................
|
||||
MPI_Waitall(26,req1,stat1);
|
||||
MPI_Waitall(26,req2,stat2);
|
||||
Utilities::MPI::waitAll(26,req1);
|
||||
Utilities::MPI::waitAll(26,req2);
|
||||
ScaLBL_DeviceBarrier();
|
||||
//...................................................................................
|
||||
//...................................................................................
|
||||
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, Nh);
|
||||
ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, Nh);
|
||||
ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, Nh);
|
||||
|
||||
@@ -4,6 +4,7 @@ This class implements support for halo widths larger than 1
|
||||
#ifndef WideHalo_H
|
||||
#define WideHalo_H
|
||||
#include "common/ScaLBL.h"
|
||||
#include "common/MPI.h"
|
||||
|
||||
class ScaLBLWideHalo_Communicator{
|
||||
public:
|
||||
@@ -52,9 +53,7 @@ private:
|
||||
int sendtag,recvtag;
|
||||
// Give the object its own MPI communicator
|
||||
RankInfoStruct rank_info;
|
||||
MPI_Group Group; // Group of processors associated with this domain
|
||||
MPI_Request req1[26],req2[26];
|
||||
MPI_Status stat1[26],stat2[26];
|
||||
//......................................................................................
|
||||
// MPI ranks for all 26 neighbors (faces, edges and corners)
|
||||
//......................................................................................
|
||||
@@ -95,11 +94,11 @@ private:
|
||||
int *dvcRecvList_xyZ,*dvcRecvList_XyZ,*dvcRecvList_xYZ,*dvcRecvList_XYZ;
|
||||
//......................................................................................
|
||||
|
||||
inline int getHaloBlock(int imin, int imax, int jmin, int jmax, int kmin, int kmax, int *dvcList){
|
||||
inline int getHaloBlock(int imin, int imax, int jmin, int jmax, int kmin, int kmax, int *& dvcList){
|
||||
int count = 0;
|
||||
int *List;
|
||||
List = new int [(imax-imin)*(jmax-jmin)*(kmax-kmin)];
|
||||
for (int k=kmin; k<kmax; k++){
|
||||
for (k=kmin; k<kmax; k++){
|
||||
for (j=jmin; j<jmax; j++){
|
||||
for (i=imin; i<imax; i++){
|
||||
List[count++] = k*Nxh*Nyh + j*Nxh + i;
|
||||
|
||||
4684
cpu/FreeLee.cpp
4684
cpu/FreeLee.cpp
File diff suppressed because it is too large
Load Diff
414
cuda/BGK.cu
414
cuda/BGK.cu
@@ -12,111 +12,111 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish,
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
|
||||
if ( n<finish ){
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
f7 = dist[8*Np+n];
|
||||
f8 = dist[7*Np+n];
|
||||
f9 = dist[10*Np+n];
|
||||
f10 = dist[9*Np+n];
|
||||
f11 = dist[12*Np+n];
|
||||
f12 = dist[11*Np+n];
|
||||
f13 = dist[14*Np+n];
|
||||
f14 = dist[13*Np+n];
|
||||
f15 = dist[16*Np+n];
|
||||
f16 = dist[15*Np+n];
|
||||
f17 = dist[18*Np+n];
|
||||
f18 = dist[17*Np+n];
|
||||
if ( n<finish ){
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
f7 = dist[8*Np+n];
|
||||
f8 = dist[7*Np+n];
|
||||
f9 = dist[10*Np+n];
|
||||
f10 = dist[9*Np+n];
|
||||
f11 = dist[12*Np+n];
|
||||
f12 = dist[11*Np+n];
|
||||
f13 = dist[14*Np+n];
|
||||
f14 = dist[13*Np+n];
|
||||
f15 = dist[16*Np+n];
|
||||
f16 = dist[15*Np+n];
|
||||
f17 = dist[18*Np+n];
|
||||
f18 = dist[17*Np+n];
|
||||
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
uu = 1.5*(ux*ux+uy*uy+uz*uz);
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
uu = 1.5*(ux*ux+uy*uy+uz*uz);
|
||||
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
|
||||
|
||||
// q = 1
|
||||
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
|
||||
// q = 1
|
||||
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
|
||||
|
||||
// q=2
|
||||
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
|
||||
// q=2
|
||||
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
|
||||
|
||||
// q = 3
|
||||
dist[3*Np+n] = f3*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
|
||||
// q = 3
|
||||
dist[3*Np+n] = f3*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
|
||||
|
||||
// q = 4
|
||||
dist[4*Np+n] = f4*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
|
||||
// q = 4
|
||||
dist[4*Np+n] = f4*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
|
||||
|
||||
// q = 5
|
||||
dist[5*Np+n] = f5*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
|
||||
// q = 5
|
||||
dist[5*Np+n] = f5*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
|
||||
|
||||
// q = 6
|
||||
dist[6*Np+n] = f6*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
|
||||
// q = 6
|
||||
dist[6*Np+n] = f6*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
|
||||
|
||||
// q = 7
|
||||
dist[7*Np+n] = f7*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
|
||||
// q = 7
|
||||
dist[7*Np+n] = f7*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
|
||||
|
||||
// q = 8
|
||||
dist[8*Np+n] = f8*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
|
||||
// q = 8
|
||||
dist[8*Np+n] = f8*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
|
||||
|
||||
// q = 9
|
||||
dist[9*Np+n] = f9*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
|
||||
// q = 9
|
||||
dist[9*Np+n] = f9*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
|
||||
|
||||
// q = 10
|
||||
dist[10*Np+n] = f10*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
|
||||
// q = 10
|
||||
dist[10*Np+n] = f10*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
|
||||
|
||||
// q = 11
|
||||
dist[11*Np+n] = f11*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
|
||||
// q = 11
|
||||
dist[11*Np+n] = f11*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
|
||||
|
||||
// q = 12
|
||||
dist[12*Np+n] = f12*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
|
||||
// q = 12
|
||||
dist[12*Np+n] = f12*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
|
||||
|
||||
// q = 13
|
||||
dist[13*Np+n] = f13*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
|
||||
// q = 13
|
||||
dist[13*Np+n] = f13*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
|
||||
|
||||
// q= 14
|
||||
dist[14*Np+n] = f14*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
|
||||
// q= 14
|
||||
dist[14*Np+n] = f14*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
|
||||
|
||||
// q = 15
|
||||
dist[15*Np+n] = f15*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
|
||||
// q = 15
|
||||
dist[15*Np+n] = f15*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
|
||||
|
||||
// q = 16
|
||||
dist[16*Np+n] = f16*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
|
||||
// q = 16
|
||||
dist[16*Np+n] = f16*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
|
||||
|
||||
// q = 17
|
||||
dist[17*Np+n] = f17*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
|
||||
// q = 17
|
||||
dist[17*Np+n] = f17*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
|
||||
|
||||
// q = 18
|
||||
dist[18*Np+n] = f18*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
|
||||
// q = 18
|
||||
dist[18*Np+n] = f18*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
|
||||
|
||||
//........................................................................
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -131,180 +131,180 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
|
||||
if ( n<finish ){
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
// q=1
|
||||
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
f1 = dist[nr1]; // reading the f1 data into register fq
|
||||
if ( n<finish ){
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
// q=1
|
||||
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
f1 = dist[nr1]; // reading the f1 data into register fq
|
||||
|
||||
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
f2 = dist[nr2]; // reading the f2 data into register fq
|
||||
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
f2 = dist[nr2]; // reading the f2 data into register fq
|
||||
|
||||
// q=3
|
||||
nr3 = neighborList[n+2*Np]; // neighbor 4
|
||||
f3 = dist[nr3];
|
||||
// q=3
|
||||
nr3 = neighborList[n+2*Np]; // neighbor 4
|
||||
f3 = dist[nr3];
|
||||
|
||||
// q = 4
|
||||
nr4 = neighborList[n+3*Np]; // neighbor 3
|
||||
f4 = dist[nr4];
|
||||
// q = 4
|
||||
nr4 = neighborList[n+3*Np]; // neighbor 3
|
||||
f4 = dist[nr4];
|
||||
|
||||
// q=5
|
||||
nr5 = neighborList[n+4*Np];
|
||||
f5 = dist[nr5];
|
||||
// q=5
|
||||
nr5 = neighborList[n+4*Np];
|
||||
f5 = dist[nr5];
|
||||
|
||||
// q = 6
|
||||
nr6 = neighborList[n+5*Np];
|
||||
f6 = dist[nr6];
|
||||
|
||||
// q=7
|
||||
nr7 = neighborList[n+6*Np];
|
||||
f7 = dist[nr7];
|
||||
// q = 6
|
||||
nr6 = neighborList[n+5*Np];
|
||||
f6 = dist[nr6];
|
||||
|
||||
// q = 8
|
||||
nr8 = neighborList[n+7*Np];
|
||||
f8 = dist[nr8];
|
||||
// q=7
|
||||
nr7 = neighborList[n+6*Np];
|
||||
f7 = dist[nr7];
|
||||
|
||||
// q=9
|
||||
nr9 = neighborList[n+8*Np];
|
||||
f9 = dist[nr9];
|
||||
// q = 8
|
||||
nr8 = neighborList[n+7*Np];
|
||||
f8 = dist[nr8];
|
||||
|
||||
// q = 10
|
||||
nr10 = neighborList[n+9*Np];
|
||||
f10 = dist[nr10];
|
||||
// q=9
|
||||
nr9 = neighborList[n+8*Np];
|
||||
f9 = dist[nr9];
|
||||
|
||||
// q=11
|
||||
nr11 = neighborList[n+10*Np];
|
||||
f11 = dist[nr11];
|
||||
// q = 10
|
||||
nr10 = neighborList[n+9*Np];
|
||||
f10 = dist[nr10];
|
||||
|
||||
// q=12
|
||||
nr12 = neighborList[n+11*Np];
|
||||
f12 = dist[nr12];
|
||||
// q=11
|
||||
nr11 = neighborList[n+10*Np];
|
||||
f11 = dist[nr11];
|
||||
|
||||
// q=13
|
||||
nr13 = neighborList[n+12*Np];
|
||||
f13 = dist[nr13];
|
||||
// q=12
|
||||
nr12 = neighborList[n+11*Np];
|
||||
f12 = dist[nr12];
|
||||
|
||||
// q=14
|
||||
nr14 = neighborList[n+13*Np];
|
||||
f14 = dist[nr14];
|
||||
// q=13
|
||||
nr13 = neighborList[n+12*Np];
|
||||
f13 = dist[nr13];
|
||||
|
||||
// q=15
|
||||
nr15 = neighborList[n+14*Np];
|
||||
f15 = dist[nr15];
|
||||
// q=14
|
||||
nr14 = neighborList[n+13*Np];
|
||||
f14 = dist[nr14];
|
||||
|
||||
// q=16
|
||||
nr16 = neighborList[n+15*Np];
|
||||
f16 = dist[nr16];
|
||||
// q=15
|
||||
nr15 = neighborList[n+14*Np];
|
||||
f15 = dist[nr15];
|
||||
|
||||
// q=17
|
||||
//fq = dist[18*Np+n];
|
||||
nr17 = neighborList[n+16*Np];
|
||||
f17 = dist[nr17];
|
||||
// q=16
|
||||
nr16 = neighborList[n+15*Np];
|
||||
f16 = dist[nr16];
|
||||
|
||||
// q=18
|
||||
nr18 = neighborList[n+17*Np];
|
||||
f18 = dist[nr18];
|
||||
// q=17
|
||||
//fq = dist[18*Np+n];
|
||||
nr17 = neighborList[n+16*Np];
|
||||
f17 = dist[nr17];
|
||||
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
uu = 1.5*(ux*ux+uy*uy+uz*uz);
|
||||
// q=18
|
||||
nr18 = neighborList[n+17*Np];
|
||||
f18 = dist[nr18];
|
||||
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
uu = 1.5*(ux*ux+uy*uy+uz*uz);
|
||||
|
||||
// q = 1
|
||||
dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
|
||||
|
||||
// q=2
|
||||
dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
|
||||
// q = 1
|
||||
dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
|
||||
|
||||
// q = 3
|
||||
dist[nr4] = f3*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
|
||||
// q=2
|
||||
dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
|
||||
|
||||
// q = 4
|
||||
dist[nr3] = f4*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
|
||||
// q = 3
|
||||
dist[nr4] = f3*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
|
||||
|
||||
// q = 5
|
||||
dist[nr6] = f5*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
|
||||
// q = 4
|
||||
dist[nr3] = f4*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
|
||||
|
||||
// q = 6
|
||||
dist[nr5] = f6*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
|
||||
// q = 5
|
||||
dist[nr6] = f5*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
|
||||
|
||||
// q = 7
|
||||
dist[nr8] = f7*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
|
||||
// q = 6
|
||||
dist[nr5] = f6*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
|
||||
|
||||
// q = 8
|
||||
dist[nr7] = f8*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
|
||||
// q = 7
|
||||
dist[nr8] = f7*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
|
||||
|
||||
// q = 9
|
||||
dist[nr10] = f9*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
|
||||
// q = 8
|
||||
dist[nr7] = f8*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
|
||||
|
||||
// q = 10
|
||||
dist[nr9] = f10*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
|
||||
// q = 9
|
||||
dist[nr10] = f9*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
|
||||
|
||||
// q = 11
|
||||
dist[nr12] = f11*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
|
||||
// q = 10
|
||||
dist[nr9] = f10*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
|
||||
|
||||
// q = 12
|
||||
dist[nr11] = f12*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
|
||||
// q = 11
|
||||
dist[nr12] = f11*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
|
||||
|
||||
// q = 13
|
||||
dist[nr14] = f13*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
|
||||
// q = 12
|
||||
dist[nr11] = f12*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
|
||||
|
||||
// q= 14
|
||||
dist[nr13] = f14*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
|
||||
// q = 13
|
||||
dist[nr14] = f13*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
|
||||
|
||||
// q = 15
|
||||
dist[nr16] = f15*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
|
||||
// q= 14
|
||||
dist[nr13] = f14*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
|
||||
|
||||
// q = 16
|
||||
dist[nr15] = f16*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
|
||||
// q = 15
|
||||
dist[nr16] = f15*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
|
||||
|
||||
// q = 17
|
||||
dist[nr18] = f17*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
|
||||
// q = 16
|
||||
dist[nr15] = f16*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
|
||||
|
||||
// q = 18
|
||||
dist[nr17] = f18*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
|
||||
// q = 17
|
||||
dist[nr18] = f17*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
|
||||
|
||||
// q = 18
|
||||
dist[nr17] = f18*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
|
||||
dvc_ScaLBL_D3Q19_AAeven_BGK<<<NBLOCKS,NTHREADS >>>(dist,start,finish,Np,rlx,Fx,Fy,Fz);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
dvc_ScaLBL_D3Q19_AAeven_BGK<<<NBLOCKS,NTHREADS >>>(dist,start,finish,Np,rlx,Fx,Fy,Fz);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
dvc_ScaLBL_D3Q19_AAodd_BGK<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz);
|
||||
dvc_ScaLBL_D3Q19_AAodd_BGK<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
2016
cuda/FreeLee.cu
Normal file
2016
cuda/FreeLee.cu
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,7 @@
|
||||
SET( HIP_SEPERABLE_COMPILATION ON )
# Pick up every .cu file in this directory so that newly added kernel files
# are compiled without having to extend a hand-maintained list.
FILE( GLOB HIP_SOURCES "*.cu" )
SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
# Define the lbpm-hip target exactly once (a second HIP_ADD_LIBRARY with the same name is an error).
HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
#TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so )
#TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip )
#ADD_DEPENDENCIES( lbpm-hip copy-include )
|
||||
|
||||
72
hip/D3Q19.cu
72
hip/D3Q19.cu
@@ -89,9 +89,25 @@ __global__ void sum_kernel_block(double *sum, double *input, int n)
|
||||
|
||||
// Sums `val` over the lanes of a warp using a shared-memory tree reduction and
// returns the total (tmp[0]) to every calling thread.
//
// Each stage folds the upper half of the active range onto the lower half.
// Only lanes with id < offset take part, so the highest index touched is
// 2*offset-1 <= warpSize-1 and no lane reads past the end of tmp[] or reads a
// slot that another lane is updating in the same stage.
//
// NOTE(review): tmp[] is a single __shared__ buffer indexed by lane id only, so
// lanes with the same id in different warps of one block use the same slot —
// confirm how callers invoke this when a block holds more than one warp.
// NOTE(review): __syncthreads() is a block-wide barrier; callers must reach
// this function from every thread of the block — verify against call sites.
__inline__ __device__
double warpReduceSum(double val) {
#if 0
    for (int offset = warpSize/2; offset > 0; offset /= 2)
        val += __shfl_down_sync(0xFFFFFFFF, val, offset, 32);
    return val;
#else
    short int id = threadIdx.x % warpSize;
    __shared__ double tmp[64];
    tmp[id] = val;
    __syncthreads();
    // Offsets run 32 (only when warpSize == 64), 16, 8, 4, 2, 1 — the same
    // stages as before, now restricted to the lanes that own a valid pair.
    for (int offset = warpSize/2; offset > 0; offset /= 2) {
        if (id < offset) {
            tmp[id] += tmp[id+offset];
        }
        __syncthreads();
    }
    return tmp[0];
#endif
}
|
||||
|
||||
__inline__ __device__
|
||||
@@ -1730,6 +1746,44 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist,
|
||||
}
|
||||
}
|
||||
|
||||
// For each of the first `count` sites in `list`, replaces the values stored in
// slots 6, 12, 13, 16 and 17 of `dist` (addressed as dist[q*Np + site]) by
// (constant - value), with constant 0.111... for slot 6 and 0.0555... for the others.
// The constants look like 1/9 and 1/18 — presumably lattice weights; TODO confirm.
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;   // threads beyond the list have no work

    const int site = list[tid];

    // Gather all five reflected values before any slot is overwritten.
    const double r6  = 0.111111111111111111111111 - dist[6*Np+site];
    const double r12 = 0.05555555555555555555556 - dist[12*Np+site];
    const double r13 = 0.05555555555555555555556 - dist[13*Np+site];
    const double r16 = 0.05555555555555555555556 - dist[16*Np+site];
    const double r17 = 0.05555555555555555555556 - dist[17*Np+site];

    dist[6*Np+site]  = r6;
    dist[12*Np+site] = r12;
    dist[13*Np+site] = r13;
    dist[16*Np+site] = r16;
    dist[17*Np+site] = r17;
}
|
||||
|
||||
// For each of the first `count` sites in `list`, replaces the values stored in
// slots 5, 11, 14, 15 and 18 of `dist` (addressed as dist[q*Np + site]) by
// (constant - value), with constant 0.111... for slot 5 and 0.0555... for the others.
// The constants look like 1/9 and 1/18 — presumably lattice weights; TODO confirm.
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;   // threads beyond the list have no work

    const int site = list[tid];

    // Gather all five reflected values before any slot is overwritten.
    const double r5  = 0.111111111111111111111111 - dist[5*Np+site];
    const double r11 = 0.05555555555555555555556 - dist[11*Np+site];
    const double r14 = 0.05555555555555555555556 - dist[14*Np+site] ;
    const double r15 = 0.05555555555555555555556 - dist[15*Np+site];
    const double r18 = 0.05555555555555555555556 - dist[18*Np+site];

    dist[5*Np+site]  = r5;
    dist[11*Np+site] = r11;
    dist[14*Np+site] = r14;
    dist[15*Np+site] = r15;
    dist[18*Np+site] = r18;
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
@@ -2605,6 +2659,24 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub
|
||||
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q19_Reflection_BC_z<<<GRID,512>>>(list, dist, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("HIP error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q19_Reflection_BC_Z<<<GRID,512>>>(list, dist, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("HIP error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" double deviceReduce(double *in, double* out, int N) {
|
||||
int threads = 512;
|
||||
int blocks = min((N + threads - 1) / threads, 1024);
|
||||
|
||||
536
hip/D3Q7BC.cu
Normal file
536
hip/D3Q7BC.cu
Normal file
@@ -0,0 +1,536 @@
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 560
|
||||
#define NTHREADS 128
|
||||
|
||||
// For entry k < count: reads the boundary value at BounceBackSolid_list[k] and
// overwrites dist at BounceBackDist_list[k] with (-old + 0.25*boundaryValue).
//   dist                 - distribution array, updated in place
//   BoundaryValue        - values indexed by the solid-list entries
//   BounceBackDist_list  - indices into dist
//   BounceBackSolid_list - indices into BoundaryValue
//   count                - number of list entries to process
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int k = blockIdx.x*blockDim.x + threadIdx.x;
    if (k >= count) return;   // surplus threads exit immediately

    const int distIndex  = BounceBackDist_list[k];
    const int solidIndex = BounceBackSolid_list[k];
    const double wallValue = BoundaryValue[solidIndex];   // boundary value taken from a solid site
    const double current   = dist[distIndex];
    // The original source labels 0.25 as the speed-of-sound constant of the D3Q7 lattice.
    dist[distIndex] = -1.0*current + wallValue*0.25;
}
|
||||
|
||||
// For entry k < count: adds the boundary value at BounceBackSolid_list[k] to
// the dist entry at BounceBackDist_list[k].
//   dist                 - distribution array, updated in place
//   BoundaryValue        - values indexed by the solid-list entries
//   BounceBackDist_list  - indices into dist
//   BounceBackSolid_list - indices into BoundaryValue
//   count                - number of list entries to process
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int k = blockIdx.x*blockDim.x + threadIdx.x;
    if (k >= count) return;   // surplus threads exit immediately

    const int distIndex  = BounceBackDist_list[k];
    const int solidIndex = BounceBackSolid_list[k];
    const double wallValue = BoundaryValue[solidIndex];   // boundary value taken from a solid site
    const double current   = dist[distIndex];
    dist[distIndex] = current + wallValue;
}
|
||||
|
||||
// For each listed site, sets slot 6 of `dist` so that the seven values
// (slot 0, slots 1-5 as stored, and the new slot 6) add up to Vin.
//   list  - site indices (first `count` entries are read)
//   dist  - distribution array addressed as dist[q*Np + site]
//   Vin   - prescribed total for the site
//   Np    - stride between distribution slots
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known values; named after the original f-index, read from the slot shown.
    const double g0 = dist[site];
    const double g1 = dist[2*Np+site];
    const double g2 = dist[1*Np+site];
    const double g3 = dist[4*Np+site];
    const double g4 = dist[3*Np+site];
    const double g6 = dist[5*Np+site];
    //...................................................
    // Remaining value closes the sum to Vin.
    dist[6*Np+site] = Vin - (g0+g1+g2+g3+g4+g6);
}
|
||||
|
||||
// For each listed site, sets slot 5 of `dist` so that the seven values
// (slot 0, slots 1-4 and 6 as stored, and the new slot 5) add up to Vout.
//   list  - site indices (first `count` entries are read)
//   dist  - distribution array addressed as dist[q*Np + site]
//   Vout  - prescribed total for the site
//   Np    - stride between distribution slots
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known values; named after the original f-index, read from the slot shown.
    const double g0 = dist[site];
    const double g1 = dist[2*Np+site];
    const double g2 = dist[1*Np+site];
    const double g3 = dist[4*Np+site];
    const double g4 = dist[3*Np+site];
    const double g5 = dist[6*Np+site];
    //...................................................
    // Remaining value closes the sum to Vout.
    dist[5*Np+site] = Vout - (g0+g1+g2+g3+g4+g5);
}
|
||||
|
||||
// Neighbor-list variant: for each listed site, gathers six values of `dist`
// (slot 0 directly, the rest through d_neighborList) and writes
// Vin minus their sum to the location given by d_neighborList[site + 4*Np].
//   d_neighborList - index table addressed as d_neighborList[site + k*Np]
//   list           - site indices (first `count` entries are read)
//   dist           - distribution array, updated in place
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double g0 = dist[site];
    const double g1 = dist[d_neighborList[site]];
    const double g2 = dist[d_neighborList[site+Np]];
    const double g3 = dist[d_neighborList[site+2*Np]];
    const double g4 = dist[d_neighborList[site+3*Np]];
    const double g6 = dist[d_neighborList[site+5*Np]];

    // Unknown distribution: stored where neighbor entry 4 points.
    const int target = d_neighborList[site+4*Np];
    dist[target] = Vin - (g0+g1+g2+g3+g4+g6);
}
|
||||
|
||||
// Neighbor-list variant: for each listed site, gathers six values of `dist`
// (slot 0 directly, the rest through d_neighborList) and writes
// Vout minus their sum to the location given by d_neighborList[site + 5*Np].
//   d_neighborList - index table addressed as d_neighborList[site + k*Np]
//   list           - site indices (first `count` entries are read)
//   dist           - distribution array, updated in place
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double g0 = dist[site];
    const double g1 = dist[d_neighborList[site]];
    const double g2 = dist[d_neighborList[site+Np]];
    const double g3 = dist[d_neighborList[site+2*Np]];
    const double g4 = dist[d_neighborList[site+3*Np]];
    const double g5 = dist[d_neighborList[site+4*Np]];

    // Unknown distribution: stored where neighbor entry 5 points.
    const int target = d_neighborList[site+5*Np];
    dist[target] = Vout - (g0+g1+g2+g3+g4+g5);
}
|
||||
|
||||
// Writes Vin into Psi at Map[list[k]] for every k < count.
//   list  - indices into Map
//   Map   - indices into Psi
//   Psi   - array receiving the value
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
    const int k = blockIdx.x*blockDim.x + threadIdx.x;
    if (k >= count) return;

    const int site   = list[k];
    const int mapped = Map[site];
    Psi[mapped] = Vin;
}
|
||||
|
||||
|
||||
// Writes Vout into Psi at Map[list[k]] for every k < count.
//   list  - indices into Map
//   Map   - indices into Psi
//   Psi   - array receiving the value
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
    const int k = blockIdx.x*blockDim.x + threadIdx.x;
    if (k >= count) return;

    const int site   = list[k];
    const int mapped = Map[site];
    Psi[mapped] = Vout;
}
|
||||
|
||||
// For each listed site, sets slot 6 of `dist` so that the seven values
// (slot 0, slots 1-5 as stored, and the new slot 6) add up to Cin.
//   list  - site indices (first `count` entries are read)
//   dist  - distribution array addressed as dist[q*Np + site]
//   Cin   - prescribed total for the site
//   Np    - stride between distribution slots
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known values; named after the original f-index, read from the slot shown.
    const double g0 = dist[site];
    const double g1 = dist[2*Np+site];
    const double g2 = dist[1*Np+site];
    const double g3 = dist[4*Np+site];
    const double g4 = dist[3*Np+site];
    const double g6 = dist[5*Np+site];
    //...................................................
    // Remaining value closes the sum to Cin.
    dist[6*Np+site] = Cin - (g0+g1+g2+g3+g4+g6);
}
|
||||
|
||||
// For each listed site, sets slot 5 of `dist` so that the seven values
// (slot 0, slots 1-4 and 6 as stored, and the new slot 5) add up to Cout.
//   list  - site indices (first `count` entries are read)
//   dist  - distribution array addressed as dist[q*Np + site]
//   Cout  - prescribed total for the site
//   Np    - stride between distribution slots
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // Known values; named after the original f-index, read from the slot shown.
    const double g0 = dist[site];
    const double g1 = dist[2*Np+site];
    const double g2 = dist[1*Np+site];
    const double g3 = dist[4*Np+site];
    const double g4 = dist[3*Np+site];
    const double g5 = dist[6*Np+site];
    //...................................................
    // Remaining value closes the sum to Cout.
    dist[5*Np+site] = Cout - (g0+g1+g2+g3+g4+g5);
}
|
||||
|
||||
// Neighbor-list variant: for each listed site, gathers six values of `dist`
// (slot 0 directly, the rest through d_neighborList) and writes
// Cin minus their sum to the location given by d_neighborList[site + 4*Np].
//   d_neighborList - index table addressed as d_neighborList[site + k*Np]
//   list           - site indices (first `count` entries are read)
//   dist           - distribution array, updated in place
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double g0 = dist[site];
    const double g1 = dist[d_neighborList[site]];
    const double g2 = dist[d_neighborList[site+Np]];
    const double g3 = dist[d_neighborList[site+2*Np]];
    const double g4 = dist[d_neighborList[site+3*Np]];
    const double g6 = dist[d_neighborList[site+5*Np]];

    // Unknown distribution: stored where neighbor entry 4 points.
    const int target = d_neighborList[site+4*Np];
    dist[target] = Cin - (g0+g1+g2+g3+g4+g6);
}
|
||||
|
||||
// Neighbor-list variant: for each listed site, gathers six values of `dist`
// (slot 0 directly, the rest through d_neighborList) and writes
// Cout minus their sum to the location given by d_neighborList[site + 5*Np].
//   d_neighborList - index table addressed as d_neighborList[site + k*Np]
//   list           - site indices (first `count` entries are read)
//   dist           - distribution array, updated in place
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double g0 = dist[site];
    const double g1 = dist[d_neighborList[site]];
    const double g2 = dist[d_neighborList[site+Np]];
    const double g3 = dist[d_neighborList[site+2*Np]];
    const double g4 = dist[d_neighborList[site+3*Np]];
    const double g5 = dist[d_neighborList[site+4*Np]];

    // Unknown distribution: stored where neighbor entry 5 points.
    const int target = d_neighborList[site+5*Np];
    dist[target] = Cout - (g0+g1+g2+g3+g4+g5);
}
|
||||
|
||||
// For each listed site, computes slot 6 of `dist` from the prescribed flux,
// the relaxation time, the local VelocityZ value and the six other stored values.
// NOTE: FluxIn is the inward flux.
//   list      - site indices (first `count` entries are read)
//   dist      - distribution array addressed as dist[q*Np + site]
//   tau       - relaxation time used in the 0.5/tau factors
//   VelocityZ - per-site value, read at VelocityZ[site]
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double g0 = dist[site];
    const double g1 = dist[2*Np+site];
    const double g2 = dist[1*Np+site];
    const double g3 = dist[4*Np+site];
    const double g4 = dist[3*Np+site];
    const double g6 = dist[5*Np+site];
    const double knownSum = g0+g1+g2+g3+g4+g6;
    const double wz = VelocityZ[site];
    //...................................................
    const double numerator   = FluxIn+(1.0-0.5/tau)*g6-0.5*wz*knownSum/tau;
    const double denominator = 1.0-0.5/tau+0.5*wz/tau;
    dist[6*Np+site] = numerator/denominator;
}
|
||||
|
||||
// For each listed site, computes slot 5 of `dist` from the prescribed flux,
// the relaxation time, the local VelocityZ value and the six other stored values.
// NOTE: FluxIn is the inward flux.
//   list      - site indices (first `count` entries are read)
//   dist      - distribution array addressed as dist[q*Np + site]
//   tau       - relaxation time used in the 0.5/tau factors
//   VelocityZ - per-site value, read at VelocityZ[site]
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double g0 = dist[site];
    const double g1 = dist[2*Np+site];
    const double g2 = dist[1*Np+site];
    const double g3 = dist[4*Np+site];
    const double g4 = dist[3*Np+site];
    const double g5 = dist[6*Np+site];
    const double knownSum = g0+g1+g2+g3+g4+g5;
    const double wz = VelocityZ[site];
    //...................................................
    const double numerator   = FluxIn+(1.0-0.5/tau)*g5+0.5*wz*knownSum/tau;
    const double denominator = 1.0-0.5/tau-0.5*wz/tau;
    dist[5*Np+site] = numerator/denominator;
}
|
||||
|
||||
// Neighbor-list variant of the flux condition: gathers six values of `dist`
// (slot 0 directly, the rest through d_neighborList), evaluates the flux
// expression and stores the result where d_neighborList[site + 4*Np] points.
// NOTE: FluxIn is the inward flux.
//   d_neighborList - index table addressed as d_neighborList[site + k*Np]
//   tau            - relaxation time used in the 0.5/tau factors
//   VelocityZ      - per-site value, read at VelocityZ[site]
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double g0 = dist[site];
    const double g1 = dist[d_neighborList[site]];
    const double g2 = dist[d_neighborList[site+Np]];
    const double g3 = dist[d_neighborList[site+2*Np]];
    const double g4 = dist[d_neighborList[site+3*Np]];
    const double g6 = dist[d_neighborList[site+5*Np]];

    const double knownSum = g0+g1+g2+g3+g4+g6;
    const double wz = VelocityZ[site];
    //...................................................
    const double numerator   = FluxIn+(1.0-0.5/tau)*g6-0.5*wz*knownSum/tau;
    const double denominator = 1.0-0.5/tau+0.5*wz/tau;

    // Unknown distribution: stored where neighbor entry 4 points.
    const int target = d_neighborList[site+4*Np];
    dist[target] = numerator/denominator;
}
|
||||
|
||||
// Neighbor-list variant of the flux condition: gathers six values of `dist`
// (slot 0 directly, the rest through d_neighborList), evaluates the flux
// expression and stores the result where d_neighborList[site + 5*Np] points.
// NOTE: FluxIn is the inward flux.
//   d_neighborList - index table addressed as d_neighborList[site + k*Np]
//   tau            - relaxation time used in the 0.5/tau factors
//   VelocityZ      - per-site value, read at VelocityZ[site]
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double g0 = dist[site];
    const double g1 = dist[d_neighborList[site]];
    const double g2 = dist[d_neighborList[site+Np]];
    const double g3 = dist[d_neighborList[site+2*Np]];
    const double g4 = dist[d_neighborList[site+3*Np]];
    const double g5 = dist[d_neighborList[site+4*Np]];

    const double knownSum = g0+g1+g2+g3+g4+g5;
    const double wz = VelocityZ[site];
    //...................................................
    const double numerator   = FluxIn+(1.0-0.5/tau)*g5+0.5*wz*knownSum/tau;
    const double denominator = 1.0-0.5/tau-0.5*wz/tau;

    // Unknown distribution: stored where neighbor entry 5 points.
    const int target = d_neighborList[site+5*Np];
    dist[target] = numerator/denominator;
}
|
||||
//*************************************************************************
|
||||
|
||||
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
2017
hip/FreeLee.cu
Normal file
2017
hip/FreeLee.cu
Normal file
File diff suppressed because it is too large
Load Diff
2745
hip/Greyscale.cu
Normal file
2745
hip/Greyscale.cu
Normal file
File diff suppressed because it is too large
Load Diff
3038
hip/GreyscaleColor.cu
Normal file
3038
hip/GreyscaleColor.cu
Normal file
File diff suppressed because it is too large
Load Diff
392
hip/Ion.cu
Normal file
392
hip/Ion.cu
Normal file
@@ -0,0 +1,392 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
// For every site in [start, finish) handled by this thread, stores in Den[site]
// the sum of dist[site] and the six values of dist reached through
// neighborList[site + k*Np], k = 0..5, accumulated in that order.
//   neighborList  - index table addressed as neighborList[site + k*Np]
//   dist          - distribution array (read only here)
//   Den           - output, one value per site
//   start, finish - half-open site range
//   Np            - stride of neighborList; also sets the per-thread pass count
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
    // Number of sites each thread may visit, derived from the fixed launch macros.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        //........Get 1-D index for this thread....................
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (!(site<finish)) continue;

        // q=0 is read in place; q=1..6 come through the neighbor table.
        double total = dist[site];
        for (int k=0; k<6; k++){
            total += dist[neighborList[site+k*Np]];
        }
        Den[site]=total;
    }
}
|
||||
|
||||
// For every site in [start, finish) handled by this thread, stores in Den[site]
// the sum of the seven values dist[q*Np + site], added in the slot order
// 0, 2, 1, 4, 3, 6, 5.
//   dist          - distribution array (read only here)
//   Den           - output, one value per site
//   start, finish - half-open site range
//   Np            - stride between distribution slots; also sets the pass count
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
    // Number of sites each thread may visit, derived from the fixed launch macros.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        //........Get 1-D index for this thread....................
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (!(site<finish)) continue;

        // Left-to-right chain keeps the original accumulation order (q = 0..6).
        Den[site] = dist[site]
                  + dist[2*Np+site]
                  + dist[1*Np+site]
                  + dist[4*Np+site]
                  + dist[3*Np+site]
                  + dist[6*Np+site]
                  + dist[5*Np+site];
    }
}
|
||||
|
||||
// For every site in [start, finish) handled by this thread: reads the seven
// values of `dist` (slot 0 in place, the others through neighborList), then
// relaxes each toward a target built from Den[site] and the sum of Velocity and
// the field-induced velocity zi*Di/Vt*ElectricField, writing each result to the
// paired neighbor location (value 1 -> neighbor entry 2's target, and so on).
// Only targets linear in the velocity are used; the original source keeps
// variants with quadratic velocity terms commented out.
//   neighborList  - index table addressed as neighborList[site + k*Np]
//   dist          - distribution array, updated in place
//   Den           - per-site value Ci
//   Velocity      - three components stored at site + {0,1,2}*Np
//   ElectricField - three components stored at site + {0,1,2}*Np
//   Di, zi, Vt    - combined as zi*Di/Vt to scale the electric field
//   rlx           - relaxation weight
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    // Number of sites each thread may visit, derived from the fixed launch macros.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        //........Get 1-D index for this thread....................
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (!(site<finish)) continue;

        // Load per-site data.
        const double conc = Den[site];
        const double fieldX = ElectricField[site+0*Np];
        const double fieldY = ElectricField[site+1*Np];
        const double fieldZ = ElectricField[site+2*Np];
        const double velX = Velocity[site+0*Np];
        const double velY = Velocity[site+1*Np];
        const double velZ = Velocity[site+2*Np];
        // Electrochemically induced velocity.
        const double driftX = zi*Di/Vt*fieldX;
        const double driftY = zi*Di/Vt*fieldY;
        const double driftZ = zi*Di/Vt*fieldZ;

        // Gather all seven values before anything is overwritten.
        const double g0 = dist[site];
        const int to1 = neighborList[site];
        const double g1 = dist[to1];
        const int to2 = neighborList[site+Np];
        const double g2 = dist[to2];
        const int to3 = neighborList[site+2*Np];
        const double g3 = dist[to3];
        const int to4 = neighborList[site+3*Np];
        const double g4 = dist[to4];
        const int to5 = neighborList[site+4*Np];
        const double g5 = dist[to5];
        const int to6 = neighborList[site+5*Np];
        const double g6 = dist[to6];

        // q = 0
        dist[site] = g0*(1.0-rlx)+rlx*0.25*conc;
        // q = 1 (stored at the q = 2 location) and q = 2 (stored at the q = 1 location)
        dist[to2] = g1*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(velX+driftX));
        dist[to1] = g2*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(velX+driftX));
        // q = 3 / q = 4, swapped likewise
        dist[to4] = g3*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(velY+driftY));
        dist[to3] = g4*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(velY+driftY));
        // q = 5 / q = 6, swapped likewise
        dist[to6] = g5*(1.0-rlx) + rlx*0.125*conc*(1.0+4.0*(velZ+driftZ));
        dist[to5] = g6*(1.0-rlx) + rlx*0.125*conc*(1.0-4.0*(velZ+driftZ));
    }
}
|
||||
|
||||
// D3Q7 ion kernel, AAeven variant: each site relaxes its own seven slots
// dist[q*Np+n] in place; no neighbor list is involved.
//   dist          - distributions; slot q of site n is dist[q*Np+n]
//   Den           - concentration Ci, read at Den[n]
//   Velocity      - components at Velocity[n + c*Np], c = 0,1,2
//   ElectricField - components at ElectricField[n + c*Np], c = 0,1,2
//   Di, zi, Vt    - enter only through the factor zi*Di/Vt that scales the
//                   electric field into the electrochemical induced velocity
//   rlx           - relaxation rate used for every direction
//   start, finish - sites n with start <= n < finish are updated
//   Np            - stride between slots / vector components
// Slot pairs (1,2), (3,4), (5,6) are read swapped relative to where they are
// written (f1 comes from slot 2 and is stored to slot 1, and so on).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
                                           double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    // Number of sites handled by each thread.
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;

    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // Load the macroscopic fields for this site.
        const double Ci = Den[n];
        const double Ex = ElectricField[n+0*Np];
        const double Ey = ElectricField[n+1*Np];
        const double Ez = ElectricField[n+2*Np];
        const double ux = Velocity[n+0*Np];
        const double uy = Velocity[n+1*Np];
        const double uz = Velocity[n+2*Np];

        // Electrochemical induced velocity, added to the fluid velocity.
        const double fieldToVelocity = zi*Di/Vt;
        const double uEPx = fieldToVelocity*Ex;
        const double uEPy = fieldToVelocity*Ey;
        const double uEPz = fieldToVelocity*Ez;
        const double vx = ux+uEPx;
        const double vy = uy+uEPy;
        const double vz = uz+uEPz;

        // Read every slot before any slot is overwritten.
        const double f0 = dist[n];
        const double f1 = dist[2*Np+n];
        const double f2 = dist[1*Np+n];
        const double f3 = dist[4*Np+n];
        const double f4 = dist[3*Np+n];
        const double f5 = dist[6*Np+n];
        const double f6 = dist[5*Np+n];

        const double keep = 1.0-rlx;          // weight of the old value
        const double moving = rlx*0.125*Ci;   // prefactor for q = 1..6

        // Only the linear velocity term is used in the target value; the
        // quadratic variant is disabled in this kernel.
        dist[n]      = f0*keep+rlx*0.25*Ci;          // q = 0
        dist[1*Np+n] = f1*keep + moving*(1.0+4.0*vx); // q = 1
        dist[2*Np+n] = f2*keep + moving*(1.0-4.0*vx); // q = 2
        dist[3*Np+n] = f3*keep + moving*(1.0+4.0*vy); // q = 3
        dist[4*Np+n] = f4*keep + moving*(1.0-4.0*vy); // q = 4
        dist[5*Np+n] = f5*keep + moving*(1.0+4.0*vz); // q = 5
        dist[6*Np+n] = f6*keep + moving*(1.0-4.0*vz); // q = 6
    }
}
|
||||
|
||||
// Fills the seven D3Q7 slots of every site n < Np from one uniform value:
// slot 0 gets 0.25*DenInit, slots 1..6 get 0.125*DenInit, and Den[n] is set
// to DenInit.
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    const double restValue = 0.25*DenInit;
    const double movingValue = 0.125*DenInit;

    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (n >= Np) continue;

        dist[0*Np+n] = restValue;
        for (int q = 1; q < 7; ++q){
            dist[q*Np+n] = movingValue;
        }
        Den[n] = DenInit;
    }
}
|
||||
|
||||
// Fills the seven D3Q7 slots of every site n < Np from the value already
// stored in Den[n]: slot 0 gets 0.25*Den[n], slots 1..6 get 0.125*Den[n].
// Den itself is only read.
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;

    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (n >= Np) continue;

        // Read once, before any slot is written.
        const double siteDensity = Den[n];
        dist[0*Np+n] = 0.25*siteDensity;
        for (int q = 1; q < 7; ++q){
            dist[q*Np+n] = 0.125*siteDensity;
        }
    }
}
|
||||
|
||||
// Adds one ion species' contribution to the charge density.
//   Den           - concentrations; species ion_component is read at
//                   Den[n + ion_component*Np]
//   ChargeDensity - per-site output, indexed by n
//   IonValence    - valence of this species
//   ion_component - species index; 0 starts a fresh sum, >0 accumulates
//   start, finish - sites n with start <= n < finish are updated
//   Np            - stride between species in Den
// The contribution is F*IonValence*Ci with F = 96485.0 (Faraday's constant).
//
// For ion_component == 0 the previous ChargeDensity value is now never read.
// The earlier form multiplied it by zero, which yields NaN when the old
// value is NaN or infinite (e.g. an uninitialised buffer).
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
    const double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        const double Ci = Den[n+ion_component*Np];   // ion concentration of species i
        const double contribution = F*IonValence*Ci;

        if (ion_component > 0){
            // later species: accumulate onto the running sum
            ChargeDensity[n] = ChargeDensity[n] + contribution;
        }
        else {
            // first species: overwrite, ignoring whatever was stored before
            ChargeDensity[n] = contribution;
        }
    }
}
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
77
hip/MixedGradient.cu
Normal file
77
hip/MixedGradient.cu
Normal file
@@ -0,0 +1,77 @@
|
||||
/* Implement Mixed Gradient (Lee et al. JCP 2016)*/
|
||||
#include <stdio.h>
|
||||
//#include <cuda_profiler_api.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
|
||||
#define NBLOCKS 560
|
||||
#define NTHREADS 128
|
||||
|
||||
// Mixed-gradient stencil over the 18 non-rest D3Q19 directions.
//   Map           - Map[idx] is the index n of the site in the regular
//                   Nx*Ny*Nz array
//   Phi           - input field on the regular array (x fastest, stride Nx
//                   in y, stride Nx*Ny in z)
//   Gradient      - output, written at the same regular index n
//   start, finish - entries idx with start <= idx < finish are processed
//   Np            - only used to size the per-thread work loop
//   Nx, Ny, Nz    - regular array extents (Nz is not needed for indexing)
// For every direction the stencil 5*Phi(+1) - Phi(+2) - 3*Phi(0) - Phi(-1) is
// evaluated, weighted by 0.25 for the six axis directions and 0.125 for the
// twelve diagonal ones, and summed.
// NOTE(review): the result is a single scalar per site; the directional
// terms are summed without being multiplied by the direction components.
// Confirm this is the intended quantity.
// NOTE(review): Phi is read two cells away from n in every direction with
// no bounds check; callers presumably guarantee a sufficient halo -- confirm.
__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
{
    static const int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1},
        {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0},
        {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1},
        {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}};

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;

    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int idx = start + sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (idx >= finish) continue;

        const int n = Map[idx]; // layout in regular array
        const double center = Phi[n];
        double sum = 0.0;

        for (int q = 0; q < 18; ++q){
            // Linear offset of one step along direction q. Because
            // n == k*Nx*Ny + j*Nx + i, stepping in (i,j,k) is n +/- offset.
            const int offset = D3Q19[q][2]*Nx*Ny + D3Q19[q][1]*Nx + D3Q19[q][0];
            const double weight = (q < 6) ? 0.25 : 0.125;

            const double ahead  = Phi[n + offset];
            const double ahead2 = Phi[n + 2*offset];
            const double behind = Phi[n - offset];

            sum += weight*(5.0*ahead-ahead2-3.0*center-behind);
        }
        Gradient[n] = sum;
    }
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
|
||||
{
|
||||
hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_MixedGradient<<<NBLOCKS,NTHREADS >>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
hipProfilerStop();
|
||||
}
|
||||
|
||||
330
hip/Poisson.cu
Normal file
330
hip/Poisson.cu
Normal file
@@ -0,0 +1,330 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
// Computes the electric potential as the sum of the seven D3Q7 distributions
// (AAodd addressing) and stores it at Psi[Map[n]].
//   neighborList  - read location for direction q+1 is neighborList[n + q*Np],
//                   q = 0..5
//   Map           - Map[n] is the index into Psi for site n
//   dist          - distributions; the rest slot of site n is dist[n]
//   start, finish - sites n with start <= n < finish are processed
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;

    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // q = 0, then q = 1..6 accumulated in order
        double potential = dist[n];
        for (int q = 0; q < 6; ++q){
            const int readFrom = neighborList[n+q*Np];
            potential += dist[readFrom];
        }

        Psi[Map[n]] = potential;
    }
}
|
||||
|
||||
// Computes the electric potential as the sum of the seven D3Q7 distributions
// (AAeven addressing: all slots are the site's own dist[q*Np+n]) and stores
// it at Psi[Map[n]].
//   Map           - Map[n] is the index into Psi for site n
//   start, finish - sites n with start <= n < finish are processed
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;

    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // Direction q is stored in the slot of its opposite direction.
        const double f0 = dist[n];
        const double f1 = dist[2*Np+n];
        const double f2 = dist[1*Np+n];
        const double f3 = dist[4*Np+n];
        const double f4 = dist[3*Np+n];
        const double f5 = dist[6*Np+n];
        const double f6 = dist[5*Np+n];

        // Summed left to right, q = 0..6.
        Psi[Map[n]] = f0 + f1 + f2 + f3 + f4 + f5 + f6;
    }
}
|
||||
|
||||
// D3Q7 Poisson kernel, AAodd variant: distributions are gathered through
// neighborList, the electric field is derived from them, and the relaxed
// values are scattered back to the swapped neighbor locations.
//   neighborList  - location for direction q+1 is neighborList[n + q*Np]
//   Map           - Map[n] is the index into Psi for site n
//   dist          - distributions (rest slot of site n is dist[n])
//   Den_charge    - charge density, read at Den_charge[n]
//   Psi           - electric potential, read at Psi[Map[n]]
//   ElectricField - output, components at ElectricField[n + c*Np]
//   tau           - relaxation time; the rate used is 1.0/tau
//   epsilon_LB    - divides the charge density to form the source term
//   start, finish - sites n with start <= n < finish are updated
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const double keep = 1.0-rlx;   // weight of the old distribution value

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // Local charge density scaled by the permittivity, and the potential.
        const double rho_e = Den_charge[n]/epsilon_LB;
        const double psi = Psi[Map[n]];

        // Gather: q = 0 from the site itself, q = 1..6 via the neighbor list.
        const double f0 = dist[n];
        const int nr1 = neighborList[n];
        const double f1 = dist[nr1];
        const int nr2 = neighborList[n+Np];
        const double f2 = dist[nr2];
        const int nr3 = neighborList[n+2*Np];
        const double f3 = dist[nr3];
        const int nr4 = neighborList[n+3*Np];
        const double f4 = dist[nr4];
        const int nr5 = neighborList[n+4*Np];
        const double f5 = dist[nr5];
        const int nr6 = neighborList[n+5*Np];
        const double f6 = dist[nr6];

        // NOTE the unit of electric field here is V/lu;
        // factor 4.0 is D3Q7 lattice speed of sound
        ElectricField[n+0*Np] = (f1-f2)*rlx*4.0;
        ElectricField[n+1*Np] = (f3-f4)*rlx*4.0;
        ElectricField[n+2*Np] = (f5-f6)*rlx*4.0;

        // Relaxation target plus source, shared by every direction.
        const double drive = rlx*psi+rho_e;

        // Scatter: each direction is written to its opposite's location.
        dist[n]   = f0*keep + 0.25*drive;   // q = 0
        dist[nr2] = f1*keep + 0.125*drive;  // q = 1
        dist[nr1] = f2*keep + 0.125*drive;  // q = 2
        dist[nr4] = f3*keep + 0.125*drive;  // q = 3
        dist[nr3] = f4*keep + 0.125*drive;  // q = 4
        dist[nr6] = f5*keep + 0.125*drive;  // q = 5
        dist[nr5] = f6*keep + 0.125*drive;  // q = 6
    }
}
|
||||
|
||||
// D3Q7 Poisson kernel, AAeven variant: every site reads and rewrites its own
// slots dist[q*Np+n]; direction pairs (1,2), (3,4), (5,6) are read swapped
// relative to where they are written.
//   Map           - Map[n] is the index into Psi for site n
//   Den_charge    - charge density, read at Den_charge[n]
//   Psi           - electric potential, read at Psi[Map[n]]
//   ElectricField - output, components at ElectricField[n + c*Np]
//   tau           - relaxation time; the rate used is 1.0/tau
//   epsilon_LB    - divides the charge density to form the source term
//   start, finish - sites n with start <= n < finish are updated
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const double keep = 1.0-rlx;   // weight of the old distribution value

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // Local charge density scaled by the permittivity, and the potential.
        const double rho_e = Den_charge[n]/epsilon_LB;
        const double psi = Psi[Map[n]];

        // Read every slot before any slot is overwritten.
        const double f0 = dist[n];
        const double f1 = dist[2*Np+n];
        const double f2 = dist[1*Np+n];
        const double f3 = dist[4*Np+n];
        const double f4 = dist[3*Np+n];
        const double f5 = dist[6*Np+n];
        const double f6 = dist[5*Np+n];

        // NOTE the unit of electric field here is V/lu;
        // factor 4.0 is D3Q7 lattice speed of sound
        ElectricField[n+0*Np] = (f1-f2)*rlx*4.0;
        ElectricField[n+1*Np] = (f3-f4)*rlx*4.0;
        ElectricField[n+2*Np] = (f5-f6)*rlx*4.0;

        // Relaxation target plus source, shared by every direction.
        const double drive = rlx*psi+rho_e;

        dist[n]      = f0*keep + 0.25*drive;   // q = 0
        dist[1*Np+n] = f1*keep + 0.125*drive;  // q = 1
        dist[2*Np+n] = f2*keep + 0.125*drive;  // q = 2
        dist[3*Np+n] = f3*keep + 0.125*drive;  // q = 3
        dist[4*Np+n] = f4*keep + 0.125*drive;  // q = 4
        dist[5*Np+n] = f5*keep + 0.125*drive;  // q = 5
        dist[6*Np+n] = f6*keep + 0.125*drive;  // q = 6
    }
}
|
||||
|
||||
// Fills the seven D3Q7 slots of each site from the potential stored at
// Psi[Map[n]]: slot 0 gets 0.25*Psi, slots 1..6 get 0.125*Psi.
//   start, finish - sites n with start <= n < finish are initialised
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;

    for (int pass = 0; pass < sitesPerThread; ++pass){
        //........Get 1-D index for this thread....................
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        const int cell = Map[n];
        dist[0*Np+n] = 0.25*Psi[cell];
        for (int q = 1; q < 7; ++q){
            dist[q*Np+n] = 0.125*Psi[cell];
        }
    }
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
996
hip/Stokes.cu
Normal file
996
hip/Stokes.cu
Normal file
@@ -0,0 +1,996 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
double fq;
|
||||
// conserved momemnts
|
||||
double rho,jx,jy,jz;
|
||||
double ux,uy,uz;
|
||||
// non-conserved moments
|
||||
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
|
||||
int nread;
|
||||
// body force due to electric field
|
||||
double rhoE;//charge density
|
||||
double Ex,Ey,Ez;
|
||||
// total body force
|
||||
double Fx,Fy,Fz;
|
||||
|
||||
const double mrt_V1=0.05263157894736842;
|
||||
const double mrt_V2=0.012531328320802;
|
||||
const double mrt_V3=0.04761904761904762;
|
||||
const double mrt_V4=0.004594820384294068;
|
||||
const double mrt_V5=0.01587301587301587;
|
||||
const double mrt_V6=0.0555555555555555555555555;
|
||||
const double mrt_V7=0.02777777777777778;
|
||||
const double mrt_V8=0.08333333333333333;
|
||||
const double mrt_V9=0.003341687552213868;
|
||||
const double mrt_V10=0.003968253968253968;
|
||||
const double mrt_V11=0.01388888888888889;
|
||||
const double mrt_V12=0.04166666666666666;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
rhoE = ChargeDensity[n];
|
||||
Ex = ElectricField[n+0*Np];
|
||||
Ey = ElectricField[n+1*Np];
|
||||
Ez = ElectricField[n+2*Np];
|
||||
//compute total body force, including input body force (Gx,Gy,Gz)
|
||||
Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
rho = fq;
|
||||
m1 = -30.0*fq;
|
||||
m2 = 12.0*fq;
|
||||
|
||||
// q=1
|
||||
nread = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
fq = dist[nread]; // reading the f1 data into register fq
|
||||
//fp = dist[10*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jx = fq;
|
||||
m4 = -4.0*fq;
|
||||
m9 = 2.0*fq;
|
||||
m10 = -4.0*fq;
|
||||
|
||||
// f2 = dist[10*Np+n];
|
||||
nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
fq = dist[nread]; // reading the f2 data into register fq
|
||||
//fq = dist[Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*(fq);
|
||||
m2 -= 4.0*(fq);
|
||||
jx -= fq;
|
||||
m4 += 4.0*(fq);
|
||||
m9 += 2.0*(fq);
|
||||
m10 -= 4.0*(fq);
|
||||
|
||||
// q=3
|
||||
nread = neighborList[n+2*Np]; // neighbor 4
|
||||
fq = dist[nread];
|
||||
//fq = dist[11*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jy = fq;
|
||||
m6 = -4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 = fq;
|
||||
m12 = -2.0*fq;
|
||||
|
||||
// q = 4
|
||||
nread = neighborList[n+3*Np]; // neighbor 3
|
||||
fq = dist[nread];
|
||||
//fq = dist[2*Np+n];
|
||||
rho+= fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jy -= fq;
|
||||
m6 += 4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 += fq;
|
||||
m12 -= 2.0*fq;
|
||||
|
||||
// q=5
|
||||
nread = neighborList[n+4*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[12*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jz = fq;
|
||||
m8 = -4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 -= fq;
|
||||
m12 += 2.0*fq;
|
||||
|
||||
|
||||
// q = 6
|
||||
nread = neighborList[n+5*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[3*Np+n];
|
||||
rho+= fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jz -= fq;
|
||||
m8 += 4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 -= fq;
|
||||
m12 += 2.0*fq;
|
||||
|
||||
// q=7
|
||||
nread = neighborList[n+6*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[13*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 = fq;
|
||||
m16 = fq;
|
||||
m17 = -fq;
|
||||
|
||||
// q = 8
|
||||
nread = neighborList[n+7*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[4*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 += fq;
|
||||
m16 -= fq;
|
||||
m17 += fq;
|
||||
|
||||
// q=9
|
||||
nread = neighborList[n+8*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[14*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 -= fq;
|
||||
m16 += fq;
|
||||
m17 += fq;
|
||||
|
||||
// q = 10
|
||||
nread = neighborList[n+9*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[5*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 -= fq;
|
||||
m16 -= fq;
|
||||
m17 -= fq;
|
||||
|
||||
// q=11
|
||||
nread = neighborList[n+10*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[15*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 = fq;
|
||||
m16 -= fq;
|
||||
m18 = fq;
|
||||
|
||||
// q=12
|
||||
nread = neighborList[n+11*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[6*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 += fq;
|
||||
m16 += fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=13
|
||||
nread = neighborList[n+12*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[16*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 -= fq;
|
||||
m16 -= fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=14
|
||||
nread = neighborList[n+13*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[7*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 -= fq;
|
||||
m16 += fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=15
|
||||
nread = neighborList[n+14*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[17*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 = fq;
|
||||
m17 += fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=16
|
||||
nread = neighborList[n+15*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[8*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 += fq;
|
||||
m17 -= fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=17
|
||||
//fq = dist[18*Np+n];
|
||||
nread = neighborList[n+16*Np];
|
||||
fq = dist[nread];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 -= fq;
|
||||
m17 += fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=18
|
||||
nread = neighborList[n+17*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[9*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 -= fq;
|
||||
m17 -= fq;
|
||||
m18 -= fq;
|
||||
|
||||
// write the velocity
|
||||
ux = jx / rho0;
|
||||
uy = jy / rho0;
|
||||
uz = jz / rho0;
|
||||
Velocity[n] = ux;
|
||||
Velocity[Np+n] = uy;
|
||||
Velocity[2*Np+n] = uz;
|
||||
|
||||
//..............incorporate external force................................................
|
||||
//..............carry out relaxation process...............................................
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
|
||||
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
|
||||
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
|
||||
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
|
||||
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
|
||||
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
|
||||
m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
|
||||
m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
|
||||
m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
|
||||
m16 = m16 + rlx_setB*( - m16);
|
||||
m17 = m17 + rlx_setB*( - m17);
|
||||
m18 = m18 + rlx_setB*( - m18);
|
||||
//.......................................................................................................
|
||||
//.................inverse transformation......................................................
|
||||
|
||||
// q=0
|
||||
fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
|
||||
dist[n] = fq;
|
||||
|
||||
// q = 1
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx;
|
||||
nread = neighborList[n+Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q=2
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
|
||||
nread = neighborList[n];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 3
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
|
||||
nread = neighborList[n+3*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 4
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
|
||||
nread = neighborList[n+2*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 5
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
|
||||
nread = neighborList[n+5*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 6
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
|
||||
nread = neighborList[n+4*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 7
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
|
||||
nread = neighborList[n+7*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 8
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
|
||||
nread = neighborList[n+6*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 9
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
|
||||
nread = neighborList[n+9*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 10
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
|
||||
nread = neighborList[n+8*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 11
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
|
||||
nread = neighborList[n+11*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 12
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
|
||||
nread = neighborList[n+10*Np];
|
||||
dist[nread]= fq;
|
||||
|
||||
// q = 13
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
|
||||
nread = neighborList[n+13*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q= 14
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
|
||||
nread = neighborList[n+12*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
|
||||
// q = 15
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
|
||||
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
|
||||
nread = neighborList[n+15*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 16
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
|
||||
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
|
||||
nread = neighborList[n+14*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
|
||||
// q = 17
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
|
||||
-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
|
||||
nread = neighborList[n+17*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 18
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
|
||||
-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
|
||||
nread = neighborList[n+16*Np];
|
||||
dist[nread] = fq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// D3Q19 MRT collision kernel for the Stokes solver, "AAeven" variant.
//
// For every site n in [start, finish) handled by this thread the kernel
//   1. builds the total body force F = G + rhoE*E*time_conv^2/(h^2*1.0e-12)/den_scale,
//   2. reads the 19 distributions in place from dist, taking each direction from its
//      partner slot (q=1 from dist[2*Np+n], q=2 from dist[1*Np+n], and so on),
//   3. accumulates the density, the momentum (jx,jy,jz) and the non-conserved moments,
//   4. stores the velocity j/rho0 in Velocity[n], Velocity[Np+n], Velocity[2*Np+n],
//   5. relaxes the non-conserved moments with rlx_setA / rlx_setB,
//   6. applies the inverse transformation and writes direction q to dist[q*Np+n],
//      adding the body-force contribution to every q>0.
//
// Parameters:
//   dist          - distributions, indexed as dist[q*Np+n] for q = 0..18
//   Velocity      - output velocity, indexed as Velocity[c*Np+n] for c = 0..2
//   ChargeDensity - charge density, indexed by n
//   ElectricField - electric field, indexed as ElectricField[c*Np+n] for c = 0..2
//   rlx_setA/B    - relaxation rates for the two groups of moments (see relaxation step)
//   Gx,Gy,Gz      - input body force added to the electric body force
//   rho0          - reference density used to normalise the momentum
//   den_scale, h, time_conv - factors of the physical-to-lattice force conversion
//                   (presumably h is the voxel length in micrometres, given the
//                   1.0e-12 factor -- TODO confirm against the caller)
//   start, finish - half-open range of site indices to process
//   Np            - stride between consecutive component arrays
__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
double fq;
|
||||
// conserved moments
|
||||
double rho,jx,jy,jz;
|
||||
double ux,uy,uz;
|
||||
// non-conserved moments
|
||||
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
|
||||
// body force due to electric field
|
||||
double rhoE;//charge density
|
||||
double Ex,Ey,Ez;
|
||||
// total body force
|
||||
double Fx,Fy,Fz;
|
||||
|
||||
// coefficients of the inverse moment transformation used at the end of the kernel
const double mrt_V1=0.05263157894736842;
|
||||
const double mrt_V2=0.012531328320802;
|
||||
const double mrt_V3=0.04761904761904762;
|
||||
const double mrt_V4=0.004594820384294068;
|
||||
const double mrt_V5=0.01587301587301587;
|
||||
const double mrt_V6=0.0555555555555555555555555;
|
||||
const double mrt_V7=0.02777777777777778;
|
||||
const double mrt_V8=0.08333333333333333;
|
||||
const double mrt_V9=0.003341687552213868;
|
||||
const double mrt_V10=0.003968253968253968;
|
||||
const double mrt_V11=0.01388888888888889;
|
||||
const double mrt_V12=0.04166666666666666;
|
||||
|
||||
// S = number of sites each thread visits; the s-loop below walks them
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
// indices at or beyond finish are skipped
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
rhoE = ChargeDensity[n];
|
||||
Ex = ElectricField[n+0*Np];
|
||||
Ey = ElectricField[n+1*Np];
|
||||
Ez = ElectricField[n+2*Np];
|
||||
//compute total body force, including input body force (Gx,Gy,Gz)
|
||||
Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;//the extra factors at the end necessarily convert unit from phys to LB
|
||||
Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
rho = fq;
|
||||
m1 = -30.0*fq;
|
||||
m2 = 12.0*fq;
|
||||
|
||||
// q=1
|
||||
fq = dist[2*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jx = fq;
|
||||
m4 = -4.0*fq;
|
||||
m9 = 2.0*fq;
|
||||
m10 = -4.0*fq;
|
||||
|
||||
// q=2
|
||||
fq = dist[1*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*(fq);
|
||||
m2 -= 4.0*(fq);
|
||||
jx -= fq;
|
||||
m4 += 4.0*(fq);
|
||||
m9 += 2.0*(fq);
|
||||
m10 -= 4.0*(fq);
|
||||
|
||||
// q=3
|
||||
fq = dist[4*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jy = fq;
|
||||
m6 = -4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 = fq;
|
||||
m12 = -2.0*fq;
|
||||
|
||||
// q = 4
|
||||
fq = dist[3*Np+n];
|
||||
rho+= fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jy -= fq;
|
||||
m6 += 4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 += fq;
|
||||
m12 -= 2.0*fq;
|
||||
|
||||
// q=5
|
||||
fq = dist[6*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jz = fq;
|
||||
m8 = -4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 -= fq;
|
||||
m12 += 2.0*fq;
|
||||
|
||||
// q = 6
|
||||
fq = dist[5*Np+n];
|
||||
rho+= fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jz -= fq;
|
||||
m8 += 4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 -= fq;
|
||||
m12 += 2.0*fq;
|
||||
|
||||
// q=7
|
||||
fq = dist[8*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 = fq;
|
||||
m16 = fq;
|
||||
m17 = -fq;
|
||||
|
||||
// q = 8
|
||||
fq = dist[7*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 += fq;
|
||||
m16 -= fq;
|
||||
m17 += fq;
|
||||
|
||||
// q=9
|
||||
fq = dist[10*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 -= fq;
|
||||
m16 += fq;
|
||||
m17 += fq;
|
||||
|
||||
// q = 10
|
||||
fq = dist[9*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 -= fq;
|
||||
m16 -= fq;
|
||||
m17 -= fq;
|
||||
|
||||
// q=11
|
||||
fq = dist[12*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 = fq;
|
||||
m16 -= fq;
|
||||
m18 = fq;
|
||||
|
||||
// q=12
|
||||
fq = dist[11*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 += fq;
|
||||
m16 += fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=13
|
||||
fq = dist[14*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 -= fq;
|
||||
m16 -= fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=14
|
||||
fq = dist[13*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 -= fq;
|
||||
m16 += fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=15
|
||||
fq = dist[16*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 = fq;
|
||||
m17 += fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=16
|
||||
fq = dist[15*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 += fq;
|
||||
m17 -= fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=17
|
||||
fq = dist[18*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 -= fq;
|
||||
m17 += fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=18
|
||||
fq = dist[17*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 -= fq;
|
||||
m17 -= fq;
|
||||
m18 -= fq;
|
||||
|
||||
// write the velocity
|
||||
ux = jx / rho0;
|
||||
uy = jy / rho0;
|
||||
uz = jz / rho0;
|
||||
Velocity[n] = ux;
|
||||
Velocity[Np+n] = uy;
|
||||
Velocity[2*Np+n] = uz;
|
||||
|
||||
|
||||
//........................................................................
|
||||
// READ THE DISTRIBUTIONS
|
||||
// (read from opposite array due to previous swap operation)
|
||||
//........................................................................
|
||||
// NOTE(review): this banner describes the reads performed above; nothing is read below.
|
||||
//..............incorporate external force................................................
|
||||
// (no force is applied to the moments here; Fx,Fy,Fz enter in the inverse transformation)
//..............carry out relaxation process...............................................
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
|
||||
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
|
||||
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
|
||||
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
|
||||
// NOTE(review): this term divides by rho while every other equilibrium moment
// divides by rho0 -- looks like a typo, confirm before changing (the AAodd kernel
// has the same expression, so both must be updated together).
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
|
||||
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
|
||||
m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
|
||||
m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
|
||||
m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
|
||||
m16 = m16 + rlx_setB*( - m16);
|
||||
m17 = m17 + rlx_setB*( - m17);
|
||||
m18 = m18 + rlx_setB*( - m18);
|
||||
//.......................................................................................................
|
||||
//.................inverse transformation......................................................
|
||||
|
||||
// q=0
|
||||
fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
|
||||
dist[n] = fq;
|
||||
|
||||
// q = 1
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx;
|
||||
dist[1*Np+n] = fq;
|
||||
|
||||
// q=2
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
|
||||
dist[2*Np+n] = fq;
|
||||
|
||||
// q = 3
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
|
||||
dist[3*Np+n] = fq;
|
||||
|
||||
// q = 4
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
|
||||
dist[4*Np+n] = fq;
|
||||
|
||||
// q = 5
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
|
||||
dist[5*Np+n] = fq;
|
||||
|
||||
// q = 6
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
|
||||
dist[6*Np+n] = fq;
|
||||
|
||||
// q = 7
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
|
||||
dist[7*Np+n] = fq;
|
||||
|
||||
|
||||
// q = 8
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
|
||||
dist[8*Np+n] = fq;
|
||||
|
||||
// q = 9
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
|
||||
dist[9*Np+n] = fq;
|
||||
|
||||
// q = 10
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
|
||||
dist[10*Np+n] = fq;
|
||||
|
||||
|
||||
// q = 11
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
|
||||
dist[11*Np+n] = fq;
|
||||
|
||||
// q = 12
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
|
||||
dist[12*Np+n] = fq;
|
||||
|
||||
// q = 13
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
|
||||
dist[13*Np+n] = fq;
|
||||
|
||||
// q= 14
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
|
||||
|
||||
dist[14*Np+n] = fq;
|
||||
|
||||
// q = 15
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
|
||||
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
|
||||
dist[15*Np+n] = fq;
|
||||
|
||||
// q = 16
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
|
||||
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
|
||||
dist[16*Np+n] = fq;
|
||||
|
||||
|
||||
// q = 17
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
|
||||
-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
|
||||
dist[17*Np+n] = fq;
|
||||
|
||||
// q = 18
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
|
||||
-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
|
||||
dist[18*Np+n] = fq;
|
||||
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_AAodd_StokesMRT<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<<NBLOCKS,NTHREADS >>>(dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ color lattice boltzmann model
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM):
|
||||
rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0),
|
||||
tauA(0), tauB(0), rhoA(0), rhoB(0), alpha(0), beta(0),
|
||||
@@ -692,20 +693,15 @@ void ScaLBL_ColorModel::Run(){
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
comm.barrier();
|
||||
PROFILE_START("Loop");
|
||||
//std::shared_ptr<Database> analysis_db;
|
||||
bool Regular = false;
|
||||
auto current_db = db->cloneDatabase();
|
||||
runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
|
||||
//analysis.createThreads( analysis_method, 4 );
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax ) {
|
||||
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
|
||||
PROFILE_START("Update");
|
||||
@@ -1038,10 +1034,10 @@ void ScaLBL_ColorModel::Run(){
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
//************************************************************************
|
||||
ScaLBL_Comm->Barrier();
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
@@ -1242,6 +1238,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
|
||||
}
|
||||
return(volume_change);
|
||||
}
|
||||
|
||||
double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){
|
||||
srand(time(NULL));
|
||||
double mass_loss =0.f;
|
||||
@@ -1605,3 +1602,68 @@ void ScaLBL_ColorModel::WriteDebug(){
|
||||
fclose(CGZ_FILE);
|
||||
*/
|
||||
}
|
||||
|
||||
/**
 * Construct an adaptor whose work arrays match the domain of the given model.
 *
 * The grid extents are taken from M.Dm, both timestep counters start at -1,
 * and the two Nx x Ny x Nz fields are allocated and zero-filled.
 */
FlowAdaptor::FlowAdaptor(ScaLBL_ColorModel &M){
    // Grid extents come from the model's domain
    const auto &domain = *M.Dm;
    Nx = domain.Nx;
    Ny = domain.Ny;
    Nz = domain.Nz;

    // No step has been recorded yet
    timestep = -1;
    timestep_previous = -1;

    // phase indicator field
    phi.resize(Nx,Ny,Nz);
    phi.fill(0);

    // time derivative for the phase indicator field
    phi_t.resize(Nx,Ny,Nz);
    phi_t.fill(0);
}
|
||||
|
||||
/** Nothing to release explicitly; the member arrays clean up after themselves. */
FlowAdaptor::~FlowAdaptor() = default;
|
||||
|
||||
/**
 * Extrapolate the motion of the fluid-fluid interface and write the shifted
 * phase field back into the model.
 *
 * The current phase field (M.Phi) is copied to the host and compared with the
 * field stored in M.Averages->Phi. At every site where both values lie strictly
 * inside (-cutoff, cutoff), each value is converted to a distance
 * (0.5/beta)*log((1+phi)/(1-phi)), the change in distance is scaled by
 * 0.125*move_interface_factor and added to the current distance, and a new phase
 * value is rebuilt from the result. All other sites keep their current value.
 * The updated field is copied back to M.Phi.
 *
 * Database keys read from M.color_db:
 *   "move_interface_cutoff" (default 0.975) - |phi| bound selecting interface sites
 *   "move_interface_factor" (default 10.0)  - extrapolation multiplier
 *
 * @param M  colour model whose phase field is read and overwritten
 * @return   number of sites that were modified (as a double). The original code
 *           fell off the end of this non-void function, which is undefined
 *           behaviour; the site count is returned so callers receive a defined value.
 */
double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){

    const double INTERFACE_CUTOFF = M.color_db->getWithDefault<double>( "move_interface_cutoff", 0.975 );
    const double MOVE_INTERFACE_FACTOR = M.color_db->getWithDefault<double>( "move_interface_factor", 10.0 );

    const int N = Nx*Ny*Nz;
    ScaLBL_CopyToHost( phi.data(), M.Phi, N* sizeof( double ) );

    /* compute the local derivative of phase indicator field */
    const double beta = M.beta;
    const double factor = 0.5/beta;
    // accumulated for diagnostics; currently not reported to the caller
    double total_interface_displacement = 0.0;
    double total_interface_sites = 0.0;
    for (int n=0; n<N; n++){
        const double value1 = M.Averages->Phi(n);
        const double value2 = phi(n);
        // default: carry the current value over unchanged
        phi_t(n) = value2;
        if (value1 < INTERFACE_CUTOFF && value1 > -1*INTERFACE_CUTOFF && value2 < INTERFACE_CUTOFF && value2 > -1*INTERFACE_CUTOFF ){
            /* compute the distance to the interface (only needed for interface
             * sites; evaluating the log outside this test produced inf/NaN
             * wherever |phi| >= 1) */
            const double dist1 = factor*log((1.0+value1)/(1.0-value1));
            const double dist2 = factor*log((1.0+value2)/(1.0-value2));
            /* time derivative of distance */
            const double dxdt = 0.125*(dist2-dist1);
            /* extrapolate to move the distance further */
            const double dist3 = dist2 + MOVE_INTERFACE_FACTOR*dxdt;
            /* compute the new phase interface */
            // NOTE(review): this expression equals -tanh(beta*dist3), whereas the
            // distances above correspond to atanh(phi)/beta, so with dxdt == 0 the
            // site would be set to -value2. Confirm the intended sign convention.
            phi_t(n) = (2.f*(exp(-2.f*beta*(dist3)))/(1.f+exp(-2.f*beta*(dist3))) - 1.f);
            total_interface_displacement += fabs(MOVE_INTERFACE_FACTOR*dxdt);
            total_interface_sites += 1.0;
        }
    }
    ScaLBL_CopyToDevice( M.Phi, phi_t.data(), N* sizeof( double ) );


    /* ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
    ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
    if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){
        if (Dm->kproc()==0){
            ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0);
            ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1);
            ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2);
        }
        if (Dm->kproc() == nprocz-1){
            ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1);
            ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2);
            ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3);
        }
    }
    */

    return total_interface_sites;
}
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ public:
|
||||
void Initialize();
|
||||
void Run();
|
||||
void WriteDebug();
|
||||
void getPhaseField(DoubleArray &f);
|
||||
|
||||
bool Restart,pBC;
|
||||
bool REVERSE_FLOW_DIRECTION;
|
||||
@@ -86,3 +87,16 @@ private:
|
||||
double MorphOpenConnected(double target_volume_change);
|
||||
};
|
||||
|
||||
class FlowAdaptor{
|
||||
public:
|
||||
FlowAdaptor(ScaLBL_ColorModel &M);
|
||||
~FlowAdaptor();
|
||||
double MoveInterface(ScaLBL_ColorModel &M);
|
||||
DoubleArray phi;
|
||||
DoubleArray phi_t;
|
||||
private:
|
||||
int Nx, Ny, Nz;
|
||||
int timestep;
|
||||
int timestep_previous;
|
||||
};
|
||||
|
||||
|
||||
@@ -490,14 +490,10 @@ void ScaLBL_DFHModel::Run(){
|
||||
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("No. of timesteps: %i \n", timestepMax);
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
bool Regular = true;
|
||||
PROFILE_START("Loop");
|
||||
runAnalysis analysis( analysis_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
|
||||
@@ -589,10 +585,10 @@ void ScaLBL_DFHModel::Run(){
|
||||
//************************************************************************
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
|
||||
@@ -10,8 +10,9 @@ color lattice boltzmann model
|
||||
#include <time.h>
|
||||
|
||||
ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM):
|
||||
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),W(0),gamma(0),
|
||||
rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauM(0),rhoA(0),rhoB(0),W(0),gamma(0),kappa(0),beta(0),
|
||||
Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0),
|
||||
tau(0),rho0(0),
|
||||
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM)
|
||||
{
|
||||
|
||||
@@ -30,10 +31,15 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){
|
||||
// set defaults
|
||||
timestepMax = 100000;
|
||||
tauA = tauB = 1.0;
|
||||
tauM = 1.0;//relaxation time for phase field
|
||||
rhoA = rhoB = 1.0;
|
||||
tau = 1.0;//only for single-fluid Lee model
|
||||
rho0 = 1.0;//only for single-fluid Lee model
|
||||
Fx = Fy = Fz = 0.0;
|
||||
gamma=1e-3;
|
||||
W=5;
|
||||
gamma=1e-3;//surface tension
|
||||
W=5.0;//interfacial thickness
|
||||
beta = 12.0*gamma/W;
|
||||
kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma
|
||||
Restart=false;
|
||||
din=dout=1.0;
|
||||
flux=0.0;
|
||||
@@ -42,12 +48,21 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){
|
||||
if (freelee_db->keyExists( "timestepMax" )){
|
||||
timestepMax = freelee_db->getScalar<int>( "timestepMax" );
|
||||
}
|
||||
if (freelee_db->keyExists( "tau" )){//only for single-fluid Lee model
|
||||
tau = freelee_db->getScalar<double>( "tau" );
|
||||
}
|
||||
if (freelee_db->keyExists( "tauA" )){
|
||||
tauA = freelee_db->getScalar<double>( "tauA" );
|
||||
}
|
||||
if (freelee_db->keyExists( "tauB" )){
|
||||
tauB = freelee_db->getScalar<double>( "tauB" );
|
||||
}
|
||||
if (freelee_db->keyExists( "tauM" )){
|
||||
tauM = freelee_db->getScalar<double>( "tauM" );
|
||||
}
|
||||
if (freelee_db->keyExists( "rho0" )){
|
||||
rho0 = freelee_db->getScalar<double>( "rho0" );
|
||||
}
|
||||
if (freelee_db->keyExists( "rhoA" )){
|
||||
rhoA = freelee_db->getScalar<double>( "rhoA" );
|
||||
}
|
||||
@@ -81,6 +96,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){
|
||||
inletB=0.f;
|
||||
outletA=0.f;
|
||||
outletB=1.f;
|
||||
//update secondary parameters
|
||||
beta = 12.0*gamma/W;
|
||||
kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma
|
||||
//if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details)
|
||||
|
||||
BoundaryCondition = 0;
|
||||
@@ -184,9 +202,9 @@ void ScaLBL_FreeLeeModel::ReadInput(){
|
||||
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::Create(){
|
||||
void ScaLBL_FreeLeeModel::Create_TwoFluid(){
|
||||
/*
|
||||
* This function creates the variables needed to run a LBM
|
||||
* This function creates the variables needed to run two-fluid Lee model
|
||||
*/
|
||||
//.........................................................
|
||||
// Initialize communication structures in averaging domain
|
||||
@@ -198,7 +216,7 @@ void ScaLBL_FreeLeeModel::Create(){
|
||||
// Create a communicator for the device (will use optimized layout)
|
||||
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
|
||||
ScaLBL_Comm = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
|
||||
ScaLBL_Comm_Regular = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
|
||||
//ScaLBL_Comm_Regular = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
|
||||
ScaLBL_Comm_WideHalo = std::shared_ptr<ScaLBLWideHalo_Communicator>(new ScaLBLWideHalo_Communicator(Mask,2));
|
||||
|
||||
// create the layout for the LBM
|
||||
@@ -220,7 +238,7 @@ void ScaLBL_FreeLeeModel::Create(){
|
||||
//...........................................................................
|
||||
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &hq, 7*dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &mu_phi, dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &Den, dist_mem_size);
|
||||
@@ -239,46 +257,301 @@ void ScaLBL_FreeLeeModel::Create(){
|
||||
for (int i=1; i<Nx-1; i++){
|
||||
int idx=Map(i,j,k);
|
||||
if (!(idx < 0))
|
||||
TmpMap[idx] = k*Nx*Ny+j*Nx+i;
|
||||
TmpMap[idx] = ScaLBL_Comm_WideHalo->Map(i,j,k);
|
||||
}
|
||||
}
|
||||
}
|
||||
// check that TmpMap is valid
|
||||
for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
|
||||
auto n = TmpMap[idx];
|
||||
if (n > Nx*Ny*Nz){
|
||||
if (n > Nxh*Nyh*Nzh){
|
||||
printf("Bad value! idx=%i \n", n);
|
||||
TmpMap[idx] = Nx*Ny*Nz-1;
|
||||
TmpMap[idx] = Nxh*Nyh*Nzh-1;
|
||||
}
|
||||
}
|
||||
for (int idx=ScaLBL_Comm->FirstInterior(); idx<ScaLBL_Comm->LastInterior(); idx++){
|
||||
auto n = TmpMap[idx];
|
||||
if ( n > Nx*Ny*Nz ){
|
||||
if ( n > Nxh*Nyh*Nzh ){
|
||||
printf("Bad value! idx=%i \n",n);
|
||||
TmpMap[idx] = Nx*Ny*Nz-1;
|
||||
TmpMap[idx] = Nxh*Nyh*Nzh-1;
|
||||
}
|
||||
}
|
||||
// copy the device map
|
||||
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
|
||||
ScaLBL_DeviceBarrier();
|
||||
delete [] TmpMap;
|
||||
|
||||
// copy the neighbor list
|
||||
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
|
||||
// initialize phi based on PhaseLabel (include solid component labels)
|
||||
comm.barrier();
|
||||
delete [] TmpMap;
|
||||
delete [] neighborList;
|
||||
}
|
||||
|
||||
/********************************************************
|
||||
* AssignComponentLabels *
|
||||
********************************************************/
|
||||
|
||||
void ScaLBL_FreeLeeModel::Initialize(){
|
||||
|
||||
if (rank==0) printf ("Initializing distributions \n");
|
||||
ScaLBL_D3Q19_Init(fq, Np);
|
||||
void ScaLBL_FreeLeeModel::Create_SingleFluid(){
|
||||
/*
|
||||
* This function initializes model
|
||||
* This function creates the variables needed to run single-fluid Lee model
|
||||
*/
|
||||
//.........................................................
|
||||
// Initialize communication structures in averaging domain
|
||||
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
|
||||
Mask->CommInit();
|
||||
Np=Mask->PoreCount();
|
||||
//...........................................................................
|
||||
if (rank==0) printf ("Create ScaLBL_Communicator \n");
|
||||
// Create a communicator for the device (will use optimized layout)
|
||||
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
|
||||
ScaLBL_Comm = std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
|
||||
|
||||
// create the layout for the LBM
|
||||
int Npad=(Np/16 + 2)*16;
|
||||
if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N);
|
||||
Map.resize(Nx,Ny,Nz); Map.fill(-2);
|
||||
auto neighborList= new int[18*Npad];
|
||||
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1);
|
||||
comm.barrier();
|
||||
|
||||
//...........................................................................
|
||||
// MAIN VARIABLES ALLOCATED HERE
|
||||
//...........................................................................
|
||||
// LBM variables
|
||||
if (rank==0) printf ("Allocating distributions \n");
|
||||
//......................device distributions.................................
|
||||
dist_mem_size = Np*sizeof(double);
|
||||
neighborSize=18*(Np*sizeof(int));
|
||||
//...........................................................................
|
||||
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np);
|
||||
//...........................................................................
|
||||
// Update GPU data structures
|
||||
if (rank==0) printf ("Setting up device map and neighbor list \n");
|
||||
// copy the neighbor list
|
||||
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
|
||||
comm.barrier();
|
||||
delete [] neighborList;
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad()
|
||||
{
|
||||
double *phase;
|
||||
phase = new double[Nh];
|
||||
|
||||
size_t NLABELS=0;
|
||||
signed char VALUE=0;
|
||||
double AFFINITY=0.f;
|
||||
|
||||
auto LabelList = freelee_db->getVector<int>( "ComponentLabels" );
|
||||
auto AffinityList = freelee_db->getVector<double>( "ComponentAffinity" );
|
||||
|
||||
NLABELS=LabelList.size();
|
||||
if (NLABELS != AffinityList.size()){
|
||||
ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! \n");
|
||||
}
|
||||
|
||||
double label_count[NLABELS];
|
||||
double label_count_global[NLABELS];
|
||||
|
||||
// Assign the labels
|
||||
for (size_t idx=0; idx<NLABELS; idx++) label_count[idx]=0;
|
||||
for (int k=0;k<Nzh;k++){
|
||||
for (int j=0;j<Nyh;j++){
|
||||
for (int i=0;i<Nxh;i++){
|
||||
|
||||
//idx for double-halo array 'phase'
|
||||
int nh = k*Nxh*Nyh+j*Nxh+i;
|
||||
|
||||
//idx for single-halo array Mask->id[n]
|
||||
int x=i-1;
|
||||
int y=j-1;
|
||||
int z=k-1;
|
||||
if (x<0) x=0;
|
||||
if (y<0) y=0;
|
||||
if (z<0) z=0;
|
||||
if (x>=Nx) x=Nx-1;
|
||||
if (y>=Ny) y=Ny-1;
|
||||
if (z>=Nz) z=Nz-1;
|
||||
int n = z*Nx*Ny+y*Nx+x;
|
||||
VALUE=id[n];
|
||||
|
||||
// Assign the affinity from the paired list
|
||||
for (unsigned int idx=0; idx < NLABELS; idx++){
|
||||
//printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]);
|
||||
if (VALUE == LabelList[idx]){
|
||||
AFFINITY=AffinityList[idx];
|
||||
label_count[idx] += 1.0;
|
||||
idx = NLABELS;
|
||||
//Mask->id[n] = 0; // set mask to zero since this is an immobile component
|
||||
}
|
||||
}
|
||||
// fluid labels are reserved
|
||||
if (VALUE == 1) AFFINITY=1.0;
|
||||
else if (VALUE == 2) AFFINITY=-1.0;
|
||||
phase[nh] = AFFINITY;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set Dm to match Mask
|
||||
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = Mask->id[i];
|
||||
|
||||
for (size_t idx=0; idx<NLABELS; idx++)
|
||||
label_count_global[idx] = Dm->Comm.sumReduce(label_count[idx]);
|
||||
|
||||
if (rank==0){
|
||||
printf("Number of component labels: %lu \n",NLABELS);
|
||||
for (unsigned int idx=0; idx<NLABELS; idx++){
|
||||
VALUE=LabelList[idx];
|
||||
AFFINITY=AffinityList[idx];
|
||||
double volume_fraction = double(label_count_global[idx])/double((Nx-2)*(Ny-2)*(Nz-2)*nprocs);
|
||||
printf(" label=%d, affinity=%f, volume fraction==%f\n",VALUE,AFFINITY,volume_fraction);
|
||||
}
|
||||
}
|
||||
|
||||
//compute color gradient and laplacian of phase field
|
||||
double *ColorGrad_host, *mu_phi_host;
|
||||
ColorGrad_host = new double[3*Np];
|
||||
mu_phi_host = new double[Np];
|
||||
|
||||
double *Dst;
|
||||
Dst = new double [3*3*3];
|
||||
for (int kk=0; kk<3; kk++){
|
||||
for (int jj=0; jj<3; jj++){
|
||||
for (int ii=0; ii<3; ii++){
|
||||
int index = kk*9+jj*3+ii;
|
||||
Dst[index] = sqrt(double(ii-1)*double(ii-1) + double(jj-1)*double(jj-1)+ double(kk-1)*double(kk-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
double w_face = 1.0/18.0;
|
||||
double w_edge = 1.0/36.0;
|
||||
double w_corner = 0.f;
|
||||
//local
|
||||
Dst[13] = 0.f;
|
||||
//faces
|
||||
Dst[4] = w_face;
|
||||
Dst[10] = w_face;
|
||||
Dst[12] = w_face;
|
||||
Dst[14] = w_face;
|
||||
Dst[16] = w_face;
|
||||
Dst[22] = w_face;
|
||||
// corners
|
||||
Dst[0] = w_corner;
|
||||
Dst[2] = w_corner;
|
||||
Dst[6] = w_corner;
|
||||
Dst[8] = w_corner;
|
||||
Dst[18] = w_corner;
|
||||
Dst[20] = w_corner;
|
||||
Dst[24] = w_corner;
|
||||
Dst[26] = w_corner;
|
||||
// edges
|
||||
Dst[1] = w_edge;
|
||||
Dst[3] = w_edge;
|
||||
Dst[5] = w_edge;
|
||||
Dst[7] = w_edge;
|
||||
Dst[9] = w_edge;
|
||||
Dst[11] = w_edge;
|
||||
Dst[15] = w_edge;
|
||||
Dst[17] = w_edge;
|
||||
Dst[19] = w_edge;
|
||||
Dst[21] = w_edge;
|
||||
Dst[23] = w_edge;
|
||||
Dst[25] = w_edge;
|
||||
|
||||
double cs2_inv = 3.0;//inverse of c_s^2 for D3Q19 lattice
|
||||
int width = 2;//For better readability: make halo width explicity wherever possible
|
||||
for (int k=width; k<Nzh-width; k++){
|
||||
for (int j=width; j<Nyh-width; j++){
|
||||
for (int i=width; i<Nxh-width; i++){
|
||||
|
||||
//idx for double-halo array 'phase'
|
||||
int nh = k*Nxh*Nyh+j*Nxh+i;
|
||||
|
||||
int idx=Map(i-width+1,j-width+1,k-width+1);
|
||||
if (!(idx < 0)){
|
||||
double phi_x = 0.f;
|
||||
double phi_y = 0.f;
|
||||
double phi_z = 0.f;
|
||||
double phi_Lap = 0.f;//Laplacian of the phase field
|
||||
for (int kk=0; kk<3; kk++){
|
||||
for (int jj=0; jj<3; jj++){
|
||||
for (int ii=0; ii<3; ii++){
|
||||
|
||||
int index = kk*9+jj*3+ii;
|
||||
double weight= Dst[index];
|
||||
|
||||
int idi=i+ii-1;
|
||||
int idj=j+jj-1;
|
||||
int idk=k+kk-1;
|
||||
|
||||
if (idi < 0) idi=0;
|
||||
if (idj < 0) idj=0;
|
||||
if (idk < 0) idk=0;
|
||||
if (!(idi < Nxh)) idi=Nxh-1;
|
||||
if (!(idj < Nyh)) idj=Nyh-1;
|
||||
if (!(idk < Nzh)) idk=Nzh-1;
|
||||
|
||||
int nn = idk*Nxh*Nyh + idj*Nxh + idi;
|
||||
double vec_x = double(ii-1);
|
||||
double vec_y = double(jj-1);
|
||||
double vec_z = double(kk-1);
|
||||
double GWNS=phase[nn];
|
||||
double GWNS_local=phase[nh];
|
||||
phi_x += GWNS*weight*vec_x;
|
||||
phi_y += GWNS*weight*vec_y;
|
||||
phi_z += GWNS*weight*vec_z;
|
||||
phi_Lap += weight*(GWNS-GWNS_local);//Laplacian of the phase field
|
||||
}
|
||||
}
|
||||
}
|
||||
//store color gradient
|
||||
ColorGrad_host[idx+0*Np] = cs2_inv*phi_x;
|
||||
ColorGrad_host[idx+1*Np] = cs2_inv*phi_y;
|
||||
ColorGrad_host[idx+2*Np] = cs2_inv*phi_z;
|
||||
//compute chemical potential
|
||||
phi_Lap = 2.0*cs2_inv*phi_Lap;
|
||||
mu_phi_host[idx] = 4.0*beta*phase[nh]*(phase[nh]+1.0)*(phase[nh]-1.0) - kappa*phi_Lap;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//copy all data to device
|
||||
ScaLBL_CopyToDevice(Phi, phase, Nh*sizeof(double));
|
||||
ScaLBL_CopyToDevice(ColorGrad, ColorGrad_host, 3*Np*sizeof(double));
|
||||
ScaLBL_CopyToDevice(mu_phi, mu_phi_host, Np*sizeof(double));
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
|
||||
//debug
|
||||
//save the phase field and check it
|
||||
//FILE *OUTFILE;
|
||||
//sprintf(LocalRankFilename,"Phase_Init.%05i.raw",rank);
|
||||
//OUTFILE = fopen(LocalRankFilename,"wb");
|
||||
//fwrite(phase,8,Nh,OUTFILE);
|
||||
//fclose(OUTFILE);
|
||||
|
||||
|
||||
delete [] phase;
|
||||
delete [] ColorGrad_host;
|
||||
delete [] mu_phi_host;
|
||||
delete [] Dst;
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::Initialize_TwoFluid(){
|
||||
/*
|
||||
* This function initializes two-fluid Lee model
|
||||
*/
|
||||
if (rank==0) printf ("Initializing phase field, chemical potential and color gradient\n");
|
||||
AssignComponentLabels_ChemPotential_ColorGrad();//initialize phase field Phi
|
||||
|
||||
if (rank==0) printf ("Initializing distributions for momentum transport\n");
|
||||
ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np);
|
||||
|
||||
if (rank==0) printf ("Initializing density field and distributions for phase-field transport\n");
|
||||
ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
|
||||
if (Restart == true){
|
||||
//TODO need to revise this function
|
||||
if (rank==0){
|
||||
printf("Reading restart file! \n");
|
||||
}
|
||||
@@ -292,7 +565,7 @@ void ScaLBL_FreeLeeModel::Initialize(){
|
||||
cDen = new double[2*Np];
|
||||
cDist = new double[19*Np];
|
||||
ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int));
|
||||
ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double));
|
||||
//ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double));
|
||||
|
||||
ifstream File(LocalRestartFile,ios::binary);
|
||||
int idx;
|
||||
@@ -331,18 +604,19 @@ void ScaLBL_FreeLeeModel::Initialize(){
|
||||
|
||||
// Copy the restart data to the GPU
|
||||
ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double));
|
||||
ScaLBL_CopyToDevice(fq,cDist,19*Np*sizeof(double));
|
||||
ScaLBL_CopyToDevice(gqbar,cDist,19*Np*sizeof(double));
|
||||
ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double));
|
||||
ScaLBL_DeviceBarrier();
|
||||
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
|
||||
if (rank==0) printf ("Initializing phase and density fields on device from Restart\n");
|
||||
//TODO the following function is to be updated.
|
||||
//ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
//ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
}
|
||||
|
||||
if (rank==0) printf ("Initializing phase field \n");
|
||||
//ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
//ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
|
||||
// establish reservoirs for external bC
|
||||
// TODO to be revised
|
||||
if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){
|
||||
if (Dm->kproc()==0){
|
||||
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0);
|
||||
@@ -358,7 +632,84 @@ void ScaLBL_FreeLeeModel::Initialize(){
|
||||
//ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double));
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::Run(){
|
||||
void ScaLBL_FreeLeeModel::Initialize_SingleFluid(){
|
||||
/*
|
||||
* This function initializes single-fluid Lee model
|
||||
*/
|
||||
if (rank==0) printf ("Initializing distributions for momentum transport\n");
|
||||
ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(gqbar, Fx, Fy, Fz, Np);
|
||||
|
||||
if (Restart == true){
|
||||
//TODO need to revise this function
|
||||
//remove the phase-related part
|
||||
|
||||
|
||||
|
||||
// if (rank==0){
|
||||
// printf("Reading restart file! \n");
|
||||
// }
|
||||
//
|
||||
// // Read in the restart file to CPU buffers
|
||||
// int *TmpMap;
|
||||
// TmpMap = new int[Np];
|
||||
//
|
||||
// double *cPhi, *cDist, *cDen;
|
||||
// cPhi = new double[N];
|
||||
// cDen = new double[2*Np];
|
||||
// cDist = new double[19*Np];
|
||||
// ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int));
|
||||
// //ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double));
|
||||
//
|
||||
// ifstream File(LocalRestartFile,ios::binary);
|
||||
// int idx;
|
||||
// double value,va,vb;
|
||||
// for (int n=0; n<Np; n++){
|
||||
// File.read((char*) &va, sizeof(va));
|
||||
// File.read((char*) &vb, sizeof(vb));
|
||||
// cDen[n] = va;
|
||||
// cDen[Np+n] = vb;
|
||||
// }
|
||||
// for (int n=0; n<Np; n++){
|
||||
// // Read the distributions
|
||||
// for (int q=0; q<19; q++){
|
||||
// File.read((char*) &value, sizeof(value));
|
||||
// cDist[q*Np+n] = value;
|
||||
// }
|
||||
// }
|
||||
// File.close();
|
||||
//
|
||||
// for (int n=0; n<ScaLBL_Comm->LastExterior(); n++){
|
||||
// va = cDen[n];
|
||||
// vb = cDen[Np + n];
|
||||
// value = (va-vb)/(va+vb);
|
||||
// idx = TmpMap[n];
|
||||
// if (!(idx < 0) && idx<N)
|
||||
// cPhi[idx] = value;
|
||||
// }
|
||||
// for (int n=ScaLBL_Comm->FirstInterior(); n<ScaLBL_Comm->LastInterior(); n++){
|
||||
// va = cDen[n];
|
||||
// vb = cDen[Np + n];
|
||||
// value = (va-vb)/(va+vb);
|
||||
// idx = TmpMap[n];
|
||||
// if (!(idx < 0) && idx<N)
|
||||
// cPhi[idx] = value;
|
||||
// }
|
||||
//
|
||||
// // Copy the restart data to the GPU
|
||||
// ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double));
|
||||
// ScaLBL_CopyToDevice(gqbar,cDist,19*Np*sizeof(double));
|
||||
// ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double));
|
||||
// ScaLBL_Comm->Barrier();
|
||||
// comm.barrier();
|
||||
//
|
||||
// if (rank==0) printf ("Initializing phase and density fields on device from Restart\n");
|
||||
// //TODO the following function is to be updated.
|
||||
// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
}
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::Run_TwoFluid(){
|
||||
int nprocs=nprocx*nprocy*nprocz;
|
||||
const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz);
|
||||
|
||||
@@ -368,98 +719,93 @@ void ScaLBL_FreeLeeModel::Run(){
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
comm.barrier();
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
PROFILE_START("Loop");
|
||||
while (timestep < timestepMax ) {
|
||||
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
|
||||
PROFILE_START("Update");
|
||||
// *************ODD TIMESTEP*************
|
||||
timestep++;
|
||||
/* // Compute the Phase indicator field
|
||||
// Read for hq, Bq happens in this routine (requires communication)
|
||||
ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL
|
||||
ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE
|
||||
ScaLBL_DeviceBarrier();
|
||||
ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
//-------------------------------------------------------------------------------------------------------------------
|
||||
// Compute the Phase indicator field
|
||||
// Read for hq happens in this routine (requires communication)
|
||||
ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL
|
||||
ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
|
||||
// Perform the collision operation
|
||||
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL
|
||||
if (BoundaryCondition > 0 && BoundaryCondition < 5){
|
||||
//TODO to be revised
|
||||
ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB);
|
||||
ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB);
|
||||
}
|
||||
// Halo exchange for phase field
|
||||
ScaLBL_Comm_Regular->SendHalo(Phi);
|
||||
ScaLBL_Comm_WideHalo->Send(Phi);
|
||||
|
||||
ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
|
||||
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm_Regular->RecvHalo(Phi);
|
||||
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
ScaLBL_DeviceBarrier();
|
||||
ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM,
|
||||
kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm_WideHalo->Recv(Phi);
|
||||
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
// Set BCs
|
||||
if (BoundaryCondition == 3){
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
|
||||
}
|
||||
if (BoundaryCondition == 4){
|
||||
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
|
||||
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
|
||||
}
|
||||
else if (BoundaryCondition == 5){
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar);
|
||||
}
|
||||
ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
|
||||
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_DeviceBarrier();
|
||||
MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL);
|
||||
ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM,
|
||||
kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_Comm->Barrier();
|
||||
|
||||
// *************EVEN TIMESTEP*************
|
||||
timestep++;
|
||||
// Compute the Phase indicator field
|
||||
ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL
|
||||
ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE
|
||||
ScaLBL_DeviceBarrier();
|
||||
ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL
|
||||
ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
|
||||
// Perform the collision operation
|
||||
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
|
||||
ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL
|
||||
// Halo exchange for phase field
|
||||
if (BoundaryCondition > 0 && BoundaryCondition < 5){
|
||||
ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB);
|
||||
ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB);
|
||||
}
|
||||
ScaLBL_Comm_Regular->SendHalo(Phi);
|
||||
ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
|
||||
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm_Regular->RecvHalo(Phi);
|
||||
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
ScaLBL_DeviceBarrier();
|
||||
ScaLBL_Comm_WideHalo->Send(Phi);
|
||||
ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM,
|
||||
kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm_WideHalo->Recv(Phi);
|
||||
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
// Set boundary conditions
|
||||
if (BoundaryCondition == 3){
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
|
||||
}
|
||||
else if (BoundaryCondition == 4){
|
||||
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
|
||||
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
|
||||
}
|
||||
else if (BoundaryCondition == 5){
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar);
|
||||
}
|
||||
ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB,
|
||||
alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
*/
|
||||
ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM,
|
||||
kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_Comm->Barrier();
|
||||
//************************************************************************
|
||||
PROFILE_STOP("Update");
|
||||
@@ -467,10 +813,10 @@ void ScaLBL_FreeLeeModel::Run(){
|
||||
PROFILE_STOP("Loop");
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
//************************************************************************
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
@@ -484,33 +830,123 @@ void ScaLBL_FreeLeeModel::Run(){
|
||||
// ************************************************************************
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::Run_SingleFluid(){
|
||||
int nprocs=nprocx*nprocy*nprocz;
|
||||
const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz);
|
||||
|
||||
if (rank==0){
|
||||
printf("********************************************************\n");
|
||||
printf("No. of timesteps: %i \n", timestepMax);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::WriteDebug(){
|
||||
//.......create and start timer............
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
PROFILE_START("Loop");
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax ) {
|
||||
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
|
||||
PROFILE_START("Update");
|
||||
// *************ODD TIMESTEP*************
|
||||
timestep++;
|
||||
//-------------------------------------------------------------------------------------------------------------------
|
||||
// Perform the collision operation
|
||||
ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL
|
||||
ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz,
|
||||
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
// Set boundary conditions
|
||||
// TODO to be revised!
|
||||
if (BoundaryCondition == 3){
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
|
||||
}
|
||||
if (BoundaryCondition == 4){
|
||||
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
|
||||
}
|
||||
else if (BoundaryCondition == 5){
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar);
|
||||
}
|
||||
ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz,
|
||||
0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_Comm->Barrier();
|
||||
|
||||
// *************EVEN TIMESTEP*************
|
||||
timestep++;
|
||||
//-------------------------------------------------------------------------------------------------------------------
|
||||
// Perform the collision operation
|
||||
ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL
|
||||
ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz,
|
||||
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
// Set boundary conditions
|
||||
// TODO to be revised!
|
||||
if (BoundaryCondition == 3){
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
|
||||
}
|
||||
else if (BoundaryCondition == 4){
|
||||
din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep);
|
||||
}
|
||||
else if (BoundaryCondition == 5){
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar);
|
||||
}
|
||||
ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz,
|
||||
0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_Comm->Barrier();
|
||||
//************************************************************************
|
||||
PROFILE_STOP("Update");
|
||||
}
|
||||
PROFILE_STOP("Loop");
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
//************************************************************************
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("CPU time = %f \n", cputime);
|
||||
if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
|
||||
MLUPS *= nprocs;
|
||||
if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
|
||||
// ************************************************************************
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){
|
||||
// Copy back final phase indicator field and convert to regular layout
|
||||
DoubleArray PhaseField(Nx,Ny,Nz);
|
||||
DoubleArray PhaseData(Nxh,Nyh,Nzh);
|
||||
//ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField);
|
||||
ScaLBL_CopyToHost(PhaseField.data(), Phi, sizeof(double)*N);
|
||||
ScaLBL_CopyToHost(PhaseData.data(), Phi, sizeof(double)*Nh);
|
||||
|
||||
FILE *OUTFILE;
|
||||
sprintf(LocalRankFilename,"Phase.%05i.raw",rank);
|
||||
OUTFILE = fopen(LocalRankFilename,"wb");
|
||||
fwrite(PhaseField.data(),8,N,OUTFILE);
|
||||
fwrite(PhaseData.data(),8,Nh,OUTFILE);
|
||||
fclose(OUTFILE);
|
||||
|
||||
ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField);
|
||||
DoubleArray PhaseField(Nx,Ny,Nz);
|
||||
ScaLBL_Comm->RegularLayout(Map,Den,PhaseField);
|
||||
FILE *AFILE;
|
||||
sprintf(LocalRankFilename,"A.%05i.raw",rank);
|
||||
sprintf(LocalRankFilename,"Density.%05i.raw",rank);
|
||||
AFILE = fopen(LocalRankFilename,"wb");
|
||||
fwrite(PhaseField.data(),8,N,AFILE);
|
||||
fclose(AFILE);
|
||||
|
||||
ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField);
|
||||
FILE *BFILE;
|
||||
sprintf(LocalRankFilename,"B.%05i.raw",rank);
|
||||
BFILE = fopen(LocalRankFilename,"wb");
|
||||
fwrite(PhaseField.data(),8,N,BFILE);
|
||||
fclose(BFILE);
|
||||
|
||||
ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField);
|
||||
FILE *PFILE;
|
||||
sprintf(LocalRankFilename,"Pressure.%05i.raw",rank);
|
||||
@@ -561,3 +997,37 @@ void ScaLBL_FreeLeeModel::WriteDebug(){
|
||||
fclose(CGZ_FILE);
|
||||
*/
|
||||
}
|
||||
|
||||
void ScaLBL_FreeLeeModel::WriteDebug_SingleFluid(){
	/*
	 * Writes the pressure and the three velocity components to per-rank raw
	 * binary files named "<Field>.<rank, 5 digits>.raw".
	 * Each field is first converted to the regular Nx*Ny*Nz layout through
	 * ScaLBL_Comm->RegularLayout and then written as N doubles.
	 * A file that cannot be opened is reported and skipped instead of passing a
	 * null FILE* to fwrite/fclose.
	 */
	DoubleArray PhaseField(Nx,Ny,Nz);

	// Convert one field to regular layout and dump it to "<label>.<rank>.raw"
	auto writeField = [&](double *field, const char *label){
		ScaLBL_Comm->RegularLayout(Map,field,PhaseField);
		sprintf(LocalRankFilename,"%s.%05i.raw",label,rank);
		FILE *OUTFILE = fopen(LocalRankFilename,"wb");
		if (OUTFILE == NULL){
			printf("WARNING: unable to open %s for writing, field not saved \n",LocalRankFilename);
			return;
		}
		fwrite(PhaseField.data(),sizeof(double),N,OUTFILE);
		fclose(OUTFILE);
	};

	writeField(Pressure,"Pressure");
	// Velocity stores the x, y and z components in consecutive blocks of Np values
	writeField(&Velocity[0],"Velocity_X");
	writeField(&Velocity[Np],"Velocity_Y");
	writeField(&Velocity[2*Np],"Velocity_Z");
}
|
||||
|
||||
@@ -26,16 +26,22 @@ public:
|
||||
void ReadParams(std::shared_ptr<Database> db0);
|
||||
void SetDomain();
|
||||
void ReadInput();
|
||||
void Create();
|
||||
void Initialize();
|
||||
void Run();
|
||||
void WriteDebug();
|
||||
void Create_TwoFluid();
|
||||
void Initialize_TwoFluid();
|
||||
void Run_TwoFluid();
|
||||
void WriteDebug_TwoFluid();
|
||||
void Create_SingleFluid();
|
||||
void Initialize_SingleFluid();
|
||||
void Run_SingleFluid();
|
||||
void WriteDebug_SingleFluid();
|
||||
|
||||
bool Restart,pBC;
|
||||
int timestep,timestepMax;
|
||||
int BoundaryCondition;
|
||||
double tauA,tauB,rhoA,rhoB;
|
||||
double W,gamma;
|
||||
double tau, rho0;//only for single-fluid Lee model
|
||||
double tauM;//relaxation time for phase field (or mass)
|
||||
double W,gamma,kappa,beta;
|
||||
double Fx,Fy,Fz,flux;
|
||||
double din,dout,inletA,inletB,outletA,outletB;
|
||||
|
||||
@@ -61,7 +67,7 @@ public:
|
||||
signed char *id;
|
||||
int *NeighborList;
|
||||
int *dvcMap;
|
||||
double *fq, *hq;
|
||||
double *gqbar, *hq;
|
||||
double *mu_phi, *Den, *Phi;
|
||||
double *ColorGrad;
|
||||
double *Velocity;
|
||||
@@ -81,6 +87,7 @@ private:
|
||||
|
||||
//int rank,nprocs;
|
||||
void LoadParams(std::shared_ptr<Database> db0);
|
||||
void AssignComponentLabels_ChemPotential_ColorGrad();
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -910,10 +910,8 @@ void ScaLBL_GreyscaleColorModel::Run(){
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
@@ -923,6 +921,7 @@ void ScaLBL_GreyscaleColorModel::Run(){
|
||||
auto current_db = db->cloneDatabase();
|
||||
//runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
|
||||
//analysis.createThreads( analysis_method, 4 );
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax ) {
|
||||
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
|
||||
PROFILE_START("Update");
|
||||
@@ -1319,10 +1318,10 @@ void ScaLBL_GreyscaleColorModel::Run(){
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
//************************************************************************
|
||||
ScaLBL_Comm->Barrier();
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
||||
@@ -485,10 +485,8 @@ void ScaLBL_GreyscaleModel::Run(){
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
Minkowski Morphology(Mask);
|
||||
@@ -500,6 +498,7 @@ void ScaLBL_GreyscaleModel::Run(){
|
||||
double rlx_eff = 1.0/tau_eff;
|
||||
double error = 1.0;
|
||||
double flow_rate_previous = 0.0;
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax && error > tolerance) {
|
||||
//************************************************************************/
|
||||
// *************ODD TIMESTEP*************//
|
||||
@@ -744,10 +743,10 @@ void ScaLBL_GreyscaleModel::Run(){
|
||||
//************************************************************************
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
||||
@@ -784,7 +784,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){
|
||||
//.......create and start timer............
|
||||
//double starttime,stoptime,cputime;
|
||||
//ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
//starttime = MPI_Wtime();
|
||||
//auto t1 = std::chrono::system_clock::now();
|
||||
|
||||
for (int ic=0; ic<number_ion_species; ic++){
|
||||
timestep=0;
|
||||
@@ -886,10 +886,10 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){
|
||||
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
}
|
||||
//************************************************************************/
|
||||
//stoptime = MPI_Wtime();
|
||||
//if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
//// Compute the walltime per timestep
|
||||
//cputime = (stoptime - starttime)/timestep;
|
||||
//auto t2 = std::chrono::system_clock::now();
|
||||
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
//// Performance obtained from each node
|
||||
//double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
||||
@@ -230,14 +230,13 @@ void ScaLBL_MRTModel::Run(){
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_DeviceBarrier(); comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
timestep=0;
|
||||
double error = 1.0;
|
||||
double flow_rate_previous = 0.0;
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax && error > tolerance) {
|
||||
//************************************************************************/
|
||||
timestep++;
|
||||
@@ -354,10 +353,10 @@ void ScaLBL_MRTModel::Run(){
|
||||
}
|
||||
}
|
||||
//************************************************************************/
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, const Utilities::MPI& COMM):
|
||||
rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0),
|
||||
analysis_interval(0),visualization_interval(0),tolerance(0),comm(COMM)
|
||||
analysis_interval(0),visualization_interval(0),tolerance(0),time_conv_max(0),comm(COMM)
|
||||
{
|
||||
|
||||
}
|
||||
@@ -25,6 +25,7 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){
|
||||
analysis_interval = 500;
|
||||
visualization_interval = 10000;
|
||||
tolerance = 1.0e-6;
|
||||
time_conv_max = 0.0;
|
||||
|
||||
// load input parameters
|
||||
if (study_db->keyExists( "timestepMax" )){
|
||||
@@ -135,3 +136,12 @@ vector<int> ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(double Stoke
|
||||
}
|
||||
return num_iter_ion;
|
||||
}
|
||||
|
||||
void ScaLBL_Multiphys_Controller::getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv){
|
||||
//Return maximum of the time converting factor from Stokes and ion solvers
|
||||
vector<double> TimeConv;
|
||||
|
||||
TimeConv.assign(IonTimeConv.begin(),IonTimeConv.end());
|
||||
TimeConv.insert(TimeConv.begin(),StokesTimeConv);
|
||||
time_conv_max = *max_element(TimeConv.begin(),TimeConv.end());
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ public:
|
||||
int getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
|
||||
vector<int> getIonNumIter_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
|
||||
//void getIonNumIter_PNP_coupling(double StokesTimeConv,vector<double> &IonTimeConv,vector<int> &IonTimeMax);
|
||||
void getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector<double> &IonTimeConv);
|
||||
|
||||
bool Restart;
|
||||
int timestepMax;
|
||||
@@ -35,6 +36,7 @@ public:
|
||||
int analysis_interval;
|
||||
int visualization_interval;
|
||||
double tolerance;
|
||||
double time_conv_max;
|
||||
//double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity
|
||||
|
||||
int rank,nprocs;
|
||||
|
||||
@@ -8,8 +8,11 @@
|
||||
ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, const Utilities::MPI& COMM):
|
||||
rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),tolerance(0),h(0),
|
||||
epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0),
|
||||
chargeDen_dummy(0),WriteLog(0),
|
||||
nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM)
|
||||
chargeDen_dummy(0),WriteLog(0),nprocx(0),nprocy(0),nprocz(0),
|
||||
BoundaryConditionInlet(0),BoundaryConditionOutlet(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),
|
||||
Vin0(0),freqIn(0),t0_In(0),Vin_Type(0),Vout0(0),freqOut(0),t0_Out(0),Vout_Type(0),
|
||||
TestPeriodic(0),TestPeriodicTime(0),TestPeriodicTimeConv(0),TestPeriodicSaveInterval(0),
|
||||
comm(COMM)
|
||||
{
|
||||
|
||||
}
|
||||
@@ -33,10 +36,12 @@ void ScaLBL_Poisson::ReadParams(string filename){
|
||||
epsilonR = 78.4;//default dielectric constant of water
|
||||
epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity
|
||||
analysis_interval = 1000;
|
||||
Vin = 1.0; //Boundary-z (inlet) electric potential
|
||||
Vout = 1.0; //Boundary-Z (outlet) electric potential
|
||||
chargeDen_dummy = 1.0e-3;//For debugging;unit=[C/m^3]
|
||||
WriteLog = false;
|
||||
TestPeriodic = false;
|
||||
TestPeriodicTime = 1.0;//unit: [sec]
|
||||
TestPeriodicTimeConv = 0.01; //unit [sec/lt]
|
||||
TestPeriodicSaveInterval = 0.1; //unit [sec]
|
||||
|
||||
// LB-Poisson Model parameters
|
||||
if (electric_db->keyExists( "timestepMax" )){
|
||||
@@ -57,6 +62,18 @@ void ScaLBL_Poisson::ReadParams(string filename){
|
||||
if (electric_db->keyExists( "WriteLog" )){
|
||||
WriteLog = electric_db->getScalar<bool>( "WriteLog" );
|
||||
}
|
||||
if (electric_db->keyExists( "TestPeriodic" )){
|
||||
TestPeriodic = electric_db->getScalar<bool>( "TestPeriodic" );
|
||||
}
|
||||
if (electric_db->keyExists( "TestPeriodicTime" )){
|
||||
TestPeriodicTime = electric_db->getScalar<double>( "TestPeriodicTime" );
|
||||
}
|
||||
if (electric_db->keyExists( "TestPeriodicTimeConv" )){
|
||||
TestPeriodicTimeConv = electric_db->getScalar<double>( "TestPeriodicTimeConv" );
|
||||
}
|
||||
if (electric_db->keyExists( "TestPeriodicSaveInterval" )){
|
||||
TestPeriodicSaveInterval = electric_db->getScalar<double>( "TestPeriodicSaveInterval" );
|
||||
}
|
||||
|
||||
// Read solid boundary condition specific to Poisson equation
|
||||
BoundaryConditionSolid = 1;
|
||||
@@ -65,10 +82,15 @@ void ScaLBL_Poisson::ReadParams(string filename){
|
||||
}
|
||||
// Read boundary condition for electric potential
|
||||
// BC = 0: normal periodic BC
|
||||
// BC = 1: fixed inlet and outlet potential
|
||||
BoundaryCondition = 0;
|
||||
if (electric_db->keyExists( "BC" )){
|
||||
BoundaryCondition = electric_db->getScalar<int>( "BC" );
|
||||
// BC = 1: fixed electric potential
|
||||
// BC = 2: sine/cosine periodic electric potential (need extra input parameters)
|
||||
BoundaryConditionInlet = 0;
|
||||
BoundaryConditionOutlet = 0;
|
||||
if (electric_db->keyExists( "BC_Inlet" )){
|
||||
BoundaryConditionInlet = electric_db->getScalar<int>( "BC_Inlet" );
|
||||
}
|
||||
if (electric_db->keyExists( "BC_Outlet" )){
|
||||
BoundaryConditionOutlet = electric_db->getScalar<int>( "BC_Outlet" );
|
||||
}
|
||||
|
||||
// Read domain parameters
|
||||
@@ -117,8 +139,17 @@ void ScaLBL_Poisson::SetDomain(){
|
||||
for (int i=0; i<Nx*Ny*Nz; i++) Dm->id[i] = 1; // initialize this way
|
||||
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
|
||||
comm.barrier();
|
||||
Dm->BoundaryCondition = BoundaryCondition;
|
||||
Mask->BoundaryCondition = BoundaryCondition;
|
||||
if (BoundaryConditionInlet==0 && BoundaryConditionOutlet==0){
|
||||
Dm->BoundaryCondition = 0;
|
||||
Mask->BoundaryCondition = 0;
|
||||
}
|
||||
else if (BoundaryConditionInlet>0 && BoundaryConditionOutlet>0){
|
||||
Dm->BoundaryCondition = 1;
|
||||
Mask->BoundaryCondition = 1;
|
||||
}
|
||||
else {//i.e. non-periodic and periodic BCs are mixed
|
||||
ERROR("Error: check the type of inlet and outlet boundary condition! Mixed periodic and non-periodic BCs are found!\n");
|
||||
}
|
||||
Dm->CommInit();
|
||||
comm.barrier();
|
||||
|
||||
@@ -343,15 +374,91 @@ void ScaLBL_Poisson::Create(){
|
||||
|
||||
void ScaLBL_Poisson::Potential_Init(double *psi_init){
|
||||
|
||||
if (BoundaryCondition==1){
|
||||
if (electric_db->keyExists( "Vin" )){
|
||||
Vin = electric_db->getScalar<double>( "Vin" );
|
||||
}
|
||||
if (electric_db->keyExists( "Vout" )){
|
||||
Vout = electric_db->getScalar<double>( "Vout" );
|
||||
}
|
||||
//set up default boundary input parameters
|
||||
Vin0 = Vout0 = 1.0; //unit: [V]
|
||||
freqIn = freqOut = 50.0; //unit: [Hz]
|
||||
t0_In = t0_Out = 0.0; //unit: [sec]
|
||||
Vin_Type = Vout_Type = 1; //1->sin; 2->cos
|
||||
Vin = 1.0; //Boundary-z (inlet) electric potential
|
||||
Vout = 1.0; //Boundary-Z (outlet) electric potential
|
||||
|
||||
if (BoundaryConditionInlet>0){
|
||||
switch (BoundaryConditionInlet){
|
||||
case 1:
|
||||
if (electric_db->keyExists( "Vin" )){
|
||||
Vin = electric_db->getScalar<double>( "Vin" );
|
||||
}
|
||||
if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g [V]\n",Vin);
|
||||
break;
|
||||
case 2:
|
||||
if (electric_db->keyExists( "Vin0" )){//voltage amplitude; unit: Volt
|
||||
Vin0 = electric_db->getScalar<double>( "Vin0" );
|
||||
}
|
||||
if (electric_db->keyExists( "freqIn" )){//unit: Hz
|
||||
freqIn = electric_db->getScalar<double>( "freqIn" );
|
||||
}
|
||||
if (electric_db->keyExists( "t0_In" )){//timestep shift, unit: lt
|
||||
t0_In = electric_db->getScalar<double>( "t0_In" );
|
||||
}
|
||||
if (electric_db->keyExists( "Vin_Type" )){
|
||||
//type=1 -> sine
|
||||
//type=2 -> cosine
|
||||
Vin_Type = electric_db->getScalar<int>( "Vin_Type" );
|
||||
if (Vin_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vin_Type is currently not supported! \n");
|
||||
}
|
||||
if (rank==0){
|
||||
if (Vin_Type==1){
|
||||
printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vin0,freqIn,t0_In);
|
||||
printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In);
|
||||
}
|
||||
else if (Vin_Type==2){
|
||||
printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V] \n",Vin0,freqIn,t0_In);
|
||||
printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (BoundaryConditionOutlet>0){
|
||||
switch (BoundaryConditionOutlet){
|
||||
case 1:
|
||||
if (electric_db->keyExists( "Vout" )){
|
||||
Vout = electric_db->getScalar<double>( "Vout" );
|
||||
}
|
||||
if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vout = %.3g [V] \n",Vout);
|
||||
break;
|
||||
case 2:
|
||||
if (electric_db->keyExists( "Vout0" )){//voltage amplitude; unit: Volt
|
||||
Vout0 = electric_db->getScalar<double>( "Vout0" );
|
||||
}
|
||||
if (electric_db->keyExists( "freqOut" )){//unit: Hz
|
||||
freqOut = electric_db->getScalar<double>( "freqOut" );
|
||||
}
|
||||
if (electric_db->keyExists( "t0_Out" )){//timestep shift, unit: lt
|
||||
t0_Out = electric_db->getScalar<double>( "t0_Out" );
|
||||
}
|
||||
// Optional waveform selector for the periodic outlet potential:
//   type=1 -> sine
//   type=2 -> cosine
if (electric_db->keyExists( "Vout_Type" )){
    Vout_Type = electric_db->getScalar<int>( "Vout_Type" );
    // Both bounds must be checked on the outlet selector itself; testing
    // Vin_Type here would let a non-positive Vout_Type pass unnoticed.
    if (Vout_Type>2 || Vout_Type<=0) ERROR("Error: user-input Vout_Type is currently not supported! \n");
}
|
||||
if (rank==0){
|
||||
if (Vout_Type==1){
|
||||
printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out);
|
||||
printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out);
|
||||
}
|
||||
else if (Vout_Type==2){
|
||||
printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out);
|
||||
printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
//By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis
|
||||
if (BoundaryConditionInlet==2) Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,0);
|
||||
if (BoundaryConditionOutlet==2) Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,0);
|
||||
double slope = (Vout-Vin)/(Nz-2);
|
||||
double psi_linearized;
|
||||
for (int k=0;k<Nz;k++){
|
||||
@@ -375,10 +482,15 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){
|
||||
}
|
||||
}
|
||||
|
||||
double ScaLBL_Poisson::getBoundaryVoltagefromPeriodicBC(double V0, double freq, double t0, int V_type, int time_step){
|
||||
return V0*(V_type==1)*sin(2.0*M_PI*freq*time_conv*(time_step+t0/time_conv))+V0*(V_type==2)*cos(2.0*M_PI*freq*time_conv*(time_step+t0/time_conv));
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::Initialize(){
|
||||
void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
|
||||
/*
|
||||
* This function initializes model
|
||||
* "time_conv_from_Study" is the phys to LB time conversion factor, unit=[sec/lt]
|
||||
* which is used for periodic voltage input for inlet and outlet boundaries
|
||||
*/
|
||||
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q7 distributions\n");
|
||||
//NOTE the initialization involves two steps:
|
||||
@@ -386,6 +498,7 @@ void ScaLBL_Poisson::Initialize(){
|
||||
//2. Initialize electric potential for pore nodes
|
||||
double *psi_host;
|
||||
psi_host = new double [Nx*Ny*Nz];
|
||||
time_conv = time_conv_from_Study;
|
||||
AssignSolidBoundary(psi_host);//step1
|
||||
Potential_Init(psi_host);//step2
|
||||
ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double));
|
||||
@@ -405,12 +518,12 @@ void ScaLBL_Poisson::Initialize(){
|
||||
//}
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::Run(double *ChargeDensity){
|
||||
void ScaLBL_Poisson::Run(double *ChargeDensity, int timestep_from_Study){
|
||||
|
||||
//.......create and start timer............
|
||||
//double starttime,stoptime,cputime;
|
||||
//ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
//starttime = MPI_Wtime();
|
||||
//comm.barrier();
|
||||
//auto t1 = std::chrono::system_clock::now();
|
||||
|
||||
timestep=0;
|
||||
double error = 1.0;
|
||||
@@ -420,13 +533,13 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){
|
||||
// *************ODD TIMESTEP*************//
|
||||
timestep++;
|
||||
|
||||
SolveElectricPotentialAAodd();//update electric potential
|
||||
SolveElectricPotentialAAodd(timestep_from_Study);//update electric potential
|
||||
SolvePoissonAAodd(ChargeDensity);//perform collision
|
||||
ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
|
||||
// *************EVEN TIMESTEP*************//
|
||||
timestep++;
|
||||
SolveElectricPotentialAAeven();//update electric potential
|
||||
SolveElectricPotentialAAeven(timestep_from_Study);//update electric potential
|
||||
SolvePoissonAAeven(ChargeDensity);//perform collision
|
||||
ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
//************************************************************************/
|
||||
@@ -466,11 +579,11 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){
|
||||
}
|
||||
|
||||
//************************************************************************/
|
||||
//stoptime = MPI_Wtime();
|
||||
////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n");
|
||||
////if (rank==0) printf("---------------------------------------------------------------------------\n");
|
||||
//// Compute the walltime per timestep
|
||||
//cputime = (stoptime - starttime)/timestep;
|
||||
//auto t2 = std::chrono::system_clock::now();
|
||||
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
//// Performance obtained from each node
|
||||
//double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
@@ -506,29 +619,65 @@ void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){
|
||||
}
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::SolveElectricPotentialAAodd(){
|
||||
void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study){
|
||||
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL
|
||||
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
// Set boundary conditions
|
||||
if (BoundaryCondition == 1){
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
|
||||
if (BoundaryConditionInlet > 0){
|
||||
switch (BoundaryConditionInlet){
|
||||
case 1:
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
|
||||
break;
|
||||
case 2:
|
||||
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study);
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (BoundaryConditionOutlet > 0){
|
||||
switch (BoundaryConditionOutlet){
|
||||
case 1:
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
|
||||
break;
|
||||
case 2:
|
||||
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study);
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
|
||||
break;
|
||||
}
|
||||
}
|
||||
//-------------------------//
|
||||
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::SolveElectricPotentialAAeven(){
|
||||
void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){
|
||||
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL
|
||||
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
// Set boundary conditions
|
||||
if (BoundaryCondition == 1){
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
|
||||
if (BoundaryConditionInlet > 0){
|
||||
switch (BoundaryConditionInlet){
|
||||
case 1:
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
|
||||
break;
|
||||
case 2:
|
||||
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study);
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (BoundaryConditionOutlet > 0){
|
||||
switch (BoundaryConditionOutlet){
|
||||
case 1:
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
|
||||
break;
|
||||
case 2:
|
||||
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study);
|
||||
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
|
||||
break;
|
||||
}
|
||||
}
|
||||
//-------------------------//
|
||||
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
|
||||
#include "common/ScaLBL.h"
|
||||
#include "common/Communication.h"
|
||||
@@ -16,6 +17,7 @@
|
||||
#include "analysis/Minkowski.h"
|
||||
#include "ProfilerApp.h"
|
||||
|
||||
// NOTE(review): <cmath> is already included earlier in this header, so this
// macro may come too late to expose M_PI on toolchains that require it
// (e.g. MSVC) — TODO confirm, and consider defining it before the first
// <cmath>/<math.h> include.
#define _USE_MATH_DEFINES
|
||||
#ifndef ScaLBL_POISSON_INC
|
||||
#define ScaLBL_POISSON_INC
|
||||
|
||||
@@ -30,8 +32,8 @@ public:
|
||||
void SetDomain();
|
||||
void ReadInput();
|
||||
void Create();
|
||||
void Initialize();
|
||||
void Run(double *ChargeDensity);
|
||||
void Initialize(double time_conv_from_Study);
|
||||
void Run(double *ChargeDensity,int timestep_from_Study);
|
||||
void getElectricPotential(DoubleArray &ReturnValues);
|
||||
void getElectricPotential_debug(int timestep);
|
||||
void getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z);
|
||||
@@ -41,7 +43,8 @@ public:
|
||||
//bool Restart,pBC;
|
||||
int timestep,timestepMax;
|
||||
int analysis_interval;
|
||||
int BoundaryCondition;
|
||||
int BoundaryConditionInlet;
|
||||
int BoundaryConditionOutlet;
|
||||
int BoundaryConditionSolid;
|
||||
double tau;
|
||||
double tolerance;
|
||||
@@ -50,11 +53,18 @@ public:
|
||||
double Vin, Vout;
|
||||
double chargeDen_dummy;//for debugging
|
||||
bool WriteLog;
|
||||
// Periodic inlet potential: amplitude [V], frequency [Hz], time shift [sec].
double Vin0,freqIn,t0_In;
// Inlet waveform selector (1 -> sine, 2 -> cosine). Kept as int because it is
// read with getScalar<int> and passed to an int parameter.
int Vin_Type;
// Periodic outlet potential: amplitude [V], frequency [Hz], time shift [sec].
double Vout0,freqOut,t0_Out;
// Outlet waveform selector (1 -> sine, 2 -> cosine).
int Vout_Type;
|
||||
bool TestPeriodic;
|
||||
double TestPeriodicTime;//unit: [sec]
|
||||
double TestPeriodicTimeConv; //unit [sec/lt]
|
||||
double TestPeriodicSaveInterval; //unit [sec]
|
||||
|
||||
int Nx,Ny,Nz,N,Np;
|
||||
int rank,nprocx,nprocy,nprocz,nprocs;
|
||||
double Lx,Ly,Lz;
|
||||
double h;//image resolution
|
||||
double time_conv;//phys to LB time converting factor; unit=[sec/lt]
|
||||
|
||||
std::shared_ptr<Domain> Dm; // this domain is for analysis
|
||||
std::shared_ptr<Domain> Mask; // this domain is for lbm
|
||||
@@ -91,12 +101,13 @@ private:
|
||||
void AssignSolidBoundary(double *poisson_solid);
|
||||
void Potential_Init(double *psi_init);
|
||||
void ElectricField_LB_to_Phys(DoubleArray &Efield_reg);
|
||||
void SolveElectricPotentialAAodd();
|
||||
void SolveElectricPotentialAAeven();
|
||||
void SolveElectricPotentialAAodd(int timestep_from_Study);
|
||||
void SolveElectricPotentialAAeven(int timestep_from_Study);
|
||||
//void SolveElectricField();
|
||||
void SolvePoissonAAodd(double *ChargeDensity);
|
||||
void SolvePoissonAAeven(double *ChargeDensity);
|
||||
void getConvergenceLog(int timestep,double error);
|
||||
double getBoundaryVoltagefromPeriodicBC(double V0,double freq,double t0,int V_type,int time_step);
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -573,16 +573,14 @@ void ScaLBL_StokesModel::Run(){
|
||||
}
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
if (rank==0) printf("****************************************************************\n");
|
||||
if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax);
|
||||
if (rank==0) printf("****************************************************************\n");
|
||||
timestep=0;
|
||||
double error = 1.0;
|
||||
double flow_rate_previous = 0.0;
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax && error > tolerance) {
|
||||
//************************************************************************/
|
||||
timestep++;
|
||||
@@ -700,10 +698,10 @@ void ScaLBL_StokesModel::Run(){
|
||||
}
|
||||
}
|
||||
//************************************************************************/
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@
|
||||
ADD_LBPM_EXECUTABLE( lbpm_color_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_freelee_SingleFluidBGK_simulator )
|
||||
#ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
|
||||
#ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator )
|
||||
|
||||
@@ -66,7 +66,7 @@ int main(int argc, char **argv)
|
||||
PoissonSolver.SetDomain();
|
||||
PoissonSolver.ReadInput();
|
||||
PoissonSolver.Create();
|
||||
PoissonSolver.Initialize();
|
||||
PoissonSolver.Initialize(0);
|
||||
|
||||
int timestep=0;
|
||||
double error = 1.0;
|
||||
@@ -74,7 +74,7 @@ int main(int argc, char **argv)
|
||||
while (timestep < Study.timestepMax && error > Study.tolerance){
|
||||
|
||||
timestep++;
|
||||
PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental
|
||||
PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental
|
||||
IonModel.Run(IonModel.FluidVelocityDummy,PoissonSolver.ElectricField); //solve for ion transport and electric potential
|
||||
|
||||
timestep++;//AA operations
|
||||
|
||||
@@ -82,7 +82,7 @@ int main(int argc, char **argv)
|
||||
PoissonSolver.SetDomain();
|
||||
PoissonSolver.ReadInput();
|
||||
PoissonSolver.Create();
|
||||
PoissonSolver.Initialize();
|
||||
PoissonSolver.Initialize(0);
|
||||
|
||||
|
||||
int timestep=0;
|
||||
@@ -94,7 +94,7 @@ int main(int argc, char **argv)
|
||||
while (timestep < Study.timestepMax && error > Study.tolerance){
|
||||
|
||||
timestep++;
|
||||
PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental
|
||||
PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental
|
||||
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity
|
||||
IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential
|
||||
|
||||
|
||||
@@ -51,14 +51,37 @@ int main(int argc, char **argv)
|
||||
PoissonSolver.SetDomain();
|
||||
PoissonSolver.ReadInput();
|
||||
PoissonSolver.Create();
|
||||
PoissonSolver.Initialize();
|
||||
if (PoissonSolver.TestPeriodic==true){
|
||||
PoissonSolver.Initialize(PoissonSolver.TestPeriodicTimeConv);
|
||||
}
|
||||
else {
|
||||
PoissonSolver.Initialize(0);
|
||||
}
|
||||
|
||||
//Initialize dummy charge density for test
|
||||
PoissonSolver.DummyChargeDensity();
|
||||
|
||||
PoissonSolver.Run(PoissonSolver.ChargeDensityDummy);
|
||||
PoissonSolver.getElectricPotential_debug(1);
|
||||
PoissonSolver.getElectricField_debug(1);
|
||||
if (PoissonSolver.TestPeriodic==true){
|
||||
if (rank==0) printf("Testing periodic voltage input is enabled. Total test time is %.3g[s], saving data every %.3g[s]; user-specified time resolution is %.3g[s/lt]\n",
|
||||
PoissonSolver.TestPeriodicTime,PoissonSolver.TestPeriodicSaveInterval,PoissonSolver.TestPeriodicTimeConv);
|
||||
int timestep = 0;
|
||||
int timeMax = int(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv);
|
||||
int timeSave = int(PoissonSolver.TestPeriodicSaveInterval/PoissonSolver.TestPeriodicTimeConv);
|
||||
while (timestep<timeMax){
|
||||
timestep++;
|
||||
PoissonSolver.Run(PoissonSolver.ChargeDensityDummy,timestep);
|
||||
if (timestep%timeSave==0){
|
||||
if (rank==0) printf(" Time = %.3g[s]; saving electric potential and field\n",timestep*PoissonSolver.TestPeriodicTimeConv);
|
||||
PoissonSolver.getElectricPotential_debug(timestep);
|
||||
PoissonSolver.getElectricField_debug(timestep);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
PoissonSolver.Run(PoissonSolver.ChargeDensityDummy,1);
|
||||
PoissonSolver.getElectricPotential_debug(1);
|
||||
PoissonSolver.getElectricField_debug(1);
|
||||
}
|
||||
|
||||
if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n");
|
||||
if (rank==0) printf("*************************************************************\n");
|
||||
|
||||
@@ -79,20 +79,22 @@ int main(int argc, char **argv)
|
||||
|
||||
IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
|
||||
IonModel.Initialize();
|
||||
// Get maximal time conversion factor based on Stokes and Ion solvers
|
||||
Study.getTimeConvMax_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
|
||||
|
||||
// Initialize LB-Poisson model
|
||||
PoissonSolver.ReadParams(filename);
|
||||
PoissonSolver.SetDomain();
|
||||
PoissonSolver.ReadInput();
|
||||
PoissonSolver.Create();
|
||||
PoissonSolver.Initialize();
|
||||
PoissonSolver.Initialize(Study.time_conv_max);
|
||||
|
||||
|
||||
int timestep=0;
|
||||
while (timestep < Study.timestepMax){
|
||||
|
||||
timestep++;
|
||||
PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental
|
||||
PoissonSolver.Run(IonModel.ChargeDensity,timestep);//solve Poisson equtaion to get steady-state electrical potental
|
||||
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity
|
||||
IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential
|
||||
|
||||
|
||||
70
tests/lbpm_freelee_SingleFluidBGK_simulator.cpp
Normal file
70
tests/lbpm_freelee_SingleFluidBGK_simulator.cpp
Normal file
@@ -0,0 +1,70 @@
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "common/Utilities.h"
|
||||
#include "models/FreeLeeModel.h"
|
||||
|
||||
//*******************************************************************
|
||||
// Implementation of Free-Energy Two-Phase LBM (Lee model)
|
||||
//*******************************************************************
|
||||
|
||||
int main( int argc, char **argv )
|
||||
{
|
||||
|
||||
// Initialize
|
||||
Utilities::startup( argc, argv );
|
||||
|
||||
// Load the input database
|
||||
auto db = std::make_shared<Database>( argv[1] );
|
||||
|
||||
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
|
||||
|
||||
Utilities::MPI comm( MPI_COMM_WORLD );
|
||||
int rank = comm.getRank();
|
||||
int nprocs = comm.getSize();
|
||||
|
||||
if (rank == 0){
|
||||
printf("********************************************************\n");
|
||||
printf("Running Single-Fluid Solver based on Lee LBM \n");
|
||||
printf("********************************************************\n");
|
||||
}
|
||||
// Initialize compute device
|
||||
int device=ScaLBL_SetDevice(rank);
|
||||
NULL_USE( device );
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
|
||||
PROFILE_ENABLE(1);
|
||||
//PROFILE_ENABLE_TRACE();
|
||||
//PROFILE_ENABLE_MEMORY();
|
||||
PROFILE_SYNCHRONIZE();
|
||||
PROFILE_START("Main");
|
||||
Utilities::setErrorHandlers();
|
||||
|
||||
auto filename = argv[1];
|
||||
ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm );
|
||||
LeeModel.ReadParams( filename );
|
||||
LeeModel.SetDomain();
|
||||
LeeModel.ReadInput();
|
||||
LeeModel.Create_SingleFluid();
|
||||
LeeModel.Initialize_SingleFluid();
|
||||
LeeModel.Run_SingleFluid();
|
||||
LeeModel.WriteDebug_SingleFluid();
|
||||
|
||||
PROFILE_STOP("Main");
|
||||
auto file = db->getWithDefault<std::string>( "TimerFile", "lbpm_freelee_SingleFluidBGK_simulator" );
|
||||
auto level = db->getWithDefault<int>( "TimerLevel", 1 );
|
||||
PROFILE_SAVE( file,level );
|
||||
// ****************************************************
|
||||
|
||||
|
||||
} // Limit scope so variables that contain communicators will free before MPI_Finialize
|
||||
|
||||
Utilities::shutdown();
|
||||
return 0;
|
||||
}
|
||||
70
tests/lbpm_freelee_simulator.cpp
Normal file
70
tests/lbpm_freelee_simulator.cpp
Normal file
@@ -0,0 +1,70 @@
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "common/Utilities.h"
|
||||
#include "models/FreeLeeModel.h"
|
||||
|
||||
//*******************************************************************
|
||||
// Implementation of Free-Energy Two-Phase LBM (Lee model)
|
||||
//*******************************************************************
|
||||
|
||||
int main( int argc, char **argv )
|
||||
{
|
||||
|
||||
// Initialize
|
||||
Utilities::startup( argc, argv );
|
||||
|
||||
// Load the input database
|
||||
auto db = std::make_shared<Database>( argv[1] );
|
||||
|
||||
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
|
||||
|
||||
Utilities::MPI comm( MPI_COMM_WORLD );
|
||||
int rank = comm.getRank();
|
||||
int nprocs = comm.getSize();
|
||||
|
||||
if (rank == 0){
|
||||
printf("********************************************************\n");
|
||||
printf("Running Free Energy Lee LBM \n");
|
||||
printf("********************************************************\n");
|
||||
}
|
||||
// Initialize compute device
|
||||
int device=ScaLBL_SetDevice(rank);
|
||||
NULL_USE( device );
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
|
||||
PROFILE_ENABLE(1);
|
||||
//PROFILE_ENABLE_TRACE();
|
||||
//PROFILE_ENABLE_MEMORY();
|
||||
PROFILE_SYNCHRONIZE();
|
||||
PROFILE_START("Main");
|
||||
Utilities::setErrorHandlers();
|
||||
|
||||
auto filename = argv[1];
|
||||
ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm );
|
||||
LeeModel.ReadParams( filename );
|
||||
LeeModel.SetDomain();
|
||||
LeeModel.ReadInput();
|
||||
LeeModel.Create_TwoFluid();
|
||||
LeeModel.Initialize_TwoFluid();
|
||||
LeeModel.Run_TwoFluid();
|
||||
LeeModel.WriteDebug_TwoFluid();
|
||||
|
||||
PROFILE_STOP("Main");
|
||||
auto file = db->getWithDefault<std::string>( "TimerFile", "lbpm_freelee_simulator" );
|
||||
auto level = db->getWithDefault<int>( "TimerLevel", 1 );
|
||||
PROFILE_SAVE( file,level );
|
||||
// ****************************************************
|
||||
|
||||
|
||||
} // Limit scope so variables that contain communicators will free before MPI_Finialize
|
||||
|
||||
Utilities::shutdown();
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user