GPU version is available now
This commit is contained in:
@@ -102,8 +102,9 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *d
|
|||||||
|
|
||||||
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
|
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
|
||||||
|
|
||||||
extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC,
|
//maybe deprecated
|
||||||
int strideY, int strideZ,int start, int finish, int Np);
|
//extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC,
|
||||||
|
// int strideY, int strideZ,int start, int finish, int Np);
|
||||||
|
|
||||||
// LBM Stokes Model (adapted from MRT model)
|
// LBM Stokes Model (adapted from MRT model)
|
||||||
|
|
||||||
|
|||||||
@@ -229,10 +229,10 @@ extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity
|
|||||||
double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
|
double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
|
||||||
|
|
||||||
for (n=start; n<finish; n++){
|
for (n=start; n<finish; n++){
|
||||||
Ci = Den[n+ion_component*Np];
|
Ci = Den[n+ion_component*Np];
|
||||||
CD = ChargeDensity[n];
|
CD = ChargeDensity[n];
|
||||||
CD_tmp = F*IonValence*Ci;
|
CD_tmp = F*IonValence*Ci;
|
||||||
ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
|
ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
206
cpu/Poisson.cpp
206
cpu/Poisson.cpp
@@ -235,109 +235,109 @@ extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, in
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC,
|
//extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC,
|
||||||
int strideY, int strideZ,int start, int finish, int Np){
|
// int strideY, int strideZ,int start, int finish, int Np){
|
||||||
|
//
|
||||||
int n,nn;
|
// int n,nn;
|
||||||
int ijk;
|
// int ijk;
|
||||||
int id;
|
// int id;
|
||||||
// distributions
|
// // distributions
|
||||||
double m1,m2,m3,m4,m5,m6,m7,m8,m9;
|
// double m1,m2,m3,m4,m5,m6,m7,m8,m9;
|
||||||
double m10,m11,m12,m13,m14,m15,m16,m17,m18;
|
// double m10,m11,m12,m13,m14,m15,m16,m17,m18;
|
||||||
double nx,ny,nz;
|
// double nx,ny,nz;
|
||||||
|
//
|
||||||
for (n=start; n<finish; n++){
|
// for (n=start; n<finish; n++){
|
||||||
|
//
|
||||||
// Get the 1D index based on regular data layout
|
// // Get the 1D index based on regular data layout
|
||||||
ijk = Map[n];
|
// ijk = Map[n];
|
||||||
// COMPUTE THE COLOR GRADIENT
|
// // COMPUTE THE COLOR GRADIENT
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
//.................Read Phase Indicator Values............................
|
// //.................Read Phase Indicator Values............................
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-1; // neighbor index (get convention)
|
// nn = ijk-1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m1 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 1
|
// m1 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 1
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+1; // neighbor index (get convention)
|
// nn = ijk+1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m2 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 2
|
// m2 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 2
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-strideY; // neighbor index (get convention)
|
// nn = ijk-strideY; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m3 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 3
|
// m3 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 3
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+strideY; // neighbor index (get convention)
|
// nn = ijk+strideY; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m4 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 4
|
// m4 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 4
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-strideZ; // neighbor index (get convention)
|
// nn = ijk-strideZ; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m5 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 5
|
// m5 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 5
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+strideZ; // neighbor index (get convention)
|
// nn = ijk+strideZ; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m6 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 6
|
// m6 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 6
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-strideY-1; // neighbor index (get convention)
|
// nn = ijk-strideY-1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m7 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 7
|
// m7 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 7
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+strideY+1; // neighbor index (get convention)
|
// nn = ijk+strideY+1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m8 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 8
|
// m8 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 8
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+strideY-1; // neighbor index (get convention)
|
// nn = ijk+strideY-1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m9 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 9
|
// m9 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 9
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-strideY+1; // neighbor index (get convention)
|
// nn = ijk-strideY+1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m10 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 10
|
// m10 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 10
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-strideZ-1; // neighbor index (get convention)
|
// nn = ijk-strideZ-1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m11 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 11
|
// m11 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 11
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+strideZ+1; // neighbor index (get convention)
|
// nn = ijk+strideZ+1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m12 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 12
|
// m12 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 12
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+strideZ-1; // neighbor index (get convention)
|
// nn = ijk+strideZ-1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m13 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 13
|
// m13 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 13
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-strideZ+1; // neighbor index (get convention)
|
// nn = ijk-strideZ+1; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m14 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 14
|
// m14 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 14
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-strideZ-strideY; // neighbor index (get convention)
|
// nn = ijk-strideZ-strideY; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m15 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 15
|
// m15 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 15
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+strideZ+strideY; // neighbor index (get convention)
|
// nn = ijk+strideZ+strideY; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m16 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 16
|
// m16 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 16
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk+strideZ-strideY; // neighbor index (get convention)
|
// nn = ijk+strideZ-strideY; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m17 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 17
|
// m17 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 17
|
||||||
//........................................................................
|
// //........................................................................
|
||||||
nn = ijk-strideZ+strideY; // neighbor index (get convention)
|
// nn = ijk-strideZ+strideY; // neighbor index (get convention)
|
||||||
id = ID[nn];
|
// id = ID[nn];
|
||||||
m18 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 18
|
// m18 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 18
|
||||||
//............Compute the Color Gradient...................................
|
// //............Compute the Color Gradient...................................
|
||||||
//nx = 1.f/6.f*(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14));
|
// //nx = 1.f/6.f*(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14));
|
||||||
//ny = 1.f/6.f*(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18));
|
// //ny = 1.f/6.f*(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18));
|
||||||
//nz = 1.f/6.f*(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18));
|
// //nz = 1.f/6.f*(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18));
|
||||||
nx = 1.f/6.f*(m1-m2);//but looks like it needs to multiply another factor of 3
|
// nx = 1.f/6.f*(m1-m2);//but looks like it needs to multiply another factor of 3
|
||||||
ny = 1.f/6.f*(m3-m4);
|
// ny = 1.f/6.f*(m3-m4);
|
||||||
nz = 1.f/6.f*(m5-m6);
|
// nz = 1.f/6.f*(m5-m6);
|
||||||
|
//
|
||||||
ElectricField[n] = nx;
|
// ElectricField[n] = nx;
|
||||||
ElectricField[Np+n] = ny;
|
// ElectricField[Np+n] = ny;
|
||||||
ElectricField[2*Np+n] = nz;
|
// ElectricField[2*Np+n] = nz;
|
||||||
}
|
// }
|
||||||
}
|
//}
|
||||||
|
|
||||||
//extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){
|
//extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){
|
||||||
// int n;
|
// int n;
|
||||||
|
|||||||
918
gpu/D3Q7BC.cu
918
gpu/D3Q7BC.cu
File diff suppressed because it is too large
Load Diff
344
gpu/Ion.cu
Normal file
344
gpu/Ion.cu
Normal file
@@ -0,0 +1,344 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
//#include <cuda_profiler_api.h>
|
||||||
|
|
||||||
|
#define NBLOCKS 1024
|
||||||
|
#define NTHREADS 256
|
||||||
|
|
||||||
|
// Ion concentration for the odd AA step.
// For every site n with start <= n < finish, adds dist[n] and the six values
// dist[neighborList[n + q*Np]] (q = 0..5, in that order) and stores the sum
// in Den[n].
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
	// number of sites visited by each thread
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index handled by this thread in this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		// q=0 is read in place
		double Ci = dist[n];

		// q=1..6 are fetched through the neighbor list
		for (int q=0; q<6; q++){
			const int nread = neighborList[n+q*Np];
			Ci += dist[nread];
		}

		Den[n] = Ci;
	}
}
|
||||||
|
|
||||||
|
// Ion concentration for the even AA step.
// For every site n with start <= n < finish, sums the seven distributions
// stored at dist[q*Np + n] and writes the total to Den[n]. The terms are
// added in the order 0, 2, 1, 4, 3, 6, 5 so the floating-point result is
// bit-for-bit what a sequential accumulation in that order produces.
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
	// number of sites visited by each thread
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index handled by this thread in this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		Den[n] = dist[n]          // q=0
		       + dist[2*Np+n]     // q=1
		       + dist[1*Np+n]     // q=2
		       + dist[4*Np+n]     // q=3
		       + dist[3*Np+n]     // q=4
		       + dist[6*Np+n]     // q=5
		       + dist[5*Np+n];    // q=6
	}
}
|
||||||
|
|
||||||
|
// Ion transport update for the odd AA step.
// For every site n with start <= n < finish:
//   * reads the concentration Den[n], the velocity and the electric field
//     (three components each, component c stored at [n + c*Np]);
//   * gathers the seven distributions (q=0 in place, q=1..6 through
//     neighborList);
//   * relaxes each one with rate rlx toward a value built from Ci and the
//     drift u + zi*Di/Vt*E, writing q=1 to the slot q=2 was read from and
//     vice versa (same for the 3/4 and 5/6 pairs).
// All reads are done before any write because the update is in place.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
		double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
	// number of sites visited by each thread
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index handled by this thread in this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		// macroscopic fields at this site
		const double Ci = Den[n];
		const double Ex = ElectricField[n+0*Np];
		const double Ey = ElectricField[n+1*Np];
		const double Ez = ElectricField[n+2*Np];
		const double ux = Velocity[n+0*Np];
		const double uy = Velocity[n+1*Np];
		const double uz = Velocity[n+2*Np];

		// electrochemical induced velocity
		const double uEPx = zi*Di/Vt*Ex;
		const double uEPy = zi*Di/Vt*Ey;
		const double uEPz = zi*Di/Vt*Ez;

		// storage locations of q=1..6
		const int nr1 = neighborList[n];
		const int nr2 = neighborList[n+Np];
		const int nr3 = neighborList[n+2*Np];
		const int nr4 = neighborList[n+3*Np];
		const int nr5 = neighborList[n+4*Np];
		const int nr6 = neighborList[n+5*Np];

		// gather every distribution before anything is overwritten
		const double f0 = dist[n];
		const double f1 = dist[nr1];
		const double f2 = dist[nr2];
		const double f3 = dist[nr3];
		const double f4 = dist[nr4];
		const double f5 = dist[nr5];
		const double f6 = dist[nr6];

		// relax and scatter; opposite directions swap storage slots
		dist[n]   = f0*(1.0-rlx)+rlx*0.25*Ci;                            // q=0
		dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));     // q=1
		dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));     // q=2
		dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));     // q=3
		dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));     // q=4
		dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));     // q=5
		dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));     // q=6
	}
}
|
||||||
|
|
||||||
|
// Ion transport update for the even AA step.
// For every site n with start <= n < finish:
//   * reads Den[n], the velocity and the electric field (component c stored
//     at [n + c*Np]);
//   * reads q=1..6 from the slot of the opposite direction (f1 from 2*Np+n,
//     f2 from 1*Np+n, and so on);
//   * relaxes each one with rate rlx toward a value built from Ci and the
//     drift u + zi*Di/Vt*E and stores direction q at dist[q*Np + n].
// All reads are done before any write because the update is in place.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
		double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
	// number of sites visited by each thread
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index handled by this thread in this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		// macroscopic fields at this site
		const double Ci = Den[n];
		const double Ex = ElectricField[n+0*Np];
		const double Ey = ElectricField[n+1*Np];
		const double Ez = ElectricField[n+2*Np];
		const double ux = Velocity[n+0*Np];
		const double uy = Velocity[n+1*Np];
		const double uz = Velocity[n+2*Np];

		// electrochemical induced velocity
		const double uEPx = zi*Di/Vt*Ex;
		const double uEPy = zi*Di/Vt*Ey;
		const double uEPz = zi*Di/Vt*Ez;

		// gather: each direction is read from its opposite's slot
		const double f0 = dist[n];
		const double f1 = dist[2*Np+n];
		const double f2 = dist[1*Np+n];
		const double f3 = dist[4*Np+n];
		const double f4 = dist[3*Np+n];
		const double f5 = dist[6*Np+n];
		const double f6 = dist[5*Np+n];

		// relax and store
		dist[n]      = f0*(1.0-rlx)+rlx*0.25*Ci;                          // q=0
		dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));   // q=1
		dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));   // q=2
		dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));   // q=3
		dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));   // q=4
		dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));   // q=5
		dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));   // q=6
	}
}
|
||||||
|
|
||||||
|
// Initialise the ion distributions and concentration.
// For every site n < Np: the q=0 distribution is set to 0.25*DenInit, each
// of the q=1..6 distributions to 0.125*DenInit, and Den[n] to DenInit.
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
	// number of sites visited by each thread
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index handled by this thread in this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
		if (n >= Np) continue;

		dist[0*Np+n] = 0.25*DenInit;
		for (int q=1; q<=6; q++){
			dist[q*Np+n] = 0.125*DenInit;
		}
		Den[n] = DenInit;
	}
}
|
||||||
|
|
||||||
|
// Accumulate the charge density contributed by one ion species.
// For every site n with start <= n < finish the contribution
// F*IonValence*Den[n + ion_component*Np] is computed. When ion_component > 0
// it is added to the value already in ChargeDensity[n]; otherwise it replaces
// that value, so the first species resets the accumulator.
//
// The reset is done with an explicit select rather than multiplying the old
// value by (ion_component>0): a multiply by zero does not clear a NaN or Inf
// that may be sitting in ChargeDensity (e.g. a buffer that was never
// initialised), and the select also avoids reading the old value when it is
// not needed.
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
	const double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant

	// number of sites visited by each thread
	const int S = Np/NBLOCKS/NTHREADS + 1;
	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
		if (n<finish) {
			const double Ci = Den[n+ion_component*Np];   // ion concentration of this species
			const double CD_tmp = F*IonValence*Ci;       // its charge-density contribution

			if (ion_component>0){
				// later species: accumulate onto the running total
				ChargeDensity[n] = ChargeDensity[n] + CD_tmp;
			}
			else{
				// first species: overwrite whatever was stored before
				ChargeDensity[n] = CD_tmp;
			}
		}
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
// Host entry point: launches dvc_ScaLBL_D3Q7_AAodd_IonConcentration with
// NBLOCKS blocks of NTHREADS threads and prints the CUDA error string if
// cudaGetLastError() reports anything other than cudaSuccess afterwards.
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
	dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);

	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess) return;
	printf("CUDA error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",cudaGetErrorString(status));
}
|
||||||
|
|
||||||
|
// Host entry point: launches dvc_ScaLBL_D3Q7_AAeven_IonConcentration with
// NBLOCKS blocks of NTHREADS threads and prints the CUDA error string if
// cudaGetLastError() reports anything other than cudaSuccess afterwards.
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
	dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);

	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess) return;
	printf("CUDA error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",cudaGetErrorString(status));
}
|
||||||
|
|
||||||
|
// Host entry point: launches dvc_ScaLBL_D3Q7_AAodd_Ion with NBLOCKS blocks
// of NTHREADS threads, forwarding every argument unchanged, and prints the
// CUDA error string if cudaGetLastError() reports a failure afterwards.
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
		double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
	dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);

	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess) return;
	printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion: %s \n",cudaGetErrorString(status));
}
|
||||||
|
|
||||||
|
// Host entry point: launches dvc_ScaLBL_D3Q7_AAeven_Ion with NBLOCKS blocks
// of NTHREADS threads, forwarding every argument unchanged, and prints the
// CUDA error string if cudaGetLastError() reports a failure afterwards.
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
		double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
	dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);

	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess) return;
	printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion: %s \n",cudaGetErrorString(status));
}
|
||||||
|
|
||||||
|
// Host entry point: launches dvc_ScaLBL_D3Q7_Ion_Init with NBLOCKS blocks of
// NTHREADS threads and prints the CUDA error string if cudaGetLastError()
// reports anything other than cudaSuccess afterwards.
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
	dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);

	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess) return;
	printf("CUDA error in ScaLBL_D3Q7_Ion_Init: %s \n",cudaGetErrorString(status));
}
|
||||||
|
|
||||||
|
// Host entry point: launches dvc_ScaLBL_D3Q7_Ion_ChargeDensity with NBLOCKS
// blocks of NTHREADS threads and prints the CUDA error string if
// cudaGetLastError() reports anything other than cudaSuccess afterwards.
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
	dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);

	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess) return;
	printf("CUDA error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",cudaGetErrorString(status));
}
|
||||||
330
gpu/Poisson.cu
Normal file
330
gpu/Poisson.cu
Normal file
@@ -0,0 +1,330 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
//#include <cuda_profiler_api.h>
|
||||||
|
|
||||||
|
#define NBLOCKS 1024
|
||||||
|
#define NTHREADS 256
|
||||||
|
|
||||||
|
// Electric potential for the odd AA step.
// For every site n with start <= n < finish, adds dist[n] and the six values
// dist[neighborList[n + q*Np]] (q = 0..5, in that order) and stores the sum
// in Psi at the index given by Map[n].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
	// number of sites visited by each thread
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index handled by this thread in this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		// q=0 is read in place
		double psi = dist[n];

		// q=1..6 are fetched through the neighbor list
		for (int q=0; q<6; q++){
			const int nread = neighborList[n+q*Np];
			psi += dist[nread];
		}

		// Psi is addressed through Map rather than by n directly
		const int idx = Map[n];
		Psi[idx] = psi;
	}
}
|
||||||
|
|
||||||
|
// Electric potential for the even AA step.
// For every site n with start <= n < finish, sums the seven distributions
// stored at dist[q*Np + n] (added in the order 0, 2, 1, 4, 3, 6, 5) and
// writes the total to Psi at the index given by Map[n].
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
	// number of sites visited by each thread
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index handled by this thread in this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		const double psi = dist[n]          // q=0
		                 + dist[2*Np+n]     // q=1
		                 + dist[1*Np+n]     // q=2
		                 + dist[4*Np+n]     // q=3
		                 + dist[3*Np+n]     // q=4
		                 + dist[6*Np+n]     // q=5
		                 + dist[5*Np+n];    // q=6

		// Psi is addressed through Map rather than by n directly
		const int idx = Map[n];
		Psi[idx] = psi;
	}
}
|
||||||
|
|
||||||
|
// Poisson update for the odd AA step.
// For every site n with start <= n < finish:
//   * reads the charge density Den_charge[n] (divided by epsilon_LB) and the
//     potential Psi[Map[n]];
//   * gathers the seven distributions (q=0 in place, q=1..6 through
//     neighborList);
//   * writes the electric field components (f1-f2, f3-f4, f5-f6, each times
//     rlx*4.0) to ElectricField[n + c*Np];
//   * relaxes each distribution with rlx = 1/tau and adds the source term,
//     writing q=1 to the slot q=2 was read from and vice versa (same for the
//     3/4 and 5/6 pairs).
// All reads are done before any write because the update is in place.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
	const double rlx = 1.0/tau;

	// number of sites visited by each thread
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index handled by this thread in this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		// local charge density, scaled by the permittivity
		double rho_e = Den_charge[n];
		rho_e = rho_e/epsilon_LB;

		// electric potential, addressed through Map
		const int idx = Map[n];
		const double psi = Psi[idx];

		// storage locations of q=1..6
		const int nr1 = neighborList[n];
		const int nr2 = neighborList[n+Np];
		const int nr3 = neighborList[n+2*Np];
		const int nr4 = neighborList[n+3*Np];
		const int nr5 = neighborList[n+4*Np];
		const int nr6 = neighborList[n+5*Np];

		// gather every distribution before anything is overwritten
		const double f0 = dist[n];
		const double f1 = dist[nr1];
		const double f2 = dist[nr2];
		const double f3 = dist[nr3];
		const double f4 = dist[nr4];
		const double f5 = dist[nr5];
		const double f6 = dist[nr6];

		// electric field; NOTE the unit here is V/lu
		// NOTE(review): the 4.0 looks like 1/cs^2 for these D3Q7 weights - confirm
		ElectricField[n+0*Np] = (f1-f2)*rlx*4.0;
		ElectricField[n+1*Np] = (f3-f4)*rlx*4.0;
		ElectricField[n+2*Np] = (f5-f6)*rlx*4.0;

		// relax and scatter; opposite directions swap storage slots
		dist[n]   = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);     // q=0
		dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);    // q=1
		dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);    // q=2
		dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);    // q=3
		dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);    // q=4
		dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);    // q=5
		dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);    // q=6
	}
}
|
||||||
|
|
||||||
|
/*
 * D3Q7 Poisson collision step for the "even" half of the AA update.
 *
 * For every site n in [start, finish) the kernel
 *   - reads the seven distributions of the site from dist (paired
 *     directions are read from each other's slot),
 *   - stores the electric field (Ex, Ey, Ez) into ElectricField at
 *     n, n+Np and n+2*Np,
 *   - relaxes each distribution with rate 1/tau and adds the source term
 *     built from Psi[Map[n]] and Den_charge[n]/epsilon_LB, writing the
 *     result back to slot q*Np+n.
 */
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){

	const double omega = 1.0/tau;      // relaxation rate
	const double keep  = 1.0-omega;    // weight kept from the incoming distribution

	// Number of sites handled by each thread.
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index for this thread on this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		// Local charge density scaled by the permittivity, and local potential.
		const double rho_e = Den_charge[n]/epsilon_LB;
		const double psi   = Psi[Map[n]];

		// Incoming distributions: each direction pair is read from the swapped slot.
		const double f0 = dist[n];
		const double f1 = dist[2*Np+n];
		const double f2 = dist[1*Np+n];
		const double f3 = dist[4*Np+n];
		const double f4 = dist[3*Np+n];
		const double f5 = dist[6*Np+n];
		const double f6 = dist[5*Np+n];

		// Electric field; the original author notes the unit is V/lu and that
		// the factor 4.0 comes from the D3Q7 lattice speed of sound.
		ElectricField[n+0*Np] = (f1-f2)*omega*4.0;
		ElectricField[n+1*Np] = (f3-f4)*omega*4.0;
		ElectricField[n+2*Np] = (f5-f6)*omega*4.0;

		// Source/equilibrium contribution shared by all seven directions.
		const double source = omega*psi+rho_e;

		// q = 0 carries weight 0.25, q = 1..6 carry weight 0.125.
		dist[n]      = f0*keep + 0.25*source;
		dist[1*Np+n] = f1*keep + 0.125*source;
		dist[2*Np+n] = f2*keep + 0.125*source;
		dist[3*Np+n] = f3*keep + 0.125*source;
		dist[4*Np+n] = f4*keep + 0.125*source;
		dist[5*Np+n] = f5*keep + 0.125*source;
		dist[6*Np+n] = f6*keep + 0.125*source;
	}
}
|
||||||
|
|
||||||
|
/*
 * Initialise the D3Q7 Poisson distributions from the potential field.
 *
 * For every site n in [start, finish) the rest distribution is set to
 * 0.25*Psi[Map[n]] and each of the six moving distributions (slots
 * q*Np+n, q = 1..6) to 0.125*Psi[Map[n]].
 */
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){

	// Number of sites handled by each thread.
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index for this thread on this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		// Map translates the compact site index into an index of Psi.
		const double psi = Psi[Map[n]];

		dist[0*Np+n] = 0.25*psi;

		const double moving = 0.125*psi;
		for (int q=1; q<=6; q++){
			dist[q*Np+n] = moving;
		}
	}
}
|
||||||
|
|
||||||
|
/*
 * Host entry point: launches dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential
 * with the fixed NBLOCKS x NTHREADS configuration and prints any CUDA error
 * reported after the launch. Errors are reported only; nothing is returned.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){

	dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList, Map, dist, Psi, start, finish, Np);

	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n", cudaGetErrorString(status));
	}
}
|
||||||
|
|
||||||
|
/*
 * Host entry point: launches dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential
 * with the fixed NBLOCKS x NTHREADS configuration and prints any CUDA error
 * reported after the launch. Errors are reported only; nothing is returned.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){

	dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map, dist, Psi, start, finish, Np);

	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n", cudaGetErrorString(status));
	}
}
|
||||||
|
|
||||||
|
/*
 * Host entry point: launches dvc_ScaLBL_D3Q7_AAodd_Poisson with the fixed
 * NBLOCKS x NTHREADS configuration and prints any CUDA error reported after
 * the launch. Errors are reported only; nothing is returned.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){

	dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList, Map, dist, Den_charge, Psi, ElectricField,
	                                                     tau, epsilon_LB, start, finish, Np);

	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson: %s \n", cudaGetErrorString(status));
	}
}
|
||||||
|
|
||||||
|
/*
 * Host entry point: launches dvc_ScaLBL_D3Q7_AAeven_Poisson with the fixed
 * NBLOCKS x NTHREADS configuration and prints any CUDA error reported after
 * the launch. Errors are reported only; nothing is returned.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){

	dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map, dist, Den_charge, Psi, ElectricField,
	                                                      tau, epsilon_LB, start, finish, Np);

	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson: %s \n", cudaGetErrorString(status));
	}
}
|
||||||
|
|
||||||
|
/*
 * Host entry point: launches dvc_ScaLBL_D3Q7_Poisson_Init with the fixed
 * NBLOCKS x NTHREADS configuration and prints any CUDA error reported after
 * the launch. Errors are reported only; nothing is returned.
 */
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){

	dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map, dist, Psi, start, finish, Np);

	const cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q7_Poisson_Init: %s \n", cudaGetErrorString(status));
	}
}
|
||||||
995
gpu/Stokes.cu
Normal file
995
gpu/Stokes.cu
Normal file
@@ -0,0 +1,995 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
//#include <cuda_profiler_api.h>
|
||||||
|
|
||||||
|
#define NBLOCKS 1024
|
||||||
|
#define NTHREADS 256
|
||||||
|
|
||||||
|
/*
 * D3Q19 MRT collision for the Stokes model, "odd" half of the AA update.
 *
 * For every site n in [start, finish) the kernel
 *   1. builds the body force (Fx,Fy,Fz) from the input force (Gx,Gy,Gz) and
 *      the product of ChargeDensity and ElectricField at the site,
 *   2. gathers the 19 distributions (q=0 from dist[n], q=1..18 through
 *      neighborList) and accumulates the moments rho, j*, m1..m18,
 *   3. stores j/rho0 into Velocity at n, Np+n and 2*Np+n,
 *   4. relaxes the non-conserved moments with rlx_setA / rlx_setB,
 *   5. transforms back and scatters each direction to the neighborList slot
 *      of its opposite direction, adding the body-force contribution.
 *
 * The accumulation order of the moments is kept exactly as in the original
 * so that floating-point results do not change.
 */
__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){

	// Coefficients of the inverse moment transformation.
	const double mrt_V1=0.05263157894736842;
	const double mrt_V2=0.012531328320802;
	const double mrt_V3=0.04761904761904762;
	const double mrt_V4=0.004594820384294068;
	const double mrt_V5=0.01587301587301587;
	const double mrt_V6=0.0555555555555555555555555;
	const double mrt_V7=0.02777777777777778;
	const double mrt_V8=0.08333333333333333;
	const double mrt_V9=0.003341687552213868;
	const double mrt_V10=0.003968253968253968;
	const double mrt_V11=0.01388888888888889;
	const double mrt_V12=0.04166666666666666;

	// Factors of the electric body force; presumably a conversion from
	// physical to lattice units -- TODO confirm against the caller.
	const double dt2 = time_conv*time_conv;
	const double dx2 = h*h*1.0e-12;

	// Number of sites handled by each thread.
	const int sweeps = Np/NBLOCKS/NTHREADS + 1;

	for (int pass=0; pass<sweeps; pass++){
		// 1-D site index for this thread on this pass
		const int n = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
		if (n >= finish) continue;

		//............ total body force: input force plus electric force ............
		const double charge = ChargeDensity[n];
		const double Ex = ElectricField[n+0*Np];
		const double Ey = ElectricField[n+1*Np];
		const double Ez = ElectricField[n+2*Np];
		const double Fx = Gx + charge*Ex*dt2/dx2/den_scale;
		const double Fy = Gy + charge*Ey*dt2/dx2/den_scale;
		const double Fz = Gz + charge*Ez*dt2/dx2/den_scale;

		//............ gather distributions and accumulate moments ............
		// q = 0
		double f = dist[n];
		double rho = f;
		double m1 = -30.0*f;
		double m2 = 12.0*f;

		// q = 1
		f = dist[neighborList[n]];
		rho += f;   m1 -= 11.0*f;   m2 -= 4.0*f;
		double jx = f;
		double m4 = -4.0*f;
		double m9 = 2.0*f;
		double m10 = -4.0*f;

		// q = 2
		f = dist[neighborList[n+Np]];
		rho += f;   m1 -= 11.0*(f);   m2 -= 4.0*(f);
		jx -= f;    m4 += 4.0*(f);
		m9 += 2.0*(f);   m10 -= 4.0*(f);

		// q = 3
		f = dist[neighborList[n+2*Np]];
		rho += f;   m1 -= 11.0*f;   m2 -= 4.0*f;
		double jy = f;
		double m6 = -4.0*f;
		m9 -= f;    m10 += 2.0*f;
		double m11 = f;
		double m12 = -2.0*f;

		// q = 4
		f = dist[neighborList[n+3*Np]];
		rho += f;   m1 -= 11.0*f;   m2 -= 4.0*f;
		jy -= f;    m6 += 4.0*f;
		m9 -= f;    m10 += 2.0*f;   m11 += f;   m12 -= 2.0*f;

		// q = 5
		f = dist[neighborList[n+4*Np]];
		rho += f;   m1 -= 11.0*f;   m2 -= 4.0*f;
		double jz = f;
		double m8 = -4.0*f;
		m9 -= f;    m10 += 2.0*f;   m11 -= f;   m12 += 2.0*f;

		// q = 6
		f = dist[neighborList[n+5*Np]];
		rho += f;   m1 -= 11.0*f;   m2 -= 4.0*f;
		jz -= f;    m8 += 4.0*f;
		m9 -= f;    m10 += 2.0*f;   m11 -= f;   m12 += 2.0*f;

		// q = 7
		f = dist[neighborList[n+6*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jx += f;    m4 += f;   jy += f;   m6 += f;
		m9 += f;    m10 += f;  m11 += f;  m12 += f;
		double m13 = f;
		double m16 = f;
		double m17 = -f;

		// q = 8
		f = dist[neighborList[n+7*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jx -= f;    m4 -= f;   jy -= f;   m6 -= f;
		m9 += f;    m10 += f;  m11 += f;  m12 += f;
		m13 += f;   m16 -= f;  m17 += f;

		// q = 9
		f = dist[neighborList[n+8*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jx += f;    m4 += f;   jy -= f;   m6 -= f;
		m9 += f;    m10 += f;  m11 += f;  m12 += f;
		m13 -= f;   m16 += f;  m17 += f;

		// q = 10
		f = dist[neighborList[n+9*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jx -= f;    m4 -= f;   jy += f;   m6 += f;
		m9 += f;    m10 += f;  m11 += f;  m12 += f;
		m13 -= f;   m16 -= f;  m17 -= f;

		// q = 11
		f = dist[neighborList[n+10*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jx += f;    m4 += f;   jz += f;   m8 += f;
		m9 += f;    m10 += f;  m11 -= f;  m12 -= f;
		double m15 = f;
		m16 -= f;
		double m18 = f;

		// q = 12
		f = dist[neighborList[n+11*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jx -= f;    m4 -= f;   jz -= f;   m8 -= f;
		m9 += f;    m10 += f;  m11 -= f;  m12 -= f;
		m15 += f;   m16 += f;  m18 -= f;

		// q = 13
		f = dist[neighborList[n+12*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jx += f;    m4 += f;   jz -= f;   m8 -= f;
		m9 += f;    m10 += f;  m11 -= f;  m12 -= f;
		m15 -= f;   m16 -= f;  m18 -= f;

		// q = 14
		f = dist[neighborList[n+13*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jx -= f;    m4 -= f;   jz += f;   m8 += f;
		m9 += f;    m10 += f;  m11 -= f;  m12 -= f;
		m15 -= f;   m16 += f;  m18 += f;

		// q = 15
		f = dist[neighborList[n+14*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jy += f;    m6 += f;   jz += f;   m8 += f;
		m9 -= 2.0*f;   m10 -= 2.0*f;
		double m14 = f;
		m17 += f;   m18 -= f;

		// q = 16
		f = dist[neighborList[n+15*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jy -= f;    m6 -= f;   jz -= f;   m8 -= f;
		m9 -= 2.0*f;   m10 -= 2.0*f;
		m14 += f;   m17 -= f;  m18 += f;

		// q = 17
		f = dist[neighborList[n+16*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jy += f;    m6 += f;   jz -= f;   m8 -= f;
		m9 -= 2.0*f;   m10 -= 2.0*f;
		m14 -= f;   m17 += f;  m18 += f;

		// q = 18
		f = dist[neighborList[n+17*Np]];
		rho += f;   m1 += 8.0*f;   m2 += f;
		jy -= f;    m6 -= f;   jz += f;   m8 += f;
		m9 -= 2.0*f;   m10 -= 2.0*f;
		m14 -= f;   m17 -= f;  m18 -= f;

		//............ write the velocity (momentum over reference density) ............
		Velocity[n]      = jx / rho0;
		Velocity[Np+n]   = jy / rho0;
		Velocity[2*Np+n] = jz / rho0;

		//............ relax the non-conserved moments ............
		m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
		m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
		m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
		m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
		m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
		m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
		// NOTE(review): this equilibrium divides by rho, whereas m9, m11 and m12
		// divide by rho0. Looks like a missed replacement -- confirm, and change
		// together with the matching AAeven kernel if so.
		m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
		m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
		m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
		m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
		m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
		m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
		m16 = m16 + rlx_setB*( - m16);
		m17 = m17 + rlx_setB*( - m17);
		m18 = m18 + rlx_setB*( - m18);

		//............ inverse transformation and scatter ............
		// Each direction is written to the neighborList slot of its opposite
		// direction. The force factors 0.16666666 and 0.08333333333 are kept
		// exactly as written (presumably truncated 1/6 and 1/12).

		// q = 0
		dist[n] = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;

		// q = 1
		f = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx;
		dist[neighborList[n+Np]] = f;

		// q = 2
		f = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
		dist[neighborList[n]] = f;

		// q = 3
		f = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
		dist[neighborList[n+3*Np]] = f;

		// q = 4
		f = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
		dist[neighborList[n+2*Np]] = f;

		// q = 5
		f = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
		dist[neighborList[n+5*Np]] = f;

		// q = 6
		f = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
		dist[neighborList[n+4*Np]] = f;

		// q = 7
		f = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
			+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
			+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
		dist[neighborList[n+7*Np]] = f;

		// q = 8
		f = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
			+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
		dist[neighborList[n+6*Np]] = f;

		// q = 9
		f = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
			+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
			+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
		dist[neighborList[n+9*Np]] = f;

		// q = 10
		f = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
			+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
			+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
		dist[neighborList[n+8*Np]] = f;

		// q = 11
		f = mrt_V1*rho+mrt_V9*m1
			+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
			+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
			-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
		dist[neighborList[n+11*Np]] = f;

		// q = 12
		f = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
			+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
			-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
		dist[neighborList[n+10*Np]] = f;

		// q = 13
		f = mrt_V1*rho+mrt_V9*m1
			+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
			+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
			-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
		dist[neighborList[n+13*Np]] = f;

		// q = 14
		f = mrt_V1*rho+mrt_V9*m1
			+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
			+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
			-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
		dist[neighborList[n+12*Np]] = f;

		// q = 15
		f = mrt_V1*rho+mrt_V9*m1
			+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
			-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
		dist[neighborList[n+15*Np]] = f;

		// q = 16
		f = mrt_V1*rho+mrt_V9*m1
			+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
			-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
		dist[neighborList[n+14*Np]] = f;

		// q = 17
		f = mrt_V1*rho+mrt_V9*m1
			+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
			-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
		dist[neighborList[n+17*Np]] = f;

		// q = 18
		f = mrt_V1*rho+mrt_V9*m1
			+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
			-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
		dist[neighborList[n+16*Np]] = f;
	}
}
|
||||||
|
|
||||||
|
__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
|
||||||
|
|
||||||
|
int n;
|
||||||
|
double fq;
|
||||||
|
// conserved momemnts
|
||||||
|
double rho,jx,jy,jz;
|
||||||
|
double ux,uy,uz;
|
||||||
|
// non-conserved moments
|
||||||
|
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
|
||||||
|
// body force due to electric field
|
||||||
|
double rhoE;//charge density
|
||||||
|
double Ex,Ey,Ez;
|
||||||
|
// total body force
|
||||||
|
double Fx,Fy,Fz;
|
||||||
|
|
||||||
|
const double mrt_V1=0.05263157894736842;
|
||||||
|
const double mrt_V2=0.012531328320802;
|
||||||
|
const double mrt_V3=0.04761904761904762;
|
||||||
|
const double mrt_V4=0.004594820384294068;
|
||||||
|
const double mrt_V5=0.01587301587301587;
|
||||||
|
const double mrt_V6=0.0555555555555555555555555;
|
||||||
|
const double mrt_V7=0.02777777777777778;
|
||||||
|
const double mrt_V8=0.08333333333333333;
|
||||||
|
const double mrt_V9=0.003341687552213868;
|
||||||
|
const double mrt_V10=0.003968253968253968;
|
||||||
|
const double mrt_V11=0.01388888888888889;
|
||||||
|
const double mrt_V12=0.04166666666666666;
|
||||||
|
|
||||||
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||||
|
for (int s=0; s<S; s++){
|
||||||
|
//........Get 1-D index for this thread....................
|
||||||
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||||
|
if (n<finish) {
|
||||||
|
|
||||||
|
//Load data
|
||||||
|
rhoE = ChargeDensity[n];
|
||||||
|
Ex = ElectricField[n+0*Np];
|
||||||
|
Ey = ElectricField[n+1*Np];
|
||||||
|
Ez = ElectricField[n+2*Np];
|
||||||
|
//compute total body force, including input body force (Gx,Gy,Gz)
|
||||||
|
Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;//the extra factors at the end necessarily convert unit from phys to LB
|
||||||
|
Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||||
|
Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||||
|
|
||||||
|
// q=0
|
||||||
|
fq = dist[n];
|
||||||
|
rho = fq;
|
||||||
|
m1 = -30.0*fq;
|
||||||
|
m2 = 12.0*fq;
|
||||||
|
|
||||||
|
// q=1
|
||||||
|
fq = dist[2*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 -= 11.0*fq;
|
||||||
|
m2 -= 4.0*fq;
|
||||||
|
jx = fq;
|
||||||
|
m4 = -4.0*fq;
|
||||||
|
m9 = 2.0*fq;
|
||||||
|
m10 = -4.0*fq;
|
||||||
|
|
||||||
|
// f2 = dist[10*Np+n];
|
||||||
|
fq = dist[1*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 -= 11.0*(fq);
|
||||||
|
m2 -= 4.0*(fq);
|
||||||
|
jx -= fq;
|
||||||
|
m4 += 4.0*(fq);
|
||||||
|
m9 += 2.0*(fq);
|
||||||
|
m10 -= 4.0*(fq);
|
||||||
|
|
||||||
|
// q=3
|
||||||
|
fq = dist[4*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 -= 11.0*fq;
|
||||||
|
m2 -= 4.0*fq;
|
||||||
|
jy = fq;
|
||||||
|
m6 = -4.0*fq;
|
||||||
|
m9 -= fq;
|
||||||
|
m10 += 2.0*fq;
|
||||||
|
m11 = fq;
|
||||||
|
m12 = -2.0*fq;
|
||||||
|
|
||||||
|
// q = 4
|
||||||
|
fq = dist[3*Np+n];
|
||||||
|
rho+= fq;
|
||||||
|
m1 -= 11.0*fq;
|
||||||
|
m2 -= 4.0*fq;
|
||||||
|
jy -= fq;
|
||||||
|
m6 += 4.0*fq;
|
||||||
|
m9 -= fq;
|
||||||
|
m10 += 2.0*fq;
|
||||||
|
m11 += fq;
|
||||||
|
m12 -= 2.0*fq;
|
||||||
|
|
||||||
|
// q=5
|
||||||
|
fq = dist[6*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 -= 11.0*fq;
|
||||||
|
m2 -= 4.0*fq;
|
||||||
|
jz = fq;
|
||||||
|
m8 = -4.0*fq;
|
||||||
|
m9 -= fq;
|
||||||
|
m10 += 2.0*fq;
|
||||||
|
m11 -= fq;
|
||||||
|
m12 += 2.0*fq;
|
||||||
|
|
||||||
|
// q = 6
|
||||||
|
fq = dist[5*Np+n];
|
||||||
|
rho+= fq;
|
||||||
|
m1 -= 11.0*fq;
|
||||||
|
m2 -= 4.0*fq;
|
||||||
|
jz -= fq;
|
||||||
|
m8 += 4.0*fq;
|
||||||
|
m9 -= fq;
|
||||||
|
m10 += 2.0*fq;
|
||||||
|
m11 -= fq;
|
||||||
|
m12 += 2.0*fq;
|
||||||
|
|
||||||
|
// q=7
|
||||||
|
fq = dist[8*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jx += fq;
|
||||||
|
m4 += fq;
|
||||||
|
jy += fq;
|
||||||
|
m6 += fq;
|
||||||
|
m9 += fq;
|
||||||
|
m10 += fq;
|
||||||
|
m11 += fq;
|
||||||
|
m12 += fq;
|
||||||
|
m13 = fq;
|
||||||
|
m16 = fq;
|
||||||
|
m17 = -fq;
|
||||||
|
|
||||||
|
// q = 8
|
||||||
|
fq = dist[7*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jx -= fq;
|
||||||
|
m4 -= fq;
|
||||||
|
jy -= fq;
|
||||||
|
m6 -= fq;
|
||||||
|
m9 += fq;
|
||||||
|
m10 += fq;
|
||||||
|
m11 += fq;
|
||||||
|
m12 += fq;
|
||||||
|
m13 += fq;
|
||||||
|
m16 -= fq;
|
||||||
|
m17 += fq;
|
||||||
|
|
||||||
|
// q=9
|
||||||
|
fq = dist[10*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jx += fq;
|
||||||
|
m4 += fq;
|
||||||
|
jy -= fq;
|
||||||
|
m6 -= fq;
|
||||||
|
m9 += fq;
|
||||||
|
m10 += fq;
|
||||||
|
m11 += fq;
|
||||||
|
m12 += fq;
|
||||||
|
m13 -= fq;
|
||||||
|
m16 += fq;
|
||||||
|
m17 += fq;
|
||||||
|
|
||||||
|
// q = 10
|
||||||
|
fq = dist[9*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jx -= fq;
|
||||||
|
m4 -= fq;
|
||||||
|
jy += fq;
|
||||||
|
m6 += fq;
|
||||||
|
m9 += fq;
|
||||||
|
m10 += fq;
|
||||||
|
m11 += fq;
|
||||||
|
m12 += fq;
|
||||||
|
m13 -= fq;
|
||||||
|
m16 -= fq;
|
||||||
|
m17 -= fq;
|
||||||
|
|
||||||
|
// q=11
|
||||||
|
fq = dist[12*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jx += fq;
|
||||||
|
m4 += fq;
|
||||||
|
jz += fq;
|
||||||
|
m8 += fq;
|
||||||
|
m9 += fq;
|
||||||
|
m10 += fq;
|
||||||
|
m11 -= fq;
|
||||||
|
m12 -= fq;
|
||||||
|
m15 = fq;
|
||||||
|
m16 -= fq;
|
||||||
|
m18 = fq;
|
||||||
|
|
||||||
|
// q=12
|
||||||
|
fq = dist[11*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jx -= fq;
|
||||||
|
m4 -= fq;
|
||||||
|
jz -= fq;
|
||||||
|
m8 -= fq;
|
||||||
|
m9 += fq;
|
||||||
|
m10 += fq;
|
||||||
|
m11 -= fq;
|
||||||
|
m12 -= fq;
|
||||||
|
m15 += fq;
|
||||||
|
m16 += fq;
|
||||||
|
m18 -= fq;
|
||||||
|
|
||||||
|
// q=13
|
||||||
|
fq = dist[14*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jx += fq;
|
||||||
|
m4 += fq;
|
||||||
|
jz -= fq;
|
||||||
|
m8 -= fq;
|
||||||
|
m9 += fq;
|
||||||
|
m10 += fq;
|
||||||
|
m11 -= fq;
|
||||||
|
m12 -= fq;
|
||||||
|
m15 -= fq;
|
||||||
|
m16 -= fq;
|
||||||
|
m18 -= fq;
|
||||||
|
|
||||||
|
// q=14
|
||||||
|
fq = dist[13*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jx -= fq;
|
||||||
|
m4 -= fq;
|
||||||
|
jz += fq;
|
||||||
|
m8 += fq;
|
||||||
|
m9 += fq;
|
||||||
|
m10 += fq;
|
||||||
|
m11 -= fq;
|
||||||
|
m12 -= fq;
|
||||||
|
m15 -= fq;
|
||||||
|
m16 += fq;
|
||||||
|
m18 += fq;
|
||||||
|
|
||||||
|
// q=15
|
||||||
|
fq = dist[16*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jy += fq;
|
||||||
|
m6 += fq;
|
||||||
|
jz += fq;
|
||||||
|
m8 += fq;
|
||||||
|
m9 -= 2.0*fq;
|
||||||
|
m10 -= 2.0*fq;
|
||||||
|
m14 = fq;
|
||||||
|
m17 += fq;
|
||||||
|
m18 -= fq;
|
||||||
|
|
||||||
|
// q=16
|
||||||
|
fq = dist[15*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jy -= fq;
|
||||||
|
m6 -= fq;
|
||||||
|
jz -= fq;
|
||||||
|
m8 -= fq;
|
||||||
|
m9 -= 2.0*fq;
|
||||||
|
m10 -= 2.0*fq;
|
||||||
|
m14 += fq;
|
||||||
|
m17 -= fq;
|
||||||
|
m18 += fq;
|
||||||
|
|
||||||
|
// q=17
|
||||||
|
fq = dist[18*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jy += fq;
|
||||||
|
m6 += fq;
|
||||||
|
jz -= fq;
|
||||||
|
m8 -= fq;
|
||||||
|
m9 -= 2.0*fq;
|
||||||
|
m10 -= 2.0*fq;
|
||||||
|
m14 -= fq;
|
||||||
|
m17 += fq;
|
||||||
|
m18 += fq;
|
||||||
|
|
||||||
|
// q=18
|
||||||
|
fq = dist[17*Np+n];
|
||||||
|
rho += fq;
|
||||||
|
m1 += 8.0*fq;
|
||||||
|
m2 += fq;
|
||||||
|
jy -= fq;
|
||||||
|
m6 -= fq;
|
||||||
|
jz += fq;
|
||||||
|
m8 += fq;
|
||||||
|
m9 -= 2.0*fq;
|
||||||
|
m10 -= 2.0*fq;
|
||||||
|
m14 -= fq;
|
||||||
|
m17 -= fq;
|
||||||
|
m18 -= fq;
|
||||||
|
|
||||||
|
// write the velocity
|
||||||
|
ux = jx / rho0;
|
||||||
|
uy = jy / rho0;
|
||||||
|
uz = jz / rho0;
|
||||||
|
Velocity[n] = ux;
|
||||||
|
Velocity[Np+n] = uy;
|
||||||
|
Velocity[2*Np+n] = uz;
|
||||||
|
|
||||||
|
|
||||||
|
//........................................................................
|
||||||
|
// READ THE DISTRIBUTIONS
|
||||||
|
// (read from opposite array due to previous swap operation)
|
||||||
|
//........................................................................
|
||||||
|
|
||||||
|
//..............incorporate external force................................................
|
||||||
|
//..............carry out relaxation process...............................................
|
||||||
|
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
|
||||||
|
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
|
||||||
|
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
|
||||||
|
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
|
||||||
|
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
|
||||||
|
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
|
||||||
|
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
|
||||||
|
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
|
||||||
|
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
|
||||||
|
m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
|
||||||
|
m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
|
||||||
|
m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
|
||||||
|
m16 = m16 + rlx_setB*( - m16);
|
||||||
|
m17 = m17 + rlx_setB*( - m17);
|
||||||
|
m18 = m18 + rlx_setB*( - m18);
|
||||||
|
//.......................................................................................................
|
||||||
|
//.................inverse transformation......................................................
|
||||||
|
|
||||||
|
// q=0
|
||||||
|
fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
|
||||||
|
dist[n] = fq;
|
||||||
|
|
||||||
|
// q = 1
|
||||||
|
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx;
|
||||||
|
dist[1*Np+n] = fq;
|
||||||
|
|
||||||
|
// q=2
|
||||||
|
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
|
||||||
|
dist[2*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 3
|
||||||
|
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
|
||||||
|
dist[3*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 4
|
||||||
|
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
|
||||||
|
dist[4*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 5
|
||||||
|
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
|
||||||
|
dist[5*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 6
|
||||||
|
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
|
||||||
|
dist[6*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 7
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
|
||||||
|
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||||
|
+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
|
||||||
|
dist[7*Np+n] = fq;
|
||||||
|
|
||||||
|
|
||||||
|
// q = 8
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||||
|
+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
|
||||||
|
dist[8*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 9
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
|
||||||
|
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||||
|
+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
|
||||||
|
dist[9*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 10
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
|
||||||
|
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||||
|
+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
|
||||||
|
dist[10*Np+n] = fq;
|
||||||
|
|
||||||
|
|
||||||
|
// q = 11
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1
|
||||||
|
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
|
||||||
|
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||||
|
-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
|
||||||
|
dist[11*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 12
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
|
||||||
|
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||||
|
-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
|
||||||
|
dist[12*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 13
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1
|
||||||
|
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
|
||||||
|
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||||
|
-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
|
||||||
|
dist[13*Np+n] = fq;
|
||||||
|
|
||||||
|
// q= 14
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1
|
||||||
|
+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
|
||||||
|
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||||
|
-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
|
||||||
|
|
||||||
|
dist[14*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 15
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1
|
||||||
|
+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
|
||||||
|
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
|
||||||
|
dist[15*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 16
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1
|
||||||
|
+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
|
||||||
|
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
|
||||||
|
dist[16*Np+n] = fq;
|
||||||
|
|
||||||
|
|
||||||
|
// q = 17
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1
|
||||||
|
+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
|
||||||
|
-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
|
||||||
|
dist[17*Np+n] = fq;
|
||||||
|
|
||||||
|
// q = 18
|
||||||
|
fq = mrt_V1*rho+mrt_V9*m1
|
||||||
|
+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
|
||||||
|
-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
|
||||||
|
dist[18*Np+n] = fq;
|
||||||
|
|
||||||
|
//........................................................................
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Host-side entry point for the odd AA-pattern Stokes MRT update.
// Launches dvc_ScaLBL_D3Q19_AAodd_StokesMRT with an execution configuration of
// NBLOCKS x NTHREADS and forwards every argument to the kernel unchanged.
// No synchronization is performed here; the status returned by
// cudaGetLastError() right after the launch is checked and, on failure,
// a message is printed to stdout (the error is not propagated to the caller).
extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){

	dvc_ScaLBL_D3Q19_AAodd_StokesMRT<<<NBLOCKS,NTHREADS >>>(
			neighborList, dist, Velocity, ChargeDensity, ElectricField,
			rlx_setA, rlx_setB,
			Gx, Gy, Gz,
			rho0, den_scale, h, time_conv,
			start, finish, Np);

	// Query the runtime immediately after the launch and report any failure.
	const cudaError_t launchStatus = cudaGetLastError();
	if (launchStatus != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",cudaGetErrorString(launchStatus));
	}
}
|
||||||
|
|
||||||
|
// Host-side entry point for the even AA-pattern Stokes MRT update.
// Launches dvc_ScaLBL_D3Q19_AAeven_StokesMRT with an execution configuration of
// NBLOCKS x NTHREADS and forwards every argument to the kernel unchanged.
// No synchronization is performed here; the status returned by
// cudaGetLastError() right after the launch is checked and, on failure,
// a message is printed to stdout (the error is not propagated to the caller).
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){

	dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<<NBLOCKS,NTHREADS >>>(
			dist, Velocity, ChargeDensity, ElectricField,
			rlx_setA, rlx_setB,
			Gx, Gy, Gz,
			rho0, den_scale, h, time_conv,
			start, finish, Np);

	// Query the runtime immediately after the launch and report any failure.
	const cudaError_t launchStatus = cudaGetLastError();
	if (launchStatus != cudaSuccess){
		printf("CUDA error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",cudaGetErrorString(launchStatus));
	}
}
|
||||||
|
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM):
|
ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM):
|
||||||
rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0),
|
rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0),
|
||||||
analysis_interval(0),tolerance(0),comm(COMM)
|
analysis_interval(0),visualization_interval(0),tolerance(0),comm(COMM)
|
||||||
{
|
{
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -23,6 +23,7 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){
|
|||||||
num_iter_Stokes=1;
|
num_iter_Stokes=1;
|
||||||
num_iter_Ion.push_back(1);
|
num_iter_Ion.push_back(1);
|
||||||
analysis_interval = 500;
|
analysis_interval = 500;
|
||||||
|
visualization_interval = 10000;
|
||||||
tolerance = 1.0e-6;
|
tolerance = 1.0e-6;
|
||||||
|
|
||||||
// load input parameters
|
// load input parameters
|
||||||
@@ -32,6 +33,9 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){
|
|||||||
if (study_db->keyExists( "analysis_interval" )){
|
if (study_db->keyExists( "analysis_interval" )){
|
||||||
analysis_interval = study_db->getScalar<int>( "analysis_interval" );
|
analysis_interval = study_db->getScalar<int>( "analysis_interval" );
|
||||||
}
|
}
|
||||||
|
if (study_db->keyExists( "visualization_interval" )){
|
||||||
|
visualization_interval = study_db->getScalar<int>( "visualization_interval" );
|
||||||
|
}
|
||||||
if (study_db->keyExists( "tolerance" )){
|
if (study_db->keyExists( "tolerance" )){
|
||||||
tolerance = study_db->getScalar<double>( "tolerance" );
|
tolerance = study_db->getScalar<double>( "tolerance" );
|
||||||
}
|
}
|
||||||
@@ -76,15 +80,8 @@ int ScaLBL_Multiphys_Controller::getStokesNumIter_PNP_coupling(double StokesTime
|
|||||||
int num_iter_stokes;
|
int num_iter_stokes;
|
||||||
vector<double> TimeConv;
|
vector<double> TimeConv;
|
||||||
|
|
||||||
printf("*****Debug; IonTimeConv size = %i\n",IonTimeConv.size());
|
|
||||||
for (unsigned int i =0; i<IonTimeConv.size();i++){
|
|
||||||
printf("*****Debug; Ion %i; IonTimeConv = %.5g\n",i,IonTimeConv[i]);
|
|
||||||
}
|
|
||||||
TimeConv.assign(IonTimeConv.begin(),IonTimeConv.end());
|
TimeConv.assign(IonTimeConv.begin(),IonTimeConv.end());
|
||||||
TimeConv.insert(TimeConv.begin(),StokesTimeConv);
|
TimeConv.insert(TimeConv.begin(),StokesTimeConv);
|
||||||
for (unsigned int i =0; i<TimeConv.size();i++){
|
|
||||||
printf("*****Debug; all TimeConv %i; TimeConv = %.5g\n",i,TimeConv[i]);
|
|
||||||
}
|
|
||||||
vector<double>::iterator it_max = max_element(TimeConv.begin(),TimeConv.end());
|
vector<double>::iterator it_max = max_element(TimeConv.begin(),TimeConv.end());
|
||||||
int idx_max = distance(TimeConv.begin(),it_max);
|
int idx_max = distance(TimeConv.begin(),it_max);
|
||||||
if (idx_max==0){
|
if (idx_max==0){
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ public:
|
|||||||
int num_iter_Stokes;
|
int num_iter_Stokes;
|
||||||
vector<int> num_iter_Ion;
|
vector<int> num_iter_Ion;
|
||||||
int analysis_interval;
|
int analysis_interval;
|
||||||
|
int visualization_interval;
|
||||||
double tolerance;
|
double tolerance;
|
||||||
//double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity
|
//double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
ADD_LBPM_EXECUTABLE( lbpm_color_simulator )
|
ADD_LBPM_EXECUTABLE( lbpm_color_simulator )
|
||||||
ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator )
|
ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator )
|
||||||
ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator )
|
ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator )
|
||||||
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_dfh_simulator )
|
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
|
||||||
#ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
|
#ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
|
||||||
#ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator )
|
#ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator )
|
||||||
ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator )
|
ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator )
|
||||||
|
|||||||
@@ -112,7 +112,7 @@ int main(int argc, char **argv)
|
|||||||
if (rank==0) printf("*************************************************************\n");
|
if (rank==0) printf("*************************************************************\n");
|
||||||
|
|
||||||
PROFILE_STOP("Main");
|
PROFILE_STOP("Main");
|
||||||
PROFILE_SAVE("lbpm_electrokinetic_simulator",1);
|
PROFILE_SAVE("TestPNP_Stokes",1);
|
||||||
// ****************************************************
|
// ****************************************************
|
||||||
MPI_Barrier(comm);
|
MPI_Barrier(comm);
|
||||||
} // Limit scope so variables that contain communicators will free before MPI_Finialize
|
} // Limit scope so variables that contain communicators will free before MPI_Finialize
|
||||||
|
|||||||
@@ -7,15 +7,15 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include "models/StokesModel.h"
|
|
||||||
#include "models/IonModel.h"
|
#include "models/IonModel.h"
|
||||||
|
#include "models/StokesModel.h"
|
||||||
#include "models/PoissonSolver.h"
|
#include "models/PoissonSolver.h"
|
||||||
#include "models/MultiPhysController.h"
|
#include "models/MultiPhysController.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
//***************************************************************************
|
//***************************************************************************
|
||||||
// Implementation of Multiphysics simulator using lattice-Boltzmann method
|
// Test lattice-Boltzmann Ion Model coupled with Poisson equation
|
||||||
//***************************************************************************
|
//***************************************************************************
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
@@ -35,7 +35,7 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
if (rank == 0){
|
if (rank == 0){
|
||||||
printf("********************************************************\n");
|
printf("********************************************************\n");
|
||||||
printf("Running Electrokinetic LBM Simulator \n");
|
printf("Running LBPM electrokinetic single-fluid solver \n");
|
||||||
printf("********************************************************\n");
|
printf("********************************************************\n");
|
||||||
}
|
}
|
||||||
//PROFILE_ENABLE_TRACE();
|
//PROFILE_ENABLE_TRACE();
|
||||||
@@ -53,18 +53,24 @@ int main(int argc, char **argv)
|
|||||||
// Load controller information
|
// Load controller information
|
||||||
Study.ReadParams(filename);
|
Study.ReadParams(filename);
|
||||||
|
|
||||||
// Initialize LB Navier-Stokes model
|
// Load user input database files for Navier-Stokes and Ion solvers
|
||||||
StokesModel.ReadParams(filename,Study.num_iter_Stokes);
|
StokesModel.ReadParams(filename);
|
||||||
|
IonModel.ReadParams(filename);
|
||||||
|
|
||||||
|
// Setup other model specific structures
|
||||||
StokesModel.SetDomain();
|
StokesModel.SetDomain();
|
||||||
StokesModel.ReadInput();
|
StokesModel.ReadInput();
|
||||||
StokesModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables
|
StokesModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables
|
||||||
StokesModel.Initialize(); // initializing the model will set initial conditions for variables
|
|
||||||
|
|
||||||
// Initialize LB-Ion model
|
|
||||||
IonModel.ReadParams(filename,Study.num_iter_Ion);
|
|
||||||
IonModel.SetDomain();
|
IonModel.SetDomain();
|
||||||
IonModel.ReadInput();
|
IonModel.ReadInput();
|
||||||
IonModel.Create();
|
IonModel.Create();
|
||||||
|
|
||||||
|
// Get internal iteration number
|
||||||
|
StokesModel.timestepMax = Study.getStokesNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
|
||||||
|
StokesModel.Initialize(); // initializing the model will set initial conditions for variables
|
||||||
|
|
||||||
|
IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
|
||||||
IonModel.Initialize();
|
IonModel.Initialize();
|
||||||
|
|
||||||
// Initialize LB-Poisson model
|
// Initialize LB-Poisson model
|
||||||
@@ -74,39 +80,36 @@ int main(int argc, char **argv)
|
|||||||
PoissonSolver.Create();
|
PoissonSolver.Create();
|
||||||
PoissonSolver.Initialize();
|
PoissonSolver.Initialize();
|
||||||
|
|
||||||
|
|
||||||
int timestep=0;
|
int timestep=0;
|
||||||
while (timestep < Study.timestepMax){
|
while (timestep < Study.timestepMax){
|
||||||
|
|
||||||
timestep++;
|
timestep++;
|
||||||
//if (rank==0) printf("timestep=%i; running Poisson solver\n",timestep);
|
|
||||||
PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equation to get steady-state electrical potential
|
PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equation to get steady-state electrical potential
|
||||||
//PoissonSolver.getElectricPotential(timestep);
|
|
||||||
|
|
||||||
//if (rank==0) printf("timestep=%i; running StokesModel\n",timestep);
|
|
||||||
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity
|
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity
|
||||||
//StokesModel.getVelocity(timestep);
|
|
||||||
|
|
||||||
//if (rank==0) printf("timestep=%i; running Ion model\n",timestep);
|
|
||||||
IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential
|
IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential
|
||||||
//IonModel.getIonConcentration(timestep);
|
|
||||||
|
|
||||||
|
|
||||||
timestep++;//AA operations
|
timestep++;//AA operations
|
||||||
//--------------------------------------------
|
|
||||||
//potentially leave analysis module for future
|
if (timestep%Study.visualization_interval==0){
|
||||||
//--------------------------------------------
|
PoissonSolver.getElectricPotential(timestep);
|
||||||
|
PoissonSolver.getElectricField(timestep);
|
||||||
|
IonModel.getIonConcentration(timestep);
|
||||||
|
StokesModel.getVelocity(timestep);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
StokesModel.getVelocity(timestep);
|
if (rank==0) printf("Save simulation raw data at maximum timestep\n");
|
||||||
PoissonSolver.getElectricPotential(timestep);
|
PoissonSolver.getElectricPotential(timestep);
|
||||||
PoissonSolver.getElectricField(timestep);
|
PoissonSolver.getElectricField(timestep);
|
||||||
IonModel.getIonConcentration(timestep);
|
IonModel.getIonConcentration(timestep);
|
||||||
|
StokesModel.getVelocity(timestep);
|
||||||
|
|
||||||
if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n");
|
if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n");
|
||||||
if (rank==0) printf("*************************************************************\n");
|
if (rank==0) printf("*************************************************************\n");
|
||||||
|
|
||||||
PROFILE_STOP("Main");
|
PROFILE_STOP("Main");
|
||||||
PROFILE_SAVE("lbpm_electrokinetic_simulator",1);
|
PROFILE_SAVE("lbpm_electrokinetic_SingleFluid_simulator",1);
|
||||||
// ****************************************************
|
// ****************************************************
|
||||||
MPI_Barrier(comm);
|
MPI_Barrier(comm);
|
||||||
} // Limit scope so variables that contain communicators will free before MPI_Finialize
|
} // Limit scope so variables that contain communicators will free before MPI_Finialize
|
||||||
Reference in New Issue
Block a user