837 lines
27 KiB
Plaintext
837 lines
27 KiB
Plaintext
#include <stdio.h>
|
|
#include <math.h>
|
|
#include "hip/hip_runtime.h"
|
|
|
|
#define NBLOCKS 1024
|
|
#define NTHREADS 256
|
|
|
|
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
|
int n;
|
|
double psi;//electric potential
|
|
double fq;
|
|
int nread;
|
|
int idx;
|
|
|
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
|
for (int s=0; s<S; s++){
|
|
//........Get 1-D index for this thread....................
|
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
|
if (n<finish) {
|
|
|
|
// q=0
|
|
fq = dist[n];
|
|
psi = fq;
|
|
|
|
// q=1
|
|
nread = neighborList[n];
|
|
fq = dist[nread];
|
|
psi += fq;
|
|
|
|
// q=2
|
|
nread = neighborList[n+Np];
|
|
fq = dist[nread];
|
|
psi += fq;
|
|
|
|
// q=3
|
|
nread = neighborList[n+2*Np];
|
|
fq = dist[nread];
|
|
psi += fq;
|
|
|
|
// q = 4
|
|
nread = neighborList[n+3*Np];
|
|
fq = dist[nread];
|
|
psi += fq;
|
|
|
|
// q=5
|
|
nread = neighborList[n+4*Np];
|
|
fq = dist[nread];
|
|
psi += fq;
|
|
|
|
// q = 6
|
|
nread = neighborList[n+5*Np];
|
|
fq = dist[nread];
|
|
psi += fq;
|
|
|
|
idx=Map[n];
|
|
Psi[idx] = psi;
|
|
}
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
|
int n;
|
|
double psi;//electric potential
|
|
double fq;
|
|
int idx;
|
|
|
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
|
for (int s=0; s<S; s++){
|
|
//........Get 1-D index for this thread....................
|
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
|
if (n<finish) {
|
|
|
|
// q=0
|
|
fq = dist[n];
|
|
psi = fq;
|
|
|
|
// q=1
|
|
fq = dist[2*Np+n];
|
|
psi += fq;
|
|
|
|
// q=2
|
|
fq = dist[1*Np+n];
|
|
psi += fq;
|
|
|
|
// q=3
|
|
fq = dist[4*Np+n];
|
|
psi += fq;
|
|
|
|
// q=4
|
|
fq = dist[3*Np+n];
|
|
psi += fq;
|
|
|
|
// q=5
|
|
fq = dist[6*Np+n];
|
|
psi += fq;
|
|
|
|
// q=6
|
|
fq = dist[5*Np+n];
|
|
psi += fq;
|
|
|
|
idx=Map[n];
|
|
Psi[idx] = psi;
|
|
}
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
|
|
|
|
int n;
|
|
double psi;//electric potential
|
|
double Ex,Ey,Ez;//electric field
|
|
double rho_e;//local charge density
|
|
double f0,f1,f2,f3,f4,f5,f6;
|
|
int nr1,nr2,nr3,nr4,nr5,nr6;
|
|
double rlx=1.0/tau;
|
|
int idx;
|
|
|
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
|
for (int s=0; s<S; s++){
|
|
//........Get 1-D index for this thread....................
|
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
|
if (n<finish) {
|
|
|
|
//Load data
|
|
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
|
|
//and thus the net space charge density is zero.
|
|
//rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
|
|
rho_e = Den_charge[n] / epsilon_LB;
|
|
idx=Map[n];
|
|
psi = Psi[idx];
|
|
|
|
// q=0
|
|
f0 = dist[n];
|
|
// q=1
|
|
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
|
f1 = dist[nr1]; // reading the f1 data into register fq
|
|
|
|
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
|
f2 = dist[nr2]; // reading the f2 data into register fq
|
|
|
|
// q=3
|
|
nr3 = neighborList[n+2*Np]; // neighbor 4
|
|
f3 = dist[nr3];
|
|
|
|
// q = 4
|
|
nr4 = neighborList[n+3*Np]; // neighbor 3
|
|
f4 = dist[nr4];
|
|
|
|
// q=5
|
|
nr5 = neighborList[n+4*Np];
|
|
f5 = dist[nr5];
|
|
|
|
// q = 6
|
|
nr6 = neighborList[n+5*Np];
|
|
f6 = dist[nr6];
|
|
|
|
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
|
|
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
|
|
Ez = (f5-f6)*rlx*4.0;
|
|
ElectricField[n+0*Np] = Ex;
|
|
ElectricField[n+1*Np] = Ey;
|
|
ElectricField[n+2*Np] = Ez;
|
|
|
|
// q = 0
|
|
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
|
|
|
|
// q = 1
|
|
dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 2
|
|
dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 3
|
|
dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 4
|
|
dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 5
|
|
dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 6
|
|
dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
//........................................................................
|
|
}
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
|
|
|
|
int n;
|
|
double psi;//electric potential
|
|
double Ex,Ey,Ez;//electric field
|
|
double rho_e;//local charge density
|
|
double f0,f1,f2,f3,f4,f5,f6;
|
|
double rlx=1.0/tau;
|
|
int idx;
|
|
|
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
|
for (int s=0; s<S; s++){
|
|
//........Get 1-D index for this thread....................
|
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
|
if (n<finish) {
|
|
|
|
//Load data
|
|
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
|
|
//and thus the net space charge density is zero.
|
|
rho_e = Den_charge[n] / epsilon_LB;
|
|
// rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
|
|
idx=Map[n];
|
|
|
|
psi = Psi[idx];
|
|
|
|
f0 = dist[n];
|
|
f1 = dist[2*Np+n];
|
|
f2 = dist[1*Np+n];
|
|
f3 = dist[4*Np+n];
|
|
f4 = dist[3*Np+n];
|
|
f5 = dist[6*Np+n];
|
|
f6 = dist[5*Np+n];
|
|
|
|
|
|
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
|
|
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
|
|
Ez = (f5-f6)*rlx*4.0;
|
|
ElectricField[n+0*Np] = Ex;
|
|
ElectricField[n+1*Np] = Ey;
|
|
ElectricField[n+2*Np] = Ez;
|
|
|
|
// q = 0
|
|
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
|
|
|
|
// q = 1
|
|
dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 2
|
|
dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 3
|
|
dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 4
|
|
dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 5
|
|
dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
|
|
// q = 6
|
|
dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
|
//........................................................................
|
|
}
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
|
|
|
int n;
|
|
int ijk;
|
|
|
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
|
for (int s=0; s<S; s++){
|
|
//........Get 1-D index for this thread....................
|
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
|
if (n<finish) {
|
|
ijk = Map[n];
|
|
dist[0*Np+n] = 0.25*Psi[ijk];
|
|
dist[1*Np+n] = 0.125*Psi[ijk];
|
|
dist[2*Np+n] = 0.125*Psi[ijk];
|
|
dist[3*Np+n] = 0.125*Psi[ijk];
|
|
dist[4*Np+n] = 0.125*Psi[ijk];
|
|
dist[5*Np+n] = 0.125*Psi[ijk];
|
|
dist[6*Np+n] = 0.125*Psi[ijk];
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
|
|
double *dist, double *Den_charge,
|
|
double *Psi, double *ElectricField,
|
|
double tau, double epsilon_LB, bool UseSlippingVelBC,
|
|
int start, int finish, int Np) {
|
|
int n;
|
|
double psi; //electric potential
|
|
double Ex, Ey, Ez; //electric field
|
|
double rho_e; //local charge density
|
|
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
|
|
f16, f17, f18;
|
|
int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
|
|
nr14, nr15, nr16, nr17, nr18;
|
|
double error,sum_q;
|
|
double rlx = 1.0 / tau;
|
|
int idx;
|
|
|
|
double W0 = 0.5;
|
|
double W1 = 1.0/24.0;
|
|
double W2 = 1.0/48.0;
|
|
|
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
|
for (int s=0; s<S; s++){
|
|
//........Get 1-D index for this thread....................
|
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
|
if (n<finish) {
|
|
//Load data
|
|
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
|
|
//and thus the net space charge density is zero.
|
|
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
|
|
|
|
// q=0
|
|
f0 = dist[n];
|
|
// q=1
|
|
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
|
f1 = dist[nr1]; // reading the f1 data into register fq
|
|
|
|
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
|
|
f2 = dist[nr2]; // reading the f2 data into register fq
|
|
|
|
// q=3
|
|
nr3 = neighborList[n + 2 * Np]; // neighbor 4
|
|
f3 = dist[nr3];
|
|
|
|
// q = 4
|
|
nr4 = neighborList[n + 3 * Np]; // neighbor 3
|
|
f4 = dist[nr4];
|
|
|
|
// q=5
|
|
nr5 = neighborList[n + 4 * Np];
|
|
f5 = dist[nr5];
|
|
|
|
// q = 6
|
|
nr6 = neighborList[n + 5 * Np];
|
|
f6 = dist[nr6];
|
|
|
|
// q=7
|
|
nr7 = neighborList[n + 6 * Np];
|
|
f7 = dist[nr7];
|
|
|
|
// q = 8
|
|
nr8 = neighborList[n + 7 * Np];
|
|
f8 = dist[nr8];
|
|
|
|
// q=9
|
|
nr9 = neighborList[n + 8 * Np];
|
|
f9 = dist[nr9];
|
|
|
|
// q = 10
|
|
nr10 = neighborList[n + 9 * Np];
|
|
f10 = dist[nr10];
|
|
|
|
// q=11
|
|
nr11 = neighborList[n + 10 * Np];
|
|
f11 = dist[nr11];
|
|
|
|
// q=12
|
|
nr12 = neighborList[n + 11 * Np];
|
|
f12 = dist[nr12];
|
|
|
|
// q=13
|
|
nr13 = neighborList[n + 12 * Np];
|
|
f13 = dist[nr13];
|
|
|
|
// q=14
|
|
nr14 = neighborList[n + 13 * Np];
|
|
f14 = dist[nr14];
|
|
|
|
// q=15
|
|
nr15 = neighborList[n + 14 * Np];
|
|
f15 = dist[nr15];
|
|
|
|
// q=16
|
|
nr16 = neighborList[n + 15 * Np];
|
|
f16 = dist[nr16];
|
|
|
|
// q=17
|
|
//fq = dist[18*Np+n];
|
|
nr17 = neighborList[n + 16 * Np];
|
|
f17 = dist[nr17];
|
|
|
|
// q=18
|
|
nr18 = neighborList[n + 17 * Np];
|
|
f18 = dist[nr18];
|
|
|
|
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
|
|
error = 8.0*(sum_q - f0) + rho_e;
|
|
|
|
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
|
|
|
|
idx = Map[n];
|
|
Psi[idx] = psi;
|
|
|
|
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
|
|
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
|
|
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
|
|
ElectricField[n + 0 * Np] = Ex;
|
|
ElectricField[n + 1 * Np] = Ey;
|
|
ElectricField[n + 2 * Np] = Ez;
|
|
|
|
// q = 0
|
|
dist[n] = W0*psi; //f0 * (1.0 - rlx) - (1.0-0.5*rlx)*W0*rho_e;
|
|
|
|
// q = 1
|
|
dist[nr2] = W1*psi; //f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 2
|
|
dist[nr1] = W1*psi; //f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 3
|
|
dist[nr4] = W1*psi; //f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 4
|
|
dist[nr3] = W1*psi; //f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 5
|
|
dist[nr6] = W1*psi; //f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 6
|
|
dist[nr5] = W1*psi; //f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
//........................................................................
|
|
|
|
// q = 7
|
|
dist[nr8] = W2*psi; //f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 8
|
|
dist[nr7] = W2*psi; //f8 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 9
|
|
dist[nr10] = W2*psi; //f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 10
|
|
dist[nr9] = W2*psi; //f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 11
|
|
dist[nr12] = W2*psi; //f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 12
|
|
dist[nr11] = W2*psi; //f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 13
|
|
dist[nr14] = W2*psi; //f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q= 14
|
|
dist[nr13] = W2*psi; //f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 15
|
|
dist[nr16] = W2*psi; //f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 16
|
|
dist[nr15] = W2*psi; //f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 17
|
|
dist[nr18] = W2*psi; //f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
// q = 18
|
|
dist[nr17] = W2*psi; //f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
|
|
double *Den_charge, double *Psi,
|
|
double *ElectricField, double *Error, double tau,
|
|
double epsilon_LB, bool UseSlippingVelBC,
|
|
int start, int finish, int Np) {
|
|
int n;
|
|
double psi; //electric potential
|
|
double Ex, Ey, Ez; //electric field
|
|
double rho_e; //local charge density
|
|
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
|
|
f16, f17, f18;
|
|
double error,sum_q;
|
|
double rlx = 1.0 / tau;
|
|
int idx;
|
|
double W0 = 0.5;
|
|
double W1 = 1.0/24.0;
|
|
double W2 = 1.0/48.0;
|
|
|
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
|
for (int s=0; s<S; s++){
|
|
//........Get 1-D index for this thread....................
|
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
|
if (n<finish) {
|
|
//Load data
|
|
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
|
|
//and thus the net space charge density is zero.
|
|
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
|
|
|
|
f0 = dist[n];
|
|
f1 = dist[2 * Np + n];
|
|
f2 = dist[1 * Np + n];
|
|
f3 = dist[4 * Np + n];
|
|
f4 = dist[3 * Np + n];
|
|
f5 = dist[6 * Np + n];
|
|
f6 = dist[5 * Np + n];
|
|
|
|
f7 = dist[8 * Np + n];
|
|
f8 = dist[7 * Np + n];
|
|
f9 = dist[10 * Np + n];
|
|
f10 = dist[9 * Np + n];
|
|
f11 = dist[12 * Np + n];
|
|
f12 = dist[11 * Np + n];
|
|
f13 = dist[14 * Np + n];
|
|
f14 = dist[13 * Np + n];
|
|
f15 = dist[16 * Np + n];
|
|
f16 = dist[15 * Np + n];
|
|
f17 = dist[18 * Np + n];
|
|
f18 = dist[17 * Np + n];
|
|
|
|
/* Ex = (f1 - f2) * rlx *
|
|
4.0; //NOTE the unit of electric field here is V/lu
|
|
Ey = (f3 - f4) * rlx *
|
|
4.0; //factor 4.0 is D3Q7 lattice squared speed of sound
|
|
Ez = (f5 - f6) * rlx * 4.0;
|
|
*/
|
|
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
|
|
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
|
|
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
|
|
ElectricField[n + 0 * Np] = Ex;
|
|
ElectricField[n + 1 * Np] = Ey;
|
|
ElectricField[n + 2 * Np] = Ez;
|
|
|
|
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
|
|
error = 8.0*(sum_q - f0) + rho_e;
|
|
|
|
Error[n] = error;
|
|
|
|
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
|
|
|
|
idx = Map[n];
|
|
Psi[idx] = psi;
|
|
|
|
// q = 0
|
|
dist[n] = W0*psi;//
|
|
|
|
// q = 1
|
|
dist[1 * Np + n] = W1*psi;//f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 2
|
|
dist[2 * Np + n] = W1*psi;//f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 3
|
|
dist[3 * Np + n] = W1*psi;//f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 4
|
|
dist[4 * Np + n] = W1*psi;//f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 5
|
|
dist[5 * Np + n] = W1*psi;//f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
// q = 6
|
|
dist[6 * Np + n] = W1*psi;//f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
|
|
|
dist[7 * Np + n] = W2*psi;//f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[8 * Np + n] = W2*psi;//f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[9 * Np + n] = W2*psi;//f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[10 * Np + n] = W2*psi;//f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[11 * Np + n] = W2*psi;//f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[12 * Np + n] = W2*psi;//f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[13 * Np + n] = W2*psi;//f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[14 * Np + n] = W2*psi;//f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[15 * Np + n] = W2*psi;//f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[16 * Np + n] = W2*psi;//f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[17 * Np + n] = W2*psi;//f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
dist[18 * Np + n] = W2*psi;//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
|
//........................................................................
|
|
}
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
|
|
int start, int finish, int Np) {
|
|
int n;
|
|
int ijk;
|
|
double W0 = 0.5;
|
|
double W1 = 1.0/24.0;
|
|
double W2 = 1.0/48.0;
|
|
int S = Np/NBLOCKS/NTHREADS + 1;
|
|
for (int s=0; s<S; s++){
|
|
//........Get 1-D index for this thread....................
|
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
|
if (n<finish) {
|
|
ijk = Map[n];
|
|
dist[0 * Np + n] = W0 * Psi[ijk];//3333333333333333* Psi[ijk];
|
|
dist[1 * Np + n] = W1 * Psi[ijk];
|
|
dist[2 * Np + n] = W1 * Psi[ijk];
|
|
dist[3 * Np + n] = W1 * Psi[ijk];
|
|
dist[4 * Np + n] = W1 * Psi[ijk];
|
|
dist[5 * Np + n] = W1 * Psi[ijk];
|
|
dist[6 * Np + n] = W1 * Psi[ijk];
|
|
dist[7 * Np + n] = W2* Psi[ijk];
|
|
dist[8 * Np + n] = W2* Psi[ijk];
|
|
dist[9 * Np + n] = W2* Psi[ijk];
|
|
dist[10 * Np + n] = W2* Psi[ijk];
|
|
dist[11 * Np + n] = W2* Psi[ijk];
|
|
dist[12 * Np + n] = W2* Psi[ijk];
|
|
dist[13 * Np + n] = W2* Psi[ijk];
|
|
dist[14 * Np + n] = W2* Psi[ijk];
|
|
dist[15 * Np + n] = W2* Psi[ijk];
|
|
dist[16 * Np + n] = W2* Psi[ijk];
|
|
dist[17 * Np + n] = W2* Psi[ijk];
|
|
dist[18 * Np + n] = W2* Psi[ijk];
|
|
}
|
|
}
|
|
}
|
|
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np) {
|
|
|
|
double W1 = 1.0/24.0;
|
|
double W2 = 1.0/48.0;
|
|
|
|
int idx = blockIdx.x*blockDim.x + threadIdx.x;
|
|
|
|
if (idx < count){
|
|
int n = list[idx];
|
|
|
|
dist[6 * Np + n] = W1*Vin;
|
|
dist[12 * Np + n] = W2*Vin;
|
|
dist[13 * Np + n] = W2*Vin;
|
|
dist[16 * Np + n] = W2*Vin;
|
|
dist[17 * Np + n] = W2*Vin;
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np) {
|
|
|
|
double W1 = 1.0/24.0;
|
|
double W2 = 1.0/48.0;
|
|
|
|
int idx = blockIdx.x*blockDim.x + threadIdx.x;
|
|
|
|
if (idx < count){
|
|
int n = list[idx];
|
|
dist[5 * Np + n] = W1*Vout;
|
|
dist[11 * Np + n] = W2*Vout;
|
|
dist[14 * Np + n] = W2*Vout;
|
|
dist[15 * Np + n] = W2*Vout;
|
|
dist[18 * Np + n] = W2*Vout;
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np) {
|
|
|
|
double W1 = 1.0/24.0;
|
|
double W2 = 1.0/48.0;
|
|
int nr5, nr11, nr14, nr15, nr18;
|
|
|
|
int idx = blockIdx.x*blockDim.x + threadIdx.x;
|
|
|
|
if (idx < count){
|
|
int n = list[idx];
|
|
|
|
|
|
// Unknown distributions
|
|
nr5 = d_neighborList[n + 4 * Np];
|
|
nr11 = d_neighborList[n + 10 * Np];
|
|
nr15 = d_neighborList[n + 14 * Np];
|
|
nr14 = d_neighborList[n + 13 * Np];
|
|
nr18 = d_neighborList[n + 17 * Np];
|
|
|
|
dist[nr5] = W1*Vin;
|
|
dist[nr11] = W2*Vin;
|
|
dist[nr15] = W2*Vin;
|
|
dist[nr14] = W2*Vin;
|
|
dist[nr18] = W2*Vin;
|
|
}
|
|
}
|
|
|
|
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) {
|
|
|
|
double W1 = 1.0/24.0;
|
|
double W2 = 1.0/48.0;
|
|
int nr6, nr12, nr13, nr16, nr17;
|
|
|
|
int idx = blockIdx.x*blockDim.x + threadIdx.x;
|
|
|
|
if (idx < count){
|
|
int n = list[idx];
|
|
// unknown distributions
|
|
nr6 = d_neighborList[n + 5 * Np];
|
|
nr12 = d_neighborList[n + 11 * Np];
|
|
nr16 = d_neighborList[n + 15 * Np];
|
|
nr17 = d_neighborList[n + 16 * Np];
|
|
nr13 = d_neighborList[n + 12 * Np];
|
|
|
|
dist[nr6] = W1*Vout;
|
|
dist[nr12] = W2*Vout;
|
|
dist[nr16] = W2*Vout;
|
|
dist[nr17] = W2*Vout;
|
|
dist[nr13] = W2*Vout;
|
|
}
|
|
}
|
|
|
|
/* wrapper functions to launch kernels */
|
|
extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
|
|
int GRID = count / 512 + 1;
|
|
dvc_ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
|
|
cudaError_t err = cudaGetLastError();
|
|
if (cudaSuccess != err){
|
|
printf("Hip error in ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_z (kernel): %s \n",cudaGetErrorString(err));
|
|
}
|
|
}
|
|
//
|
|
extern "C" void ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
|
|
int GRID = count / 512 + 1;
|
|
dvc_ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
|
|
cudaError_t err = cudaGetLastError();
|
|
if (cudaSuccess != err){
|
|
printf("Hip error in ScaLBL_D3Q19_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",cudaGetErrorString(err));
|
|
}
|
|
}
|
|
extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count,int Np) {
|
|
int GRID = count / 512 + 1;
|
|
dvc_ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
|
|
cudaError_t err = cudaGetLastError();
|
|
if (cudaSuccess != err){
|
|
printf("Hip error in ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_z (kernel): %s \n",cudaGetErrorString(err));
|
|
}
|
|
}
|
|
//
|
|
extern "C" void ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) {
|
|
|
|
int GRID = count / 512 + 1;
|
|
dvc_ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
|
|
cudaError_t err = cudaGetLastError();
|
|
if (cudaSuccess != err){
|
|
printf("Hip error in ScaLBL_D3Q19_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",cudaGetErrorString(err));
|
|
}
|
|
}
|
|
|
|
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
|
|
double *dist, double *Den_charge,
|
|
double *Psi, double *ElectricField,
|
|
double tau, double epsilon_LB, bool UseSlippingVelBC,
|
|
int start, int finish, int Np) {
|
|
|
|
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAodd_Poisson, hipFuncCachePreferL1);
|
|
|
|
dvc_ScaLBL_D3Q19_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList, Map,
|
|
dist, Den_charge, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
|
|
|
|
hipError_t err = hipGetLastError();
|
|
if (hipSuccess != err){
|
|
printf("Hip error in dvc_ScaLBL_D3Q19_AAodd_Poisson: %s \n",hipGetErrorString(err));
|
|
}
|
|
}
|
|
|
|
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
|
|
double *Den_charge, double *Psi,
|
|
double *ElectricField, double *Error, double tau,
|
|
double epsilon_LB, bool UseSlippingVelBC,
|
|
int start, int finish, int Np) {
|
|
|
|
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAeven_Poisson, hipFuncCachePreferL1);
|
|
|
|
|
|
dvc_ScaLBL_D3Q19_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>( Map, dist, Den_charge, Psi,
|
|
ElectricField, Error, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
|
|
|
|
hipError_t err = hipGetLastError();
|
|
if (hipSuccess != err){
|
|
printf("Hip error in dvc_ScaLBL_D3Q19_AAeven_Poisson: %s \n",hipGetErrorString(err));
|
|
}
|
|
}
|
|
|
|
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
|
|
int start, int finish, int Np){
|
|
//hipProfilerStart();
|
|
|
|
dvc_ScaLBL_D3Q19_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map, dist, Psi, start, finish, Np);
|
|
|
|
hipError_t err = hipGetLastError();
|
|
if (hipSuccess != err){
|
|
printf("Hip error in ScaLBL_D3Q19_Poisson_Init: %s \n",hipGetErrorString(err));
|
|
}
|
|
|
|
}
|
|
|
|
|
|
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
|
|
|
//hipProfilerStart();
|
|
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
|
|
|
|
hipError_t err = hipGetLastError();
|
|
if (hipSuccess != err){
|
|
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
|
}
|
|
//hipProfilerStop();
|
|
}
|
|
|
|
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
|
|
|
//hipProfilerStart();
|
|
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
|
|
|
hipError_t err = hipGetLastError();
|
|
if (hipSuccess != err){
|
|
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
|
}
|
|
//hipProfilerStop();
|
|
}
|
|
|
|
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
|
|
|
|
//hipProfilerStart();
|
|
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
|
|
|
|
hipError_t err = hipGetLastError();
|
|
if (hipSuccess != err){
|
|
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
|
|
}
|
|
//hipProfilerStop();
|
|
}
|
|
|
|
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
|
|
|
|
//hipProfilerStart();
|
|
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
|
|
|
|
hipError_t err = hipGetLastError();
|
|
if (hipSuccess != err){
|
|
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
|
|
}
|
|
//hipProfilerStop();
|
|
}
|
|
|
|
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
|
|
|
//hipProfilerStart();
|
|
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
|
|
|
hipError_t err = hipGetLastError();
|
|
if (hipSuccess != err){
|
|
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
|
|
}
|
|
//hipProfilerStop();
|
|
}
|