Merge branch 'FOM_dev' of github.com:JamesEMcClure/LBPM-WIA into FOM_dev
This commit is contained in:
commit
aba83e5829
@ -3,11 +3,10 @@
|
||||
|
||||
|
||||
#include "StackTrace/StackTrace.h"
|
||||
#include "common/MPI.h"
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
|
||||
namespace StackTrace
|
||||
{
|
||||
|
@ -1,5 +1,8 @@
|
||||
#include "common/ScaLBL.h"
|
||||
|
||||
#include <chrono>
|
||||
|
||||
|
||||
ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
|
||||
//......................................................................................
|
||||
Lock=false; // unlock the communicator
|
||||
@ -411,20 +414,19 @@ double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np
|
||||
double FZ = 0.0;
|
||||
ScaLBL_D3Q19_Init(fq, Np);
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
Barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
for (int t=0; t<TIMESTEPS; t++){
|
||||
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, FirstInterior(), LastInterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
|
||||
ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, LastExterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
|
||||
ScaLBL_D3Q19_AAeven_MRT(fq, FirstInterior(), LastInterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
|
||||
ScaLBL_D3Q19_AAeven_MRT(fq, 0, LastExterior(), Np, RLX_SETA, RLX_SETB, FX, FY, FZ);
|
||||
}
|
||||
stoptime = MPI_Wtime();
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
Barrier();
|
||||
// Compute the walltime per timestep
|
||||
cputime = 0.5*(stoptime - starttime)/TIMESTEPS;
|
||||
double diff = std::chrono::duration<double>( t2 - t1 ).count();
|
||||
double cputime = 0.5*diff/TIMESTEPS;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
return MLUPS;
|
||||
|
@ -298,11 +298,10 @@ ScaLBLWideHalo_Communicator::~ScaLBLWideHalo_Communicator()
|
||||
void ScaLBLWideHalo_Communicator::Recv(double *data){
|
||||
|
||||
//...................................................................................
|
||||
MPI_Waitall(26,req1,stat1);
|
||||
MPI_Waitall(26,req2,stat2);
|
||||
Utilities::MPI::waitAll(26,req1);
|
||||
Utilities::MPI::waitAll(26,req2);
|
||||
ScaLBL_DeviceBarrier();
|
||||
//...................................................................................
|
||||
//...................................................................................
|
||||
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, Nh);
|
||||
ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, Nh);
|
||||
ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, Nh);
|
||||
|
@ -4,6 +4,7 @@ This class implements support for halo widths larger than 1
|
||||
#ifndef WideHalo_H
|
||||
#define WideHalo_H
|
||||
#include "common/ScaLBL.h"
|
||||
#include "common/MPI.h"
|
||||
|
||||
class ScaLBLWideHalo_Communicator{
|
||||
public:
|
||||
@ -52,9 +53,7 @@ private:
|
||||
int sendtag,recvtag;
|
||||
// Give the object it's own MPI communicator
|
||||
RankInfoStruct rank_info;
|
||||
MPI_Group Group; // Group of processors associated with this domain
|
||||
MPI_Request req1[26],req2[26];
|
||||
MPI_Status stat1[26],stat2[26];
|
||||
//......................................................................................
|
||||
// MPI ranks for all 18 neighbors
|
||||
//......................................................................................
|
||||
|
414
cuda/BGK.cu
414
cuda/BGK.cu
@ -12,111 +12,111 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish,
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
|
||||
if ( n<finish ){
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
f7 = dist[8*Np+n];
|
||||
f8 = dist[7*Np+n];
|
||||
f9 = dist[10*Np+n];
|
||||
f10 = dist[9*Np+n];
|
||||
f11 = dist[12*Np+n];
|
||||
f12 = dist[11*Np+n];
|
||||
f13 = dist[14*Np+n];
|
||||
f14 = dist[13*Np+n];
|
||||
f15 = dist[16*Np+n];
|
||||
f16 = dist[15*Np+n];
|
||||
f17 = dist[18*Np+n];
|
||||
f18 = dist[17*Np+n];
|
||||
if ( n<finish ){
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
f7 = dist[8*Np+n];
|
||||
f8 = dist[7*Np+n];
|
||||
f9 = dist[10*Np+n];
|
||||
f10 = dist[9*Np+n];
|
||||
f11 = dist[12*Np+n];
|
||||
f12 = dist[11*Np+n];
|
||||
f13 = dist[14*Np+n];
|
||||
f14 = dist[13*Np+n];
|
||||
f15 = dist[16*Np+n];
|
||||
f16 = dist[15*Np+n];
|
||||
f17 = dist[18*Np+n];
|
||||
f18 = dist[17*Np+n];
|
||||
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
uu = 1.5*(ux*ux+uy*uy+uz*uz);
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
uu = 1.5*(ux*ux+uy*uy+uz*uz);
|
||||
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
|
||||
|
||||
// q = 1
|
||||
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
|
||||
// q = 1
|
||||
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
|
||||
|
||||
// q=2
|
||||
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
|
||||
// q=2
|
||||
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
|
||||
|
||||
// q = 3
|
||||
dist[3*Np+n] = f3*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
|
||||
// q = 3
|
||||
dist[3*Np+n] = f3*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
|
||||
|
||||
// q = 4
|
||||
dist[4*Np+n] = f4*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
|
||||
// q = 4
|
||||
dist[4*Np+n] = f4*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
|
||||
|
||||
// q = 5
|
||||
dist[5*Np+n] = f5*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
|
||||
// q = 5
|
||||
dist[5*Np+n] = f5*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
|
||||
|
||||
// q = 6
|
||||
dist[6*Np+n] = f6*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
|
||||
// q = 6
|
||||
dist[6*Np+n] = f6*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
|
||||
|
||||
// q = 7
|
||||
dist[7*Np+n] = f7*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
|
||||
// q = 7
|
||||
dist[7*Np+n] = f7*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
|
||||
|
||||
// q = 8
|
||||
dist[8*Np+n] = f8*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
|
||||
// q = 8
|
||||
dist[8*Np+n] = f8*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
|
||||
|
||||
// q = 9
|
||||
dist[9*Np+n] = f9*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
|
||||
// q = 9
|
||||
dist[9*Np+n] = f9*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
|
||||
|
||||
// q = 10
|
||||
dist[10*Np+n] = f10*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
|
||||
// q = 10
|
||||
dist[10*Np+n] = f10*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
|
||||
|
||||
// q = 11
|
||||
dist[11*Np+n] = f11*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
|
||||
// q = 11
|
||||
dist[11*Np+n] = f11*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
|
||||
|
||||
// q = 12
|
||||
dist[12*Np+n] = f12*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
|
||||
// q = 12
|
||||
dist[12*Np+n] = f12*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
|
||||
|
||||
// q = 13
|
||||
dist[13*Np+n] = f13*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
|
||||
// q = 13
|
||||
dist[13*Np+n] = f13*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
|
||||
|
||||
// q= 14
|
||||
dist[14*Np+n] = f14*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
|
||||
// q= 14
|
||||
dist[14*Np+n] = f14*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
|
||||
|
||||
// q = 15
|
||||
dist[15*Np+n] = f15*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
|
||||
// q = 15
|
||||
dist[15*Np+n] = f15*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
|
||||
|
||||
// q = 16
|
||||
dist[16*Np+n] = f16*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
|
||||
// q = 16
|
||||
dist[16*Np+n] = f16*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
|
||||
|
||||
// q = 17
|
||||
dist[17*Np+n] = f17*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
|
||||
// q = 17
|
||||
dist[17*Np+n] = f17*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
|
||||
|
||||
// q = 18
|
||||
dist[18*Np+n] = f18*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
|
||||
// q = 18
|
||||
dist[18*Np+n] = f18*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
|
||||
|
||||
//........................................................................
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -131,180 +131,180 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
|
||||
if ( n<finish ){
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
// q=1
|
||||
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
f1 = dist[nr1]; // reading the f1 data into register fq
|
||||
if ( n<finish ){
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
// q=1
|
||||
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
f1 = dist[nr1]; // reading the f1 data into register fq
|
||||
|
||||
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
f2 = dist[nr2]; // reading the f2 data into register fq
|
||||
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
f2 = dist[nr2]; // reading the f2 data into register fq
|
||||
|
||||
// q=3
|
||||
nr3 = neighborList[n+2*Np]; // neighbor 4
|
||||
f3 = dist[nr3];
|
||||
// q=3
|
||||
nr3 = neighborList[n+2*Np]; // neighbor 4
|
||||
f3 = dist[nr3];
|
||||
|
||||
// q = 4
|
||||
nr4 = neighborList[n+3*Np]; // neighbor 3
|
||||
f4 = dist[nr4];
|
||||
// q = 4
|
||||
nr4 = neighborList[n+3*Np]; // neighbor 3
|
||||
f4 = dist[nr4];
|
||||
|
||||
// q=5
|
||||
nr5 = neighborList[n+4*Np];
|
||||
f5 = dist[nr5];
|
||||
// q=5
|
||||
nr5 = neighborList[n+4*Np];
|
||||
f5 = dist[nr5];
|
||||
|
||||
// q = 6
|
||||
nr6 = neighborList[n+5*Np];
|
||||
f6 = dist[nr6];
|
||||
|
||||
// q=7
|
||||
nr7 = neighborList[n+6*Np];
|
||||
f7 = dist[nr7];
|
||||
// q = 6
|
||||
nr6 = neighborList[n+5*Np];
|
||||
f6 = dist[nr6];
|
||||
|
||||
// q = 8
|
||||
nr8 = neighborList[n+7*Np];
|
||||
f8 = dist[nr8];
|
||||
// q=7
|
||||
nr7 = neighborList[n+6*Np];
|
||||
f7 = dist[nr7];
|
||||
|
||||
// q=9
|
||||
nr9 = neighborList[n+8*Np];
|
||||
f9 = dist[nr9];
|
||||
// q = 8
|
||||
nr8 = neighborList[n+7*Np];
|
||||
f8 = dist[nr8];
|
||||
|
||||
// q = 10
|
||||
nr10 = neighborList[n+9*Np];
|
||||
f10 = dist[nr10];
|
||||
// q=9
|
||||
nr9 = neighborList[n+8*Np];
|
||||
f9 = dist[nr9];
|
||||
|
||||
// q=11
|
||||
nr11 = neighborList[n+10*Np];
|
||||
f11 = dist[nr11];
|
||||
// q = 10
|
||||
nr10 = neighborList[n+9*Np];
|
||||
f10 = dist[nr10];
|
||||
|
||||
// q=12
|
||||
nr12 = neighborList[n+11*Np];
|
||||
f12 = dist[nr12];
|
||||
// q=11
|
||||
nr11 = neighborList[n+10*Np];
|
||||
f11 = dist[nr11];
|
||||
|
||||
// q=13
|
||||
nr13 = neighborList[n+12*Np];
|
||||
f13 = dist[nr13];
|
||||
// q=12
|
||||
nr12 = neighborList[n+11*Np];
|
||||
f12 = dist[nr12];
|
||||
|
||||
// q=14
|
||||
nr14 = neighborList[n+13*Np];
|
||||
f14 = dist[nr14];
|
||||
// q=13
|
||||
nr13 = neighborList[n+12*Np];
|
||||
f13 = dist[nr13];
|
||||
|
||||
// q=15
|
||||
nr15 = neighborList[n+14*Np];
|
||||
f15 = dist[nr15];
|
||||
// q=14
|
||||
nr14 = neighborList[n+13*Np];
|
||||
f14 = dist[nr14];
|
||||
|
||||
// q=16
|
||||
nr16 = neighborList[n+15*Np];
|
||||
f16 = dist[nr16];
|
||||
// q=15
|
||||
nr15 = neighborList[n+14*Np];
|
||||
f15 = dist[nr15];
|
||||
|
||||
// q=17
|
||||
//fq = dist[18*Np+n];
|
||||
nr17 = neighborList[n+16*Np];
|
||||
f17 = dist[nr17];
|
||||
// q=16
|
||||
nr16 = neighborList[n+15*Np];
|
||||
f16 = dist[nr16];
|
||||
|
||||
// q=18
|
||||
nr18 = neighborList[n+17*Np];
|
||||
f18 = dist[nr18];
|
||||
// q=17
|
||||
//fq = dist[18*Np+n];
|
||||
nr17 = neighborList[n+16*Np];
|
||||
f17 = dist[nr17];
|
||||
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
uu = 1.5*(ux*ux+uy*uy+uz*uz);
|
||||
// q=18
|
||||
nr18 = neighborList[n+17*Np];
|
||||
f18 = dist[nr18];
|
||||
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
uu = 1.5*(ux*ux+uy*uy+uz*uz);
|
||||
|
||||
// q = 1
|
||||
dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu);
|
||||
|
||||
// q=2
|
||||
dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
|
||||
// q = 1
|
||||
dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx;
|
||||
|
||||
// q = 3
|
||||
dist[nr4] = f3*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
|
||||
// q=2
|
||||
dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx;
|
||||
|
||||
// q = 4
|
||||
dist[nr3] = f4*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
|
||||
// q = 3
|
||||
dist[nr4] = f3*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy;
|
||||
|
||||
// q = 5
|
||||
dist[nr6] = f5*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
|
||||
// q = 4
|
||||
dist[nr3] = f4*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy;
|
||||
|
||||
// q = 6
|
||||
dist[nr5] = f6*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
|
||||
// q = 5
|
||||
dist[nr6] = f5*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz;
|
||||
|
||||
// q = 7
|
||||
dist[nr8] = f7*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
|
||||
// q = 6
|
||||
dist[nr5] = f6*(1.0-rlx) +
|
||||
rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz;
|
||||
|
||||
// q = 8
|
||||
dist[nr7] = f8*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
|
||||
// q = 7
|
||||
dist[nr8] = f7*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy);
|
||||
|
||||
// q = 9
|
||||
dist[nr10] = f9*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
|
||||
// q = 8
|
||||
dist[nr7] = f8*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy);
|
||||
|
||||
// q = 10
|
||||
dist[nr9] = f10*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
|
||||
// q = 9
|
||||
dist[nr10] = f9*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy);
|
||||
|
||||
// q = 11
|
||||
dist[nr12] = f11*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
|
||||
// q = 10
|
||||
dist[nr9] = f10*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy);
|
||||
|
||||
// q = 12
|
||||
dist[nr11] = f12*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
|
||||
// q = 11
|
||||
dist[nr12] = f11*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz);
|
||||
|
||||
// q = 13
|
||||
dist[nr14] = f13*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
|
||||
// q = 12
|
||||
dist[nr11] = f12*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz);
|
||||
|
||||
// q= 14
|
||||
dist[nr13] = f14*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
|
||||
// q = 13
|
||||
dist[nr14] = f13*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz);
|
||||
|
||||
// q = 15
|
||||
dist[nr16] = f15*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
|
||||
// q= 14
|
||||
dist[nr13] = f14*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz);
|
||||
|
||||
// q = 16
|
||||
dist[nr15] = f16*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
|
||||
// q = 15
|
||||
dist[nr16] = f15*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz);
|
||||
|
||||
// q = 17
|
||||
dist[nr18] = f17*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
|
||||
// q = 16
|
||||
dist[nr15] = f16*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz);
|
||||
|
||||
// q = 18
|
||||
dist[nr17] = f18*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
|
||||
// q = 17
|
||||
dist[nr18] = f17*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz);
|
||||
|
||||
// q = 18
|
||||
dist[nr17] = f18*(1.0-rlx) +
|
||||
rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
|
||||
dvc_ScaLBL_D3Q19_AAeven_BGK<<<NBLOCKS,NTHREADS >>>(dist,start,finish,Np,rlx,Fx,Fy,Fz);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
dvc_ScaLBL_D3Q19_AAeven_BGK<<<NBLOCKS,NTHREADS >>>(dist,start,finish,Np,rlx,Fx,Fy,Fz);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
dvc_ScaLBL_D3Q19_AAodd_BGK<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz);
|
||||
dvc_ScaLBL_D3Q19_AAodd_BGK<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
|
2016
cuda/FreeLee.cu
Normal file
2016
cuda/FreeLee.cu
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
||||
SET( HIP_SEPERABLE_COMPILATION ON )
|
||||
SET_SOURCE_FILES_PROPERTIES( BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
|
||||
HIP_ADD_LIBRARY( lbpm-hip BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.cu SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
|
||||
FILE( GLOB HIP_SOURCES "*.cu" )
|
||||
SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
|
||||
HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
|
||||
#TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so )
|
||||
#TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip )
|
||||
#ADD_DEPENDENCIES( lbpm-hip copy-include )
|
||||
|
72
hip/D3Q19.cu
72
hip/D3Q19.cu
@ -89,9 +89,25 @@ __global__ void sum_kernel_block(double *sum, double *input, int n)
|
||||
|
||||
__inline__ __device__
|
||||
double warpReduceSum(double val) {
|
||||
#if 0
|
||||
for (int offset = warpSize/2; offset > 0; offset /= 2)
|
||||
val += __shfl_down_sync(0xFFFFFFFF, val, offset, 32);
|
||||
return val;
|
||||
#else
|
||||
short int id = threadIdx.x % warpSize;
|
||||
__shared__ double tmp[64];
|
||||
tmp[id] = val;
|
||||
__syncthreads();
|
||||
if ( warpSize == 64) {
|
||||
tmp[id] += tmp[id+32]; __syncthreads();
|
||||
}
|
||||
tmp[id] += tmp[id+16]; __syncthreads();
|
||||
tmp[id] += tmp[id+8]; __syncthreads();
|
||||
tmp[id] += tmp[id+4]; __syncthreads();
|
||||
tmp[id] += tmp[id+2]; __syncthreads();
|
||||
tmp[id] += tmp[id+1]; __syncthreads();
|
||||
return tmp[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline__ __device__
|
||||
@ -1730,6 +1746,44 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist,
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
|
||||
int idx, n;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
double f5 = 0.111111111111111111111111 - dist[6*Np+n];
|
||||
double f11 = 0.05555555555555555555556 - dist[12*Np+n];
|
||||
double f14 = 0.05555555555555555555556 - dist[13*Np+n];
|
||||
double f15 = 0.05555555555555555555556 - dist[16*Np+n];
|
||||
double f18 = 0.05555555555555555555556 - dist[17*Np+n];
|
||||
|
||||
dist[6*Np+n] = f5;
|
||||
dist[12*Np+n] = f11;
|
||||
dist[13*Np+n] = f14;
|
||||
dist[16*Np+n] = f15;
|
||||
dist[17*Np+n] = f18;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
|
||||
int idx, n;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
double f6 = 0.111111111111111111111111 - dist[5*Np+n];
|
||||
double f12 = 0.05555555555555555555556 - dist[11*Np+n];
|
||||
double f13 = 0.05555555555555555555556 - dist[14*Np+n] ;
|
||||
double f16 = 0.05555555555555555555556 - dist[15*Np+n];
|
||||
double f17 = 0.05555555555555555555556 - dist[18*Np+n];
|
||||
|
||||
dist[5*Np+n] = f6;
|
||||
dist[11*Np+n] = f12;
|
||||
dist[14*Np+n] = f13;
|
||||
dist[15*Np+n] = f16;
|
||||
dist[18*Np+n] = f17;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
@ -2605,6 +2659,24 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub
|
||||
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q19_Reflection_BC_z<<<GRID,512>>>(list, dist, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("HIP error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q19_Reflection_BC_Z<<<GRID,512>>>(list, dist, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("HIP error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" double deviceReduce(double *in, double* out, int N) {
|
||||
int threads = 512;
|
||||
int blocks = min((N + threads - 1) / threads, 1024);
|
||||
|
536
hip/D3Q7BC.cu
Normal file
536
hip/D3Q7BC.cu
Normal file
@ -0,0 +1,536 @@
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 560
|
||||
#define NTHREADS 128
|
||||
|
||||
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
|
||||
{
|
||||
|
||||
int idx;
|
||||
int iq,ib;
|
||||
double value_b,value_q;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
iq = BounceBackDist_list[idx];
|
||||
ib = BounceBackSolid_list[idx];
|
||||
value_b = BoundaryValue[ib];//get boundary value from a solid site
|
||||
value_q = dist[iq];
|
||||
dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
|
||||
{
|
||||
|
||||
int idx;
|
||||
int iq,ib;
|
||||
double value_b,value_q;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
iq = BounceBackDist_list[idx];
|
||||
ib = BounceBackSolid_list[idx];
|
||||
value_b = BoundaryValue[ib];//get boundary value from a solid site
|
||||
value_q = dist[iq];
|
||||
dist[iq] = value_q + value_b;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
|
||||
{
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
//...................................................
|
||||
f5 = Vin - (f0+f1+f2+f3+f4+f6);
|
||||
dist[6*Np+n] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
|
||||
{
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
//...................................................
|
||||
f6 = Vout - (f0+f1+f2+f3+f4+f5);
|
||||
dist[5*Np+n] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
int nread,nr5;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+5*Np];
|
||||
f6 = dist[nread];
|
||||
|
||||
// Unknown distributions
|
||||
nr5 = d_neighborList[n+4*Np];
|
||||
f5 = Vin - (f0+f1+f2+f3+f4+f6);
|
||||
dist[nr5] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
int nread,nr6;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+4*Np];
|
||||
f5 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
// unknown distributions
|
||||
nr6 = d_neighborList[n+5*Np];
|
||||
f6 = Vout - (f0+f1+f2+f3+f4+f5);
|
||||
dist[nr6] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
|
||||
{
|
||||
int idx,n,nm;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
nm = Map[n];
|
||||
Psi[nm] = Vin;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
|
||||
{
|
||||
int idx,n,nm;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
nm = Map[n];
|
||||
Psi[nm] = Vout;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
|
||||
{
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
//...................................................
|
||||
f5 = Cin - (f0+f1+f2+f3+f4+f6);
|
||||
dist[6*Np+n] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
|
||||
{
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
//...................................................
|
||||
f6 = Cout - (f0+f1+f2+f3+f4+f5);
|
||||
dist[5*Np+n] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
int nread,nr5;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+5*Np];
|
||||
f6 = dist[nread];
|
||||
|
||||
// Unknown distributions
|
||||
nr5 = d_neighborList[n+4*Np];
|
||||
f5 = Cin - (f0+f1+f2+f3+f4+f6);
|
||||
dist[nr5] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
int nread,nr6;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+4*Np];
|
||||
f5 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
// unknown distributions
|
||||
nr6 = d_neighborList[n+5*Np];
|
||||
f6 = Cout - (f0+f1+f2+f3+f4+f5);
|
||||
dist[nr6] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
|
||||
{
|
||||
//NOTE: FluxIn is the inward flux
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double fsum_partial;
|
||||
double uz;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
fsum_partial = f0+f1+f2+f3+f4+f6;
|
||||
uz = VelocityZ[n];
|
||||
//...................................................
|
||||
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
|
||||
dist[6*Np+n] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
|
||||
{
|
||||
//NOTE: FluxIn is the inward flux
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double fsum_partial;
|
||||
double uz;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
fsum_partial = f0+f1+f2+f3+f4+f5;
|
||||
uz = VelocityZ[n];
|
||||
//...................................................
|
||||
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
|
||||
dist[5*Np+n] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
|
||||
{
|
||||
//NOTE: FluxIn is the inward flux
|
||||
int idx, n;
|
||||
int nread,nr5;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double fsum_partial;
|
||||
double uz;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+5*Np];
|
||||
f6 = dist[nread];
|
||||
|
||||
fsum_partial = f0+f1+f2+f3+f4+f6;
|
||||
uz = VelocityZ[n];
|
||||
//...................................................
|
||||
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
|
||||
|
||||
// Unknown distributions
|
||||
nr5 = d_neighborList[n+4*Np];
|
||||
dist[nr5] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
|
||||
{
|
||||
//NOTE: FluxIn is the inward flux
|
||||
int idx, n;
|
||||
int nread,nr6;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double fsum_partial;
|
||||
double uz;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+4*Np];
|
||||
f5 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
fsum_partial = f0+f1+f2+f3+f4+f5;
|
||||
uz = VelocityZ[n];
|
||||
//...................................................
|
||||
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
|
||||
|
||||
// unknown distributions
|
||||
nr6 = d_neighborList[n+5*Np];
|
||||
dist[nr6] = f6;
|
||||
}
|
||||
}
|
||||
//*************************************************************************
|
||||
|
||||
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
2017
hip/FreeLee.cu
Normal file
2017
hip/FreeLee.cu
Normal file
File diff suppressed because it is too large
Load Diff
2745
hip/Greyscale.cu
Normal file
2745
hip/Greyscale.cu
Normal file
File diff suppressed because it is too large
Load Diff
3038
hip/GreyscaleColor.cu
Normal file
3038
hip/GreyscaleColor.cu
Normal file
File diff suppressed because it is too large
Load Diff
392
hip/Ion.cu
Normal file
392
hip/Ion.cu
Normal file
@ -0,0 +1,392 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
|
||||
int n,nread;
|
||||
double fq,Ci;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
Ci = fq;
|
||||
|
||||
// q=1
|
||||
nread = neighborList[n];
|
||||
fq = dist[nread];
|
||||
Ci += fq;
|
||||
|
||||
// q=2
|
||||
nread = neighborList[n+Np];
|
||||
fq = dist[nread];
|
||||
Ci += fq;
|
||||
|
||||
// q=3
|
||||
nread = neighborList[n+2*Np];
|
||||
fq = dist[nread];
|
||||
Ci += fq;
|
||||
|
||||
// q=4
|
||||
nread = neighborList[n+3*Np];
|
||||
fq = dist[nread];
|
||||
Ci += fq;
|
||||
|
||||
// q=5
|
||||
nread = neighborList[n+4*Np];
|
||||
fq = dist[nread];
|
||||
Ci += fq;
|
||||
|
||||
// q=6
|
||||
nread = neighborList[n+5*Np];
|
||||
fq = dist[nread];
|
||||
Ci += fq;
|
||||
|
||||
Den[n]=Ci;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
|
||||
int n;
|
||||
double fq,Ci;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
Ci = fq;
|
||||
|
||||
// q=1
|
||||
fq = dist[2*Np+n];
|
||||
Ci += fq;
|
||||
|
||||
// q=2
|
||||
fq = dist[1*Np+n];
|
||||
Ci += fq;
|
||||
|
||||
// q=3
|
||||
fq = dist[4*Np+n];
|
||||
Ci += fq;
|
||||
|
||||
// q=4
|
||||
fq = dist[3*Np+n];
|
||||
Ci += fq;
|
||||
|
||||
// q=5
|
||||
fq = dist[6*Np+n];
|
||||
Ci += fq;
|
||||
|
||||
// q=6
|
||||
fq = dist[5*Np+n];
|
||||
Ci += fq;
|
||||
|
||||
Den[n]=Ci;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
int n;
|
||||
double Ci;
|
||||
double ux,uy,uz;
|
||||
double uEPx,uEPy,uEPz;//electrochemical induced velocity
|
||||
double Ex,Ey,Ez;//electrical field
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
int nr1,nr2,nr3,nr4,nr5,nr6;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
Ci=Den[n];
|
||||
Ex=ElectricField[n+0*Np];
|
||||
Ey=ElectricField[n+1*Np];
|
||||
Ez=ElectricField[n+2*Np];
|
||||
ux=Velocity[n+0*Np];
|
||||
uy=Velocity[n+1*Np];
|
||||
uz=Velocity[n+2*Np];
|
||||
uEPx=zi*Di/Vt*Ex;
|
||||
uEPy=zi*Di/Vt*Ey;
|
||||
uEPz=zi*Di/Vt*Ez;
|
||||
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
// q=1
|
||||
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
f1 = dist[nr1]; // reading the f1 data into register fq
|
||||
// q=2
|
||||
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
f2 = dist[nr2]; // reading the f2 data into register fq
|
||||
// q=3
|
||||
nr3 = neighborList[n+2*Np]; // neighbor 4
|
||||
f3 = dist[nr3];
|
||||
// q=4
|
||||
nr4 = neighborList[n+3*Np]; // neighbor 3
|
||||
f4 = dist[nr4];
|
||||
// q=5
|
||||
nr5 = neighborList[n+4*Np];
|
||||
f5 = dist[nr5];
|
||||
// q=6
|
||||
nr6 = neighborList[n+5*Np];
|
||||
f6 = dist[nr6];
|
||||
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
|
||||
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 1
|
||||
dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
|
||||
//dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q=2
|
||||
dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
|
||||
//dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 3
|
||||
dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
|
||||
//dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 4
|
||||
dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
|
||||
//dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 5
|
||||
dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
|
||||
//dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 6
|
||||
dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
|
||||
//dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
int n;
|
||||
double Ci;
|
||||
double ux,uy,uz;
|
||||
double uEPx,uEPy,uEPz;//electrochemical induced velocity
|
||||
double Ex,Ey,Ez;//electrical field
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
Ci=Den[n];
|
||||
Ex=ElectricField[n+0*Np];
|
||||
Ey=ElectricField[n+1*Np];
|
||||
Ez=ElectricField[n+2*Np];
|
||||
ux=Velocity[n+0*Np];
|
||||
uy=Velocity[n+1*Np];
|
||||
uz=Velocity[n+2*Np];
|
||||
uEPx=zi*Di/Vt*Ex;
|
||||
uEPy=zi*Di/Vt*Ey;
|
||||
uEPz=zi*Di/Vt*Ez;
|
||||
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
|
||||
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 1
|
||||
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
|
||||
//dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q=2
|
||||
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
|
||||
//dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 3
|
||||
dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
|
||||
//dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 4
|
||||
dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
|
||||
//dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 5
|
||||
dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
|
||||
//dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 6
|
||||
dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
|
||||
//dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
|
||||
|
||||
int n;
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||
if (n<Np) {
|
||||
dist[0*Np+n] = 0.25*DenInit;
|
||||
dist[1*Np+n] = 0.125*DenInit;
|
||||
dist[2*Np+n] = 0.125*DenInit;
|
||||
dist[3*Np+n] = 0.125*DenInit;
|
||||
dist[4*Np+n] = 0.125*DenInit;
|
||||
dist[5*Np+n] = 0.125*DenInit;
|
||||
dist[6*Np+n] = 0.125*DenInit;
|
||||
Den[n] = DenInit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
|
||||
|
||||
int n;
|
||||
double DenInit;
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||
if (n<Np) {
|
||||
DenInit = Den[n];
|
||||
dist[0*Np+n] = 0.25*DenInit;
|
||||
dist[1*Np+n] = 0.125*DenInit;
|
||||
dist[2*Np+n] = 0.125*DenInit;
|
||||
dist[3*Np+n] = 0.125*DenInit;
|
||||
dist[4*Np+n] = 0.125*DenInit;
|
||||
dist[5*Np+n] = 0.125*DenInit;
|
||||
dist[6*Np+n] = 0.125*DenInit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
double Ci;//ion concentration of species i
|
||||
double CD;//charge density
|
||||
double CD_tmp;
|
||||
double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
Ci = Den[n+ion_component*Np];
|
||||
CD = ChargeDensity[n];
|
||||
CD_tmp = F*IonValence*Ci;
|
||||
ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
77
hip/MixedGradient.cu
Normal file
77
hip/MixedGradient.cu
Normal file
@ -0,0 +1,77 @@
|
||||
/* Implement Mixed Gradient (Lee et al. JCP 2016)*/
|
||||
#include <stdio.h>
|
||||
//#include <cuda_profiler_api.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
|
||||
#define NBLOCKS 560
|
||||
#define NTHREADS 128
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
|
||||
{
|
||||
static const int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1},
|
||||
{1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0},
|
||||
{1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1},
|
||||
{0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}};
|
||||
|
||||
int i,j,k,n,N,idx;
|
||||
int np,np2,nm; // neighbors
|
||||
double v,vp,vp2,vm; // values at neighbors
|
||||
double grad;
|
||||
N = Nx*Ny*Nz;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
|
||||
//........Get 1-D index for this thread....................
|
||||
idx = start + S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||
|
||||
if (idx<finish){
|
||||
n = Map[idx]; // layout in regular array
|
||||
//.......Back out the 3-D indices for node n..............
|
||||
k = n/(Nx*Ny);
|
||||
j = (n-Nx*Ny*k)/Nx;
|
||||
i = n-Nx*Ny*k-Nx*j;
|
||||
v = Phi[n];
|
||||
grad = 0.0;
|
||||
for (int q=0; q<6; q++){
|
||||
int iqx = D3Q19[q][0];
|
||||
int iqy = D3Q19[q][1];
|
||||
int iqz = D3Q19[q][2];
|
||||
np = (k+iqz)*Nx*Ny + (j+iqy)*Nx + i + iqx;
|
||||
np2 = (k+2*iqz)*Nx*Ny + (j+2*iqy)*Nx + i + 2*iqx;
|
||||
nm = (k-iqz)*Nx*Ny + (j-iqy)*Nx + i - iqx;
|
||||
vp = Phi[np];
|
||||
vp2 = Phi[np2];
|
||||
vm = Phi[nm];
|
||||
grad += 0.25*(5.0*vp-vp2-3.0*v-vm);
|
||||
}
|
||||
for (int q=6; q<18; q++){
|
||||
int iqx = D3Q19[q][0];
|
||||
int iqy = D3Q19[q][1];
|
||||
int iqz = D3Q19[q][2];
|
||||
np = (k+iqz)*Nx*Ny + (j+iqy)*Nx + i + iqx;
|
||||
np2 = (k+2*iqz)*Nx*Ny + (j+2*iqy)*Nx + i + 2*iqx;
|
||||
nm = (k-iqz)*Nx*Ny + (j-iqy)*Nx + i - iqx;
|
||||
vp = Phi[np];
|
||||
vp2 = Phi[np2];
|
||||
vm = Phi[nm];
|
||||
grad += 0.125*(5.0*vp-vp2-3.0*v-vm);
|
||||
}
|
||||
Gradient[n] = grad;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
|
||||
{
|
||||
hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_MixedGradient<<<NBLOCKS,NTHREADS >>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
hipProfilerStop();
|
||||
}
|
||||
|
330
hip/Poisson.cu
Normal file
330
hip/Poisson.cu
Normal file
@ -0,0 +1,330 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
int n;
|
||||
double psi;//electric potential
|
||||
double fq;
|
||||
int nread;
|
||||
int idx;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
psi = fq;
|
||||
|
||||
// q=1
|
||||
nread = neighborList[n];
|
||||
fq = dist[nread];
|
||||
psi += fq;
|
||||
|
||||
// q=2
|
||||
nread = neighborList[n+Np];
|
||||
fq = dist[nread];
|
||||
psi += fq;
|
||||
|
||||
// q=3
|
||||
nread = neighborList[n+2*Np];
|
||||
fq = dist[nread];
|
||||
psi += fq;
|
||||
|
||||
// q = 4
|
||||
nread = neighborList[n+3*Np];
|
||||
fq = dist[nread];
|
||||
psi += fq;
|
||||
|
||||
// q=5
|
||||
nread = neighborList[n+4*Np];
|
||||
fq = dist[nread];
|
||||
psi += fq;
|
||||
|
||||
// q = 6
|
||||
nread = neighborList[n+5*Np];
|
||||
fq = dist[nread];
|
||||
psi += fq;
|
||||
|
||||
idx=Map[n];
|
||||
Psi[idx] = psi;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
int n;
|
||||
double psi;//electric potential
|
||||
double fq;
|
||||
int idx;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
psi = fq;
|
||||
|
||||
// q=1
|
||||
fq = dist[2*Np+n];
|
||||
psi += fq;
|
||||
|
||||
// q=2
|
||||
fq = dist[1*Np+n];
|
||||
psi += fq;
|
||||
|
||||
// q=3
|
||||
fq = dist[4*Np+n];
|
||||
psi += fq;
|
||||
|
||||
// q=4
|
||||
fq = dist[3*Np+n];
|
||||
psi += fq;
|
||||
|
||||
// q=5
|
||||
fq = dist[6*Np+n];
|
||||
psi += fq;
|
||||
|
||||
// q=6
|
||||
fq = dist[5*Np+n];
|
||||
psi += fq;
|
||||
|
||||
idx=Map[n];
|
||||
Psi[idx] = psi;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
double psi;//electric potential
|
||||
double Ex,Ey,Ez;//electric field
|
||||
double rho_e;//local charge density
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
int nr1,nr2,nr3,nr4,nr5,nr6;
|
||||
double rlx=1.0/tau;
|
||||
int idx;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
rho_e = Den_charge[n];
|
||||
rho_e = rho_e/epsilon_LB;
|
||||
idx=Map[n];
|
||||
psi = Psi[idx];
|
||||
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
// q=1
|
||||
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
f1 = dist[nr1]; // reading the f1 data into register fq
|
||||
|
||||
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
f2 = dist[nr2]; // reading the f2 data into register fq
|
||||
|
||||
// q=3
|
||||
nr3 = neighborList[n+2*Np]; // neighbor 4
|
||||
f3 = dist[nr3];
|
||||
|
||||
// q = 4
|
||||
nr4 = neighborList[n+3*Np]; // neighbor 3
|
||||
f4 = dist[nr4];
|
||||
|
||||
// q=5
|
||||
nr5 = neighborList[n+4*Np];
|
||||
f5 = dist[nr5];
|
||||
|
||||
// q = 6
|
||||
nr6 = neighborList[n+5*Np];
|
||||
f6 = dist[nr6];
|
||||
|
||||
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
|
||||
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
|
||||
Ez = (f5-f6)*rlx*4.0;
|
||||
ElectricField[n+0*Np] = Ex;
|
||||
ElectricField[n+1*Np] = Ey;
|
||||
ElectricField[n+2*Np] = Ez;
|
||||
|
||||
// q = 0
|
||||
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
|
||||
|
||||
// q = 1
|
||||
dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 2
|
||||
dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 3
|
||||
dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 4
|
||||
dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 5
|
||||
dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 6
|
||||
dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
double psi;//electric potential
|
||||
double Ex,Ey,Ez;//electric field
|
||||
double rho_e;//local charge density
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double rlx=1.0/tau;
|
||||
int idx;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
rho_e = Den_charge[n];
|
||||
rho_e = rho_e/epsilon_LB;
|
||||
idx=Map[n];
|
||||
psi = Psi[idx];
|
||||
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
|
||||
|
||||
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
|
||||
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
|
||||
Ez = (f5-f6)*rlx*4.0;
|
||||
ElectricField[n+0*Np] = Ex;
|
||||
ElectricField[n+1*Np] = Ey;
|
||||
ElectricField[n+2*Np] = Ez;
|
||||
|
||||
// q = 0
|
||||
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
|
||||
|
||||
// q = 1
|
||||
dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 2
|
||||
dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 3
|
||||
dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 4
|
||||
dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 5
|
||||
dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
|
||||
// q = 6
|
||||
dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
int ijk;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
ijk = Map[n];
|
||||
dist[0*Np+n] = 0.25*Psi[ijk];
|
||||
dist[1*Np+n] = 0.125*Psi[ijk];
|
||||
dist[2*Np+n] = 0.125*Psi[ijk];
|
||||
dist[3*Np+n] = 0.125*Psi[ijk];
|
||||
dist[4*Np+n] = 0.125*Psi[ijk];
|
||||
dist[5*Np+n] = 0.125*Psi[ijk];
|
||||
dist[6*Np+n] = 0.125*Psi[ijk];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
996
hip/Stokes.cu
Normal file
996
hip/Stokes.cu
Normal file
@ -0,0 +1,996 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
double fq;
|
||||
// conserved momemnts
|
||||
double rho,jx,jy,jz;
|
||||
double ux,uy,uz;
|
||||
// non-conserved moments
|
||||
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
|
||||
int nread;
|
||||
// body force due to electric field
|
||||
double rhoE;//charge density
|
||||
double Ex,Ey,Ez;
|
||||
// total body force
|
||||
double Fx,Fy,Fz;
|
||||
|
||||
const double mrt_V1=0.05263157894736842;
|
||||
const double mrt_V2=0.012531328320802;
|
||||
const double mrt_V3=0.04761904761904762;
|
||||
const double mrt_V4=0.004594820384294068;
|
||||
const double mrt_V5=0.01587301587301587;
|
||||
const double mrt_V6=0.0555555555555555555555555;
|
||||
const double mrt_V7=0.02777777777777778;
|
||||
const double mrt_V8=0.08333333333333333;
|
||||
const double mrt_V9=0.003341687552213868;
|
||||
const double mrt_V10=0.003968253968253968;
|
||||
const double mrt_V11=0.01388888888888889;
|
||||
const double mrt_V12=0.04166666666666666;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
rhoE = ChargeDensity[n];
|
||||
Ex = ElectricField[n+0*Np];
|
||||
Ey = ElectricField[n+1*Np];
|
||||
Ez = ElectricField[n+2*Np];
|
||||
//compute total body force, including input body force (Gx,Gy,Gz)
|
||||
Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
rho = fq;
|
||||
m1 = -30.0*fq;
|
||||
m2 = 12.0*fq;
|
||||
|
||||
// q=1
|
||||
nread = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
fq = dist[nread]; // reading the f1 data into register fq
|
||||
//fp = dist[10*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jx = fq;
|
||||
m4 = -4.0*fq;
|
||||
m9 = 2.0*fq;
|
||||
m10 = -4.0*fq;
|
||||
|
||||
// f2 = dist[10*Np+n];
|
||||
nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
fq = dist[nread]; // reading the f2 data into register fq
|
||||
//fq = dist[Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*(fq);
|
||||
m2 -= 4.0*(fq);
|
||||
jx -= fq;
|
||||
m4 += 4.0*(fq);
|
||||
m9 += 2.0*(fq);
|
||||
m10 -= 4.0*(fq);
|
||||
|
||||
// q=3
|
||||
nread = neighborList[n+2*Np]; // neighbor 4
|
||||
fq = dist[nread];
|
||||
//fq = dist[11*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jy = fq;
|
||||
m6 = -4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 = fq;
|
||||
m12 = -2.0*fq;
|
||||
|
||||
// q = 4
|
||||
nread = neighborList[n+3*Np]; // neighbor 3
|
||||
fq = dist[nread];
|
||||
//fq = dist[2*Np+n];
|
||||
rho+= fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jy -= fq;
|
||||
m6 += 4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 += fq;
|
||||
m12 -= 2.0*fq;
|
||||
|
||||
// q=5
|
||||
nread = neighborList[n+4*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[12*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jz = fq;
|
||||
m8 = -4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 -= fq;
|
||||
m12 += 2.0*fq;
|
||||
|
||||
|
||||
// q = 6
|
||||
nread = neighborList[n+5*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[3*Np+n];
|
||||
rho+= fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jz -= fq;
|
||||
m8 += 4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 -= fq;
|
||||
m12 += 2.0*fq;
|
||||
|
||||
// q=7
|
||||
nread = neighborList[n+6*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[13*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 = fq;
|
||||
m16 = fq;
|
||||
m17 = -fq;
|
||||
|
||||
// q = 8
|
||||
nread = neighborList[n+7*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[4*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 += fq;
|
||||
m16 -= fq;
|
||||
m17 += fq;
|
||||
|
||||
// q=9
|
||||
nread = neighborList[n+8*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[14*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 -= fq;
|
||||
m16 += fq;
|
||||
m17 += fq;
|
||||
|
||||
// q = 10
|
||||
nread = neighborList[n+9*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[5*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 -= fq;
|
||||
m16 -= fq;
|
||||
m17 -= fq;
|
||||
|
||||
// q=11
|
||||
nread = neighborList[n+10*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[15*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 = fq;
|
||||
m16 -= fq;
|
||||
m18 = fq;
|
||||
|
||||
// q=12
|
||||
nread = neighborList[n+11*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[6*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 += fq;
|
||||
m16 += fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=13
|
||||
nread = neighborList[n+12*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[16*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 -= fq;
|
||||
m16 -= fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=14
|
||||
nread = neighborList[n+13*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[7*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 -= fq;
|
||||
m16 += fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=15
|
||||
nread = neighborList[n+14*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[17*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 = fq;
|
||||
m17 += fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=16
|
||||
nread = neighborList[n+15*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[8*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 += fq;
|
||||
m17 -= fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=17
|
||||
//fq = dist[18*Np+n];
|
||||
nread = neighborList[n+16*Np];
|
||||
fq = dist[nread];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 -= fq;
|
||||
m17 += fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=18
|
||||
nread = neighborList[n+17*Np];
|
||||
fq = dist[nread];
|
||||
//fq = dist[9*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 -= fq;
|
||||
m17 -= fq;
|
||||
m18 -= fq;
|
||||
|
||||
// write the velocity
|
||||
ux = jx / rho0;
|
||||
uy = jy / rho0;
|
||||
uz = jz / rho0;
|
||||
Velocity[n] = ux;
|
||||
Velocity[Np+n] = uy;
|
||||
Velocity[2*Np+n] = uz;
|
||||
|
||||
//..............incorporate external force................................................
|
||||
//..............carry out relaxation process...............................................
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
|
||||
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
|
||||
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
|
||||
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
|
||||
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
|
||||
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
|
||||
m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
|
||||
m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
|
||||
m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
|
||||
m16 = m16 + rlx_setB*( - m16);
|
||||
m17 = m17 + rlx_setB*( - m17);
|
||||
m18 = m18 + rlx_setB*( - m18);
|
||||
//.......................................................................................................
|
||||
//.................inverse transformation......................................................
|
||||
|
||||
// q=0
|
||||
fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
|
||||
dist[n] = fq;
|
||||
|
||||
// q = 1
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx;
|
||||
nread = neighborList[n+Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q=2
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
|
||||
nread = neighborList[n];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 3
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
|
||||
nread = neighborList[n+3*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 4
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
|
||||
nread = neighborList[n+2*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 5
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
|
||||
nread = neighborList[n+5*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 6
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
|
||||
nread = neighborList[n+4*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 7
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
|
||||
nread = neighborList[n+7*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 8
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
|
||||
nread = neighborList[n+6*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 9
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
|
||||
nread = neighborList[n+9*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 10
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
|
||||
nread = neighborList[n+8*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 11
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
|
||||
nread = neighborList[n+11*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 12
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
|
||||
nread = neighborList[n+10*Np];
|
||||
dist[nread]= fq;
|
||||
|
||||
// q = 13
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
|
||||
nread = neighborList[n+13*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q= 14
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
|
||||
nread = neighborList[n+12*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
|
||||
// q = 15
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
|
||||
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
|
||||
nread = neighborList[n+15*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 16
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
|
||||
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
|
||||
nread = neighborList[n+14*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
|
||||
// q = 17
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
|
||||
-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
|
||||
nread = neighborList[n+17*Np];
|
||||
dist[nread] = fq;
|
||||
|
||||
// q = 18
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
|
||||
-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
|
||||
nread = neighborList[n+16*Np];
|
||||
dist[nread] = fq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
double fq;
|
||||
// conserved momemnts
|
||||
double rho,jx,jy,jz;
|
||||
double ux,uy,uz;
|
||||
// non-conserved moments
|
||||
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
|
||||
// body force due to electric field
|
||||
double rhoE;//charge density
|
||||
double Ex,Ey,Ez;
|
||||
// total body force
|
||||
double Fx,Fy,Fz;
|
||||
|
||||
const double mrt_V1=0.05263157894736842;
|
||||
const double mrt_V2=0.012531328320802;
|
||||
const double mrt_V3=0.04761904761904762;
|
||||
const double mrt_V4=0.004594820384294068;
|
||||
const double mrt_V5=0.01587301587301587;
|
||||
const double mrt_V6=0.0555555555555555555555555;
|
||||
const double mrt_V7=0.02777777777777778;
|
||||
const double mrt_V8=0.08333333333333333;
|
||||
const double mrt_V9=0.003341687552213868;
|
||||
const double mrt_V10=0.003968253968253968;
|
||||
const double mrt_V11=0.01388888888888889;
|
||||
const double mrt_V12=0.04166666666666666;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
rhoE = ChargeDensity[n];
|
||||
Ex = ElectricField[n+0*Np];
|
||||
Ey = ElectricField[n+1*Np];
|
||||
Ez = ElectricField[n+2*Np];
|
||||
//compute total body force, including input body force (Gx,Gy,Gz)
|
||||
Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;//the extra factors at the end necessarily convert unit from phys to LB
|
||||
Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
rho = fq;
|
||||
m1 = -30.0*fq;
|
||||
m2 = 12.0*fq;
|
||||
|
||||
// q=1
|
||||
fq = dist[2*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jx = fq;
|
||||
m4 = -4.0*fq;
|
||||
m9 = 2.0*fq;
|
||||
m10 = -4.0*fq;
|
||||
|
||||
// f2 = dist[10*Np+n];
|
||||
fq = dist[1*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*(fq);
|
||||
m2 -= 4.0*(fq);
|
||||
jx -= fq;
|
||||
m4 += 4.0*(fq);
|
||||
m9 += 2.0*(fq);
|
||||
m10 -= 4.0*(fq);
|
||||
|
||||
// q=3
|
||||
fq = dist[4*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jy = fq;
|
||||
m6 = -4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 = fq;
|
||||
m12 = -2.0*fq;
|
||||
|
||||
// q = 4
|
||||
fq = dist[3*Np+n];
|
||||
rho+= fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jy -= fq;
|
||||
m6 += 4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 += fq;
|
||||
m12 -= 2.0*fq;
|
||||
|
||||
// q=5
|
||||
fq = dist[6*Np+n];
|
||||
rho += fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jz = fq;
|
||||
m8 = -4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 -= fq;
|
||||
m12 += 2.0*fq;
|
||||
|
||||
// q = 6
|
||||
fq = dist[5*Np+n];
|
||||
rho+= fq;
|
||||
m1 -= 11.0*fq;
|
||||
m2 -= 4.0*fq;
|
||||
jz -= fq;
|
||||
m8 += 4.0*fq;
|
||||
m9 -= fq;
|
||||
m10 += 2.0*fq;
|
||||
m11 -= fq;
|
||||
m12 += 2.0*fq;
|
||||
|
||||
// q=7
|
||||
fq = dist[8*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 = fq;
|
||||
m16 = fq;
|
||||
m17 = -fq;
|
||||
|
||||
// q = 8
|
||||
fq = dist[7*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 += fq;
|
||||
m16 -= fq;
|
||||
m17 += fq;
|
||||
|
||||
// q=9
|
||||
fq = dist[10*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 -= fq;
|
||||
m16 += fq;
|
||||
m17 += fq;
|
||||
|
||||
// q = 10
|
||||
fq = dist[9*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 += fq;
|
||||
m12 += fq;
|
||||
m13 -= fq;
|
||||
m16 -= fq;
|
||||
m17 -= fq;
|
||||
|
||||
// q=11
|
||||
fq = dist[12*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 = fq;
|
||||
m16 -= fq;
|
||||
m18 = fq;
|
||||
|
||||
// q=12
|
||||
fq = dist[11*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 += fq;
|
||||
m16 += fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=13
|
||||
fq = dist[14*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx += fq;
|
||||
m4 += fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 -= fq;
|
||||
m16 -= fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=14
|
||||
fq = dist[13*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jx -= fq;
|
||||
m4 -= fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 += fq;
|
||||
m10 += fq;
|
||||
m11 -= fq;
|
||||
m12 -= fq;
|
||||
m15 -= fq;
|
||||
m16 += fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=15
|
||||
fq = dist[16*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 = fq;
|
||||
m17 += fq;
|
||||
m18 -= fq;
|
||||
|
||||
// q=16
|
||||
fq = dist[15*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 += fq;
|
||||
m17 -= fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=17
|
||||
fq = dist[18*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy += fq;
|
||||
m6 += fq;
|
||||
jz -= fq;
|
||||
m8 -= fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 -= fq;
|
||||
m17 += fq;
|
||||
m18 += fq;
|
||||
|
||||
// q=18
|
||||
fq = dist[17*Np+n];
|
||||
rho += fq;
|
||||
m1 += 8.0*fq;
|
||||
m2 += fq;
|
||||
jy -= fq;
|
||||
m6 -= fq;
|
||||
jz += fq;
|
||||
m8 += fq;
|
||||
m9 -= 2.0*fq;
|
||||
m10 -= 2.0*fq;
|
||||
m14 -= fq;
|
||||
m17 -= fq;
|
||||
m18 -= fq;
|
||||
|
||||
// write the velocity
|
||||
ux = jx / rho0;
|
||||
uy = jy / rho0;
|
||||
uz = jz / rho0;
|
||||
Velocity[n] = ux;
|
||||
Velocity[Np+n] = uy;
|
||||
Velocity[2*Np+n] = uz;
|
||||
|
||||
|
||||
//........................................................................
|
||||
// READ THE DISTRIBUTIONS
|
||||
// (read from opposite array due to previous swap operation)
|
||||
//........................................................................
|
||||
|
||||
//..............incorporate external force................................................
|
||||
//..............carry out relaxation process...............................................
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2);
|
||||
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
|
||||
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
|
||||
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9);
|
||||
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11);
|
||||
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12);
|
||||
m13 = m13 + rlx_setA*((jx*jy/rho0) - m13);
|
||||
m14 = m14 + rlx_setA*((jy*jz/rho0) - m14);
|
||||
m15 = m15 + rlx_setA*((jx*jz/rho0) - m15);
|
||||
m16 = m16 + rlx_setB*( - m16);
|
||||
m17 = m17 + rlx_setB*( - m17);
|
||||
m18 = m18 + rlx_setB*( - m18);
|
||||
//.......................................................................................................
|
||||
//.................inverse transformation......................................................
|
||||
|
||||
// q=0
|
||||
fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2;
|
||||
dist[n] = fq;
|
||||
|
||||
// q = 1
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10) + 0.16666666*Fx;
|
||||
dist[1*Np+n] = fq;
|
||||
|
||||
// q=2
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx;
|
||||
dist[2*Np+n] = fq;
|
||||
|
||||
// q = 3
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy;
|
||||
dist[3*Np+n] = fq;
|
||||
|
||||
// q = 4
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy;
|
||||
dist[4*Np+n] = fq;
|
||||
|
||||
// q = 5
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz;
|
||||
dist[5*Np+n] = fq;
|
||||
|
||||
// q = 6
|
||||
fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz;
|
||||
dist[6*Np+n] = fq;
|
||||
|
||||
// q = 7
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy);
|
||||
dist[7*Np+n] = fq;
|
||||
|
||||
|
||||
// q = 8
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy);
|
||||
dist[8*Np+n] = fq;
|
||||
|
||||
// q = 9
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy);
|
||||
dist[9*Np+n] = fq;
|
||||
|
||||
// q = 10
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)
|
||||
+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11
|
||||
+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy);
|
||||
dist[10*Np+n] = fq;
|
||||
|
||||
|
||||
// q = 11
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz);
|
||||
dist[11*Np+n] = fq;
|
||||
|
||||
// q = 12
|
||||
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz);
|
||||
dist[12*Np+n] = fq;
|
||||
|
||||
// q = 13
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz);
|
||||
dist[13*Np+n] = fq;
|
||||
|
||||
// q= 14
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)
|
||||
+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11
|
||||
-mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz);
|
||||
|
||||
dist[14*Np+n] = fq;
|
||||
|
||||
// q = 15
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)
|
||||
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz);
|
||||
dist[15*Np+n] = fq;
|
||||
|
||||
// q = 16
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)
|
||||
-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz);
|
||||
dist[16*Np+n] = fq;
|
||||
|
||||
|
||||
// q = 17
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)
|
||||
-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz);
|
||||
dist[17*Np+n] = fq;
|
||||
|
||||
// q = 18
|
||||
fq = mrt_V1*rho+mrt_V9*m1
|
||||
+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)
|
||||
-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz);
|
||||
dist[18*Np+n] = fq;
|
||||
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_AAodd_StokesMRT<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<<NBLOCKS,NTHREADS >>>(dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ color lattice boltzmann model
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM):
|
||||
rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0),
|
||||
tauA(0), tauB(0), rhoA(0), rhoB(0), alpha(0), beta(0),
|
||||
@ -288,12 +289,17 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase)
|
||||
|
||||
auto LabelList = color_db->getVector<int>( "ComponentLabels" );
|
||||
auto AffinityList = color_db->getVector<double>( "ComponentAffinity" );
|
||||
auto WettingConvention = color_db->getWithDefault<std::string>( "WettingConvention", "none" );
|
||||
|
||||
NLABELS=LabelList.size();
|
||||
if (NLABELS != AffinityList.size()){
|
||||
ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! \n");
|
||||
}
|
||||
|
||||
if (WettingConvention == "SCAL"){
|
||||
for (size_t idx=0; idx<NLABELS; idx++) AffinityList[idx] *= -1.0;
|
||||
}
|
||||
|
||||
double label_count[NLABELS];
|
||||
double label_count_global[NLABELS];
|
||||
// Assign the labels
|
||||
@ -687,20 +693,15 @@ void ScaLBL_ColorModel::Run(){
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
comm.barrier();
|
||||
PROFILE_START("Loop");
|
||||
//std::shared_ptr<Database> analysis_db;
|
||||
bool Regular = false;
|
||||
auto current_db = db->cloneDatabase();
|
||||
runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
|
||||
//analysis.createThreads( analysis_method, 4 );
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax ) {
|
||||
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
|
||||
PROFILE_START("Update");
|
||||
@ -1033,10 +1034,10 @@ void ScaLBL_ColorModel::Run(){
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
//************************************************************************
|
||||
ScaLBL_Comm->Barrier();
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
@ -1237,6 +1238,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){
|
||||
}
|
||||
return(volume_change);
|
||||
}
|
||||
|
||||
double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){
|
||||
srand(time(NULL));
|
||||
double mass_loss =0.f;
|
||||
@ -1600,3 +1602,68 @@ void ScaLBL_ColorModel::WriteDebug(){
|
||||
fclose(CGZ_FILE);
|
||||
*/
|
||||
}
|
||||
|
||||
FlowAdaptor::FlowAdaptor(ScaLBL_ColorModel &M){
|
||||
Nx = M.Dm->Nx;
|
||||
Ny = M.Dm->Ny;
|
||||
Nz = M.Dm->Nz;
|
||||
timestep=-1;
|
||||
timestep_previous=-1;
|
||||
|
||||
phi.resize(Nx,Ny,Nz); phi.fill(0); // phase indicator field
|
||||
phi_t.resize(Nx,Ny,Nz); phi_t.fill(0); // time derivative for the phase indicator field
|
||||
}
|
||||
|
||||
FlowAdaptor::~FlowAdaptor(){
|
||||
|
||||
}
|
||||
|
||||
double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){
|
||||
|
||||
double INTERFACE_CUTOFF = M.color_db->getWithDefault<double>( "move_interface_cutoff", 0.975 );
|
||||
double MOVE_INTERFACE_FACTOR = M.color_db->getWithDefault<double>( "move_interface_factor", 10.0 );
|
||||
|
||||
ScaLBL_CopyToHost( phi.data(), M.Phi, Nx*Ny*Nz* sizeof( double ) );
|
||||
/* compute the local derivative of phase indicator field */
|
||||
double beta = M.beta;
|
||||
double factor = 0.5/beta;
|
||||
double total_interface_displacement = 0.0;
|
||||
double total_interface_sites = 0.0;
|
||||
for (int n=0; n<Nx*Ny*Nz; n++){
|
||||
/* compute the distance to the interface */
|
||||
double value1 = M.Averages->Phi(n);
|
||||
double dist1 = factor*log((1.0+value1)/(1.0-value1));
|
||||
double value2 = phi(n);
|
||||
double dist2 = factor*log((1.0+value2)/(1.0-value2));
|
||||
phi_t(n) = value2;
|
||||
if (value1 < INTERFACE_CUTOFF && value1 > -1*INTERFACE_CUTOFF && value2 < INTERFACE_CUTOFF && value2 > -1*INTERFACE_CUTOFF ){
|
||||
/* time derivative of distance */
|
||||
double dxdt = 0.125*(dist2-dist1);
|
||||
/* extrapolate to move the distance further */
|
||||
double dist3 = dist2 + MOVE_INTERFACE_FACTOR*dxdt;
|
||||
/* compute the new phase interface */
|
||||
phi_t(n) = (2.f*(exp(-2.f*beta*(dist3)))/(1.f+exp(-2.f*beta*(dist3))) - 1.f);
|
||||
total_interface_displacement += fabs(MOVE_INTERFACE_FACTOR*dxdt);
|
||||
total_interface_sites += 1.0;
|
||||
}
|
||||
}
|
||||
ScaLBL_CopyToDevice( M.Phi, phi_t.data(), Nx*Ny*Nz* sizeof( double ) );
|
||||
|
||||
|
||||
/* ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){
|
||||
if (Dm->kproc()==0){
|
||||
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0);
|
||||
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1);
|
||||
ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2);
|
||||
}
|
||||
if (Dm->kproc() == nprocz-1){
|
||||
ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1);
|
||||
ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2);
|
||||
ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3);
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
@ -30,6 +30,7 @@ public:
|
||||
void Initialize();
|
||||
void Run();
|
||||
void WriteDebug();
|
||||
void getPhaseField(DoubleArray &f);
|
||||
|
||||
bool Restart,pBC;
|
||||
bool REVERSE_FLOW_DIRECTION;
|
||||
@ -86,3 +87,16 @@ private:
|
||||
double MorphOpenConnected(double target_volume_change);
|
||||
};
|
||||
|
||||
class FlowAdaptor{
|
||||
public:
|
||||
FlowAdaptor(ScaLBL_ColorModel &M);
|
||||
~FlowAdaptor();
|
||||
double MoveInterface(ScaLBL_ColorModel &M);
|
||||
DoubleArray phi;
|
||||
DoubleArray phi_t;
|
||||
private:
|
||||
int Nx, Ny, Nz;
|
||||
int timestep;
|
||||
int timestep_previous;
|
||||
};
|
||||
|
||||
|
@ -490,14 +490,10 @@ void ScaLBL_DFHModel::Run(){
|
||||
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("No. of timesteps: %i \n", timestepMax);
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
bool Regular = true;
|
||||
PROFILE_START("Loop");
|
||||
runAnalysis analysis( analysis_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
|
||||
@ -589,10 +585,10 @@ void ScaLBL_DFHModel::Run(){
|
||||
//************************************************************************
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
|
@ -747,14 +747,9 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
comm.barrier();
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
PROFILE_START("Loop");
|
||||
while (timestep < timestepMax ) {
|
||||
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
|
||||
@ -846,10 +841,10 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){
|
||||
PROFILE_STOP("Loop");
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
//************************************************************************
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
@ -874,14 +869,13 @@ void ScaLBL_FreeLeeModel::Run_SingleFluid(){
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
PROFILE_START("Loop");
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax ) {
|
||||
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
|
||||
PROFILE_START("Update");
|
||||
@ -944,10 +938,10 @@ void ScaLBL_FreeLeeModel::Run_SingleFluid(){
|
||||
PROFILE_STOP("Loop");
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
//************************************************************************
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
@ -910,10 +910,8 @@ void ScaLBL_GreyscaleColorModel::Run(){
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
@ -923,6 +921,7 @@ void ScaLBL_GreyscaleColorModel::Run(){
|
||||
auto current_db = db->cloneDatabase();
|
||||
//runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map );
|
||||
//analysis.createThreads( analysis_method, 4 );
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax ) {
|
||||
//if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); }
|
||||
PROFILE_START("Update");
|
||||
@ -1319,10 +1318,10 @@ void ScaLBL_GreyscaleColorModel::Run(){
|
||||
PROFILE_SAVE("lbpm_color_simulator",1);
|
||||
//************************************************************************
|
||||
ScaLBL_Comm->Barrier();
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
@ -485,10 +485,8 @@ void ScaLBL_GreyscaleModel::Run(){
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
Minkowski Morphology(Mask);
|
||||
@ -500,6 +498,7 @@ void ScaLBL_GreyscaleModel::Run(){
|
||||
double rlx_eff = 1.0/tau_eff;
|
||||
double error = 1.0;
|
||||
double flow_rate_previous = 0.0;
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax && error > tolerance) {
|
||||
//************************************************************************/
|
||||
// *************ODD TIMESTEP*************//
|
||||
@ -744,10 +743,10 @@ void ScaLBL_GreyscaleModel::Run(){
|
||||
//************************************************************************
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
@ -784,7 +784,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){
|
||||
//.......create and start timer............
|
||||
//double starttime,stoptime,cputime;
|
||||
//ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
//starttime = MPI_Wtime();
|
||||
//auto t1 = std::chrono::system_clock::now();
|
||||
|
||||
for (int ic=0; ic<number_ion_species; ic++){
|
||||
timestep=0;
|
||||
@ -886,10 +886,10 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){
|
||||
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
}
|
||||
//************************************************************************/
|
||||
//stoptime = MPI_Wtime();
|
||||
//if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
//// Compute the walltime per timestep
|
||||
//cputime = (stoptime - starttime)/timestep;
|
||||
//auto t2 = std::chrono::system_clock::now();
|
||||
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
//// Performance obtained from each node
|
||||
//double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
@ -230,14 +230,13 @@ void ScaLBL_MRTModel::Run(){
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_DeviceBarrier(); comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
timestep=0;
|
||||
double error = 1.0;
|
||||
double flow_rate_previous = 0.0;
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax && error > tolerance) {
|
||||
//************************************************************************/
|
||||
timestep++;
|
||||
@ -354,10 +353,10 @@ void ScaLBL_MRTModel::Run(){
|
||||
}
|
||||
}
|
||||
//************************************************************************/
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
@ -522,8 +522,8 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, int timestep_from_Study){
|
||||
|
||||
//.......create and start timer............
|
||||
//double starttime,stoptime,cputime;
|
||||
//ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
//starttime = MPI_Wtime();
|
||||
//comm.barrier();
|
||||
//auto t1 = std::chrono::system_clock::now();
|
||||
|
||||
timestep=0;
|
||||
double error = 1.0;
|
||||
@ -579,11 +579,11 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, int timestep_from_Study){
|
||||
}
|
||||
|
||||
//************************************************************************/
|
||||
//stoptime = MPI_Wtime();
|
||||
////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n");
|
||||
////if (rank==0) printf("---------------------------------------------------------------------------\n");
|
||||
//// Compute the walltime per timestep
|
||||
//cputime = (stoptime - starttime)/timestep;
|
||||
//auto t2 = std::chrono::system_clock::now();
|
||||
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
//// Performance obtained from each node
|
||||
//double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
@ -573,16 +573,14 @@ void ScaLBL_StokesModel::Run(){
|
||||
}
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
starttime = MPI_Wtime();
|
||||
if (rank==0) printf("****************************************************************\n");
|
||||
if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax);
|
||||
if (rank==0) printf("****************************************************************\n");
|
||||
timestep=0;
|
||||
double error = 1.0;
|
||||
double flow_rate_previous = 0.0;
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax && error > tolerance) {
|
||||
//************************************************************************/
|
||||
timestep++;
|
||||
@ -700,10 +698,10 @@ void ScaLBL_StokesModel::Run(){
|
||||
}
|
||||
}
|
||||
//************************************************************************/
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user