CPU and GPU versions without averaging tools, examples for each case using 8 MPI processes, currently set up for HokieSpeed supercomputing cluster

This commit is contained in:
James E McClure 2013-08-26 15:12:25 -04:00
parent 29ccc29a61
commit 438f075331
76 changed files with 18008 additions and 0 deletions

37
Makefile Normal file
View File

@ -0,0 +1,37 @@
# Build rules for the MPI + CUDA color lattice-Boltzmann executable.
#
#   bin/ColorLBM     MPI driver (built with mpicxx) linked against the static
#                    CUDA kernel libraries below.
#   lib/libcu*.a     one static library per CUDA source file (built with nvcc).
#   clean            remove everything that was built.
#
# CUDA_FLAGS selects the GPU architecture passed to nvcc; override it on the
# command line if needed, e.g. `make CUDA_FLAGS="-arch sm_35"`.
CUDA_FLAGS=-arch sm_20

# `clean` does not produce a file, so it must run even if a file or directory
# named `clean` happens to exist.
.PHONY: clean

bin/ColorLBM:gpu/lb2_Color_mpi.cpp lib/libcuColor.a lib/libcuD3Q19.a lib/libcuD3Q7.a lib/libcuExtras.a
	mkdir -p bin
	mpicxx -O3 -o bin/ColorLBM gpu/lb2_Color_mpi.cpp -lcuColor -lcuD3Q19 -lcuD3Q7 -lcuExtras -Llib

# Disabled single-GPU targets, kept for reference.
#bin/gpuMRT:gpu/lb1_MRT.cu lib/libcuMRT.a lib/libcuD3Q19.a
#	mkdir -p bin
#	nvcc -O3 -o bin/gpuMRT $(CUDA_FLAGS) gpu/lb1_MRT.cu -lcuMRT -lcuD3Q19 -Llib
#bin/gpuColor:gpu/lb2_Color.cu lib/libcuColor.a lib/libcuD3Q19.a
#	mkdir -p bin
#	nvcc -o bin/gpuColor $(CUDA_FLAGS) gpu/lb2_Color.cu -lcuColor -lcuD3Q19 -Llib

lib/libcuExtras.a: gpu/CudaExtras.cu
	mkdir -p lib
	nvcc -lib $(CUDA_FLAGS) gpu/CudaExtras.cu -o lib/libcuExtras.a

#lib/libcuMRT.a: gpu/MRT.cu
#	mkdir -p lib
#	nvcc -lib $(CUDA_FLAGS) gpu/MRT.cu -o lib/libcuMRT.a

lib/libcuD3Q7.a: gpu/D3Q7.cu
	mkdir -p lib
	nvcc -lib $(CUDA_FLAGS) gpu/D3Q7.cu -o lib/libcuD3Q7.a

lib/libcuD3Q19.a: gpu/D3Q19.cu
	mkdir -p lib
	nvcc -lib $(CUDA_FLAGS) gpu/D3Q19.cu -o lib/libcuD3Q19.a

lib/libcuColor.a: gpu/Color.cu
	mkdir -p lib
	nvcc -lib $(CUDA_FLAGS) gpu/Color.cu -o lib/libcuColor.a

# -f keeps rm from failing (and aborting the recipe) when a directory is
# already empty or has not been created yet.
clean:
	rm -f bin/* lib/*

756
cpu/Color.cpp Normal file
View File

@ -0,0 +1,756 @@
#include <math.h>
// Initialize the two component densities and the phase indicator on every node.
//   ID   - per-node label: 1 selects pure component A, 2 selects pure
//          component B, any other value selects the mixed state (das, dbs)
//   Den  - output, two interleaved values per node: Den[2*n], Den[2*n+1]
//   Phi  - output, one value per node: (A-B)/(A+B) of the densities written
//   das, dbs - densities written for nodes whose label is neither 1 nor 2
//   N    - number of nodes
extern void InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int N)
{
    for (int node = 0; node < N; ++node) {
        double *pair = Den + 2*node;   // the two density slots of this node
        switch (ID[node]) {
        case 1:   // pure component A
            pair[0] = 1.0;
            pair[1] = 0.0;
            Phi[node] = 1.0;
            break;
        case 2:   // pure component B
            pair[0] = 0.0;
            pair[1] = 1.0;
            Phi[node] = -1.0;
            break;
        default:  // every other label gets the supplied densities
            pair[0] = das;
            pair[1] = dbs;
            Phi[node] = (das-dbs)/(das+dbs);
            break;
        }
    }
}
// Fill the 19 distributions of every node with their initial values.
//   ID      - per-node label; nodes with ID > 0 receive the weights below,
//             all other nodes are filled with the marker value -1.0
//   f_even  - 10 components per node, component q stored at f_even[q*N+n]
//   f_odd   -  9 components per node, component q stored at f_odd[q*N+n]
//   Nx, Ny, Nz - lattice dimensions, N = Nx*Ny*Nz
extern void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
    const double wRest = 0.3333333333333333;     // f_even component 0
    const double wAxis = 0.055555555555555555;   // f_odd 0..2 and f_even 1..3
    const double wDiag = 0.0277777777777778;     // f_odd 3..8 and f_even 4..9
    const int total = Nx*Ny*Nz;

    for (int node = 0; node < total; ++node) {
        if (!(ID[node] > 0)) {
            // Not a fluid node: mark every component with -1.0
            for (int q = 0; q < 9; ++q) {
                f_even[q*total+node] = -1.0;
                f_odd[q*total+node] = -1.0;
            }
            f_even[9*total+node] = -1.0;
            continue;
        }
        f_even[node] = wRest;
        for (int q = 0; q < 3; ++q) {
            f_odd[q*total+node] = wAxis;
            f_even[(q+1)*total+node] = wAxis;
        }
        for (int q = 3; q < 9; ++q) {
            f_odd[q*total+node] = wDiag;
            f_even[(q+1)*total+node] = wDiag;
        }
    }
}
// Compute the three first-order moments of the distributions on each node.
//   ID        - per-node label; only nodes with ID > 0 are processed, the
//               entries of vel for all other nodes are left untouched
//   disteven  - even-numbered distributions, f(2q) stored at disteven[q*N+n]
//   distodd   - odd-numbered distributions, f(2q+1) stored at distodd[q*N+n]
//   vel       - output, component-major: vel[n], vel[N+n], vel[2*N+n]
//   Nx, Ny, Nz - lattice dimensions, N = Nx*Ny*Nz
// The sums are not divided by the density.
extern void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz)
{
    const int total = Nx*Ny*Nz;
    double f[19];   // f[q] = distribution q at the current node (f[0] is not used)

    for (int node = 0; node < total; ++node) {
        if (!(ID[node] > 0)) continue;

        // Gather the 18 moving distributions of this node
        for (int c = 0; c < 9; ++c) {
            f[2*c+1] = distodd[c*total+node];
            f[2*c+2] = disteven[(c+1)*total+node];
        }

        // Same summation order as the reference expressions
        const double vx = f[1]-f[2]+f[7]-f[8]+f[9]-f[10]+f[11]-f[12]+f[13]-f[14];
        const double vy = f[3]-f[4]+f[7]-f[8]-f[9]+f[10]+f[15]-f[16]+f[17]-f[18];
        const double vz = f[5]-f[6]+f[11]-f[12]-f[13]+f[14]+f[15]-f[16]-f[17]+f[18];

        vel[node] = vx;
        vel[total+node] = vy;
        vel[2*total+node] = vz;
    }
}
//*************************************************************************
//*************************************************************************
// Apply a prescribed-density boundary condition on the first Nx*Ny nodes
// (flat indices 0 .. Nx*Ny-1) of the lattice.
//   disteven - even-numbered distributions, component q stored at disteven[q*N+n]
//   distodd  - odd-numbered distributions, component q stored at distodd[q*N+n]
//   din      - density value used in the boundary formulas
//   Nx, Ny, Nz - lattice dimensions, N = Nx*Ny*Nz
// For each boundary node the routine reads all 19 distributions, derives uz
// from din and the known distributions, and overwrites f6, f12, f13, f16, f17.
// No node label is consulted, so every node in the layer is updated.
// NOTE(review): the first layer is presumably the z = 0 inlet plane - confirm
// against the caller.
extern void PressureBC_inlet(double *disteven, double *distodd, double din,
int Nx, int Ny, int Nz)
{
int n,N;
// distributions
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
double uz;
N = Nx*Ny*Nz;
// Only the first Nx*Ny flat indices are visited
for (n=0; n<Nx*Ny; n++){
//........................................................................
// Read distributions from "opposite" memory convention
//........................................................................
//........................................................................
f1 = distodd[n];
f3 = distodd[N+n];
f5 = distodd[2*N+n];
f7 = distodd[3*N+n];
f9 = distodd[4*N+n];
f11 = distodd[5*N+n];
f13 = distodd[6*N+n];
f15 = distodd[7*N+n];
f17 = distodd[8*N+n];
//........................................................................
f0 = disteven[n];
f2 = disteven[N+n];
f4 = disteven[2*N+n];
f6 = disteven[3*N+n];
f8 = disteven[4*N+n];
f10 = disteven[5*N+n];
f12 = disteven[6*N+n];
f14 = disteven[7*N+n];
f16 = disteven[8*N+n];
f18 = disteven[9*N+n];
//...................................................
// Disabled earlier formulation (opposite sign convention for uz),
// kept for reference:
//........Determine the inlet flow velocity.........
// uz = -1 + (f0+f3+f4+f1+f2+f7+f8+f10+f9
// + 2*(f5+f15+f18+f11+f14))/din;
//........Set the unknown distributions..............
// f6 = f5 - 0.3333333333333333*din*uz;
// f16 = f15 - 0.1666666666666667*din*uz;
// f17 = f16 - f3 + f4-f15+f18-f7+f8-f10+f9;
// f12= 0.5*(-din*uz+f5+f15+f18+f11+f14-f6-f16-
// f17+f1-f2-f14+f11+f7-f8-f10+f9);
// f13= -din*uz+f5+f15+f18+f11+f14-f6-f16-f17-f12;
// Active formulation: flow velocity at this boundary from din and the
// known distributions (the original comment here said "outlet")
uz = 1.0 - (f0+f4+f3+f2+f1+f8+f7+f9+ f10 +
2*(f5+ f15+f18+f11+f14))/din;
// Set the unknown distributions:
// (the values of f6, f12, f13, f16, f17 loaded above are replaced here)
f6 = f5 + 0.3333333333333333*din*uz;
f16 = f15 + 0.1666666666666667*din*uz;
f17 = f16 + f4 - f3-f15+f18+f8-f7 +f9-f10;
f12= (din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f2+f1-f14+f11-f8+f7+f9-f10)*0.5;
f13= din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f12;
//........Store in "opposite" memory location..........
disteven[3*N+n] = f6;
disteven[6*N+n] = f12;
distodd[6*N+n] = f13;
disteven[8*N+n] = f16;
distodd[8*N+n] = f17;
//...................................................
}
}
// Apply a prescribed-density boundary condition on the nodes with flat index
// outlet .. N-1.
//   disteven - even-numbered distributions, component q stored at disteven[q*N+n]
//   distodd  - odd-numbered distributions, component q stored at distodd[q*N+n]
//   dout     - density value used in the boundary formulas
//   Nx, Ny, Nz - lattice dimensions, N = Nx*Ny*Nz
//   S        - not used by this CPU implementation; kept so the signature
//              stays unchanged for existing callers
//   outlet   - flat index of the first node the condition is applied to
//              (presumably Nx*Ny*(Nz-1), the last z-layer - confirm against
//              the caller); negative values are treated as 0
// For each boundary node the routine derives uz from dout and the known
// distributions and overwrites f5, f11, f14, f15, f18.
//
// The loop previously started at n = 0 and ignored `outlet`, which rewrote
// those five distributions at every node of the lattice instead of only in
// the outlet region.
extern void PressureBC_outlet(double *disteven, double *distodd, double dout,
int Nx, int Ny, int Nz, int S, int outlet)
{
    int n,N;
    // distributions
    double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
    double f10,f11,f12,f13,f14,f15,f16,f17,f18;
    double uz;

    (void) S;   // unused, see header comment
    N = Nx*Ny*Nz;
    // Never index before the start of the arrays
    const int first = (outlet > 0) ? outlet : 0;

    // Loop over the boundary nodes only
    for (n=first; n<N; n++){
        //........................................................................
        // Read the known distributions from "opposite" memory convention
        // (f5, f11, f14, f15, f18 are the unknowns and are not loaded)
        //........................................................................
        f1 = distodd[n];
        f3 = distodd[N+n];
        f7 = distodd[3*N+n];
        f9 = distodd[4*N+n];
        f13 = distodd[6*N+n];
        f17 = distodd[8*N+n];
        //........................................................................
        f0 = disteven[n];
        f2 = disteven[N+n];
        f4 = disteven[2*N+n];
        f6 = disteven[3*N+n];
        f8 = disteven[4*N+n];
        f10 = disteven[5*N+n];
        f12 = disteven[6*N+n];
        f16 = disteven[8*N+n];
        //........Determine the outlet flow velocity.........
        uz = -1.0 + (f0+f4+f3+f2+f1+f8+f7+f9+f10 + 2*(f6+f16+f17+f12+f13))/dout;
        //........Set the unknown distributions..............
        f5 = f6 - 0.33333333333333338*dout* uz;
        f15 = f16 - 0.16666666666666678*dout* uz;
        f18 = f15 - f4 + f3-f16+f17-f8+f7-f9+f10;
        f11 = (-dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18+f2-f1-f13+f12+f8-f7-f9+f10)*0.5;
        f14 = -dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18-f11;
        //........Store in "opposite" memory location..........
        distodd[2*N+n] = f5;
        distodd[5*N+n] = f11;
        disteven[7*N+n] = f14;
        distodd[7*N+n] = f15;
        disteven[9*N+n] = f18;
        //...................................................
    }
}
//*************************************************************************
// Flat index of the node at (i+di, j+dj, k+dk), where each offset is -1, 0
// or +1 and every coordinate wraps around periodically.
static inline int ColorGradNeighbor(int i, int j, int k, int di, int dj, int dk,
        int Nx, int Ny, int Nz)
{
    int in = i + di;
    int jn = j + dj;
    int kn = k + dk;
    if (in < 0) in += Nx; else if (!(in < Nx)) in -= Nx;
    if (jn < 0) jn += Ny; else if (!(jn < Ny)) jn -= Ny;
    if (kn < 0) kn += Nz; else if (!(kn < Nz)) kn -= Nz;
    return kn*Nx*Ny + jn*Nx + in;
}

// Compute the (un-normalized) color gradient from the phase indicator field.
//   ID        - not read by this routine; every node is processed
//   phi       - phase indicator, one value per node
//   ColorGrad - output, three interleaved components per node:
//               ColorGrad[3*n], ColorGrad[3*n+1], ColorGrad[3*n+2]
//   Nx, Ny, Nz - lattice dimensions; neighbors wrap periodically in x, y and z
// The gradient is a weighted difference over the 18 neighbors: weight 1 for
// the 6 axis neighbors and 0.5 for the 12 edge neighbors.
extern void ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz)
{
    // Offset (di,dj,dk) of neighbor q+1, in the numbering used by the sums below
    static const int offset[18][3] = {
        {-1, 0, 0}, { 1, 0, 0},     //  1,  2
        { 0,-1, 0}, { 0, 1, 0},     //  3,  4
        { 0, 0,-1}, { 0, 0, 1},     //  5,  6
        {-1,-1, 0}, { 1, 1, 0},     //  7,  8
        {-1, 1, 0}, { 1,-1, 0},     //  9, 10
        {-1, 0,-1}, { 1, 0, 1},     // 11, 12
        {-1, 0, 1}, { 1, 0,-1},     // 13, 14
        { 0,-1,-1}, { 0, 1, 1},     // 15, 16
        { 0,-1, 1}, { 0, 1,-1}      // 17, 18
    };
    const int plane = Nx*Ny;
    const int total = Nx*Ny*Nz;
    double f[19];   // f[q] = phi at neighbor q (f[0] is not used)

    (void) ID;

    for (int node = 0; node < total; ++node) {
        // Back out the 3-D indices of this node
        const int k = node/plane;
        const int j = (node - plane*k)/Nx;
        const int i = node - plane*k - Nx*j;

        // Read the phase indicator at the 18 neighbors
        for (int q = 0; q < 18; ++q) {
            f[q+1] = phi[ColorGradNeighbor(i, j, k,
                    offset[q][0], offset[q][1], offset[q][2], Nx, Ny, Nz)];
        }

        // Weighted differences; normalization is left to the consumers
        const double gx = -(f[1]-f[2]+0.5*(f[7]-f[8]+f[9]-f[10]+f[11]-f[12]+f[13]-f[14]));
        const double gy = -(f[3]-f[4]+0.5*(f[7]-f[8]-f[9]+f[10]+f[15]-f[16]+f[17]-f[18]));
        const double gz = -(f[5]-f[6]+0.5*(f[11]-f[12]-f[13]+f[14]+f[15]-f[16]-f[17]+f[18]));

        ColorGrad[3*node] = gx;
        ColorGrad[3*node+1] = gy;
        ColorGrad[3*node+2] = gz;
    }
}
//*************************************************************************
// Multi-relaxation-time collision step for the D3Q19 distributions, with
// color-gradient terms in the relaxation targets of m1, m9, m11, m13, m14
// and m15, and a constant body force.
//   ID        - per-node label; only nodes with ID > 0 are processed
//   disteven, distodd - distributions, component q stored at [q*N+n]. On entry
//               the two arrays are read in swapped roles (f2,f4,..,f18 from
//               distodd; f0,f1,f3,..,f17 from disteven); on exit the even
//               distributions are in disteven and the odd ones in distodd
//   ColorGrad - input, three interleaved components per node (not modified;
//               the write-back at the end is commented out)
//   Velocity  - output, three interleaved components per node; receives the
//               momentum sums jx, jy, jz (they are not divided by rho)
//   Nx, Ny, Nz - lattice dimensions, N = Nx*Ny*Nz
//   rlx_setA  - relaxation rate applied to m1, m2, m9..m15
//   rlx_setB  - relaxation rate applied to m4, m6, m8, m16..m18
//   alpha     - coefficient multiplying the color-gradient magnitude C
//   beta, pBC - not used in this function body
//   Fx, Fy, Fz - body force added to the post-collision distributions
extern void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB,
double alpha, double beta, double Fx, double Fy, double Fz, bool pBC)
{
int n,N;
// distributions
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
// non-conserved moments
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
// additional variables needed for computations
double rho,jx,jy,jz,C,nx,ny,nz;
N = Nx*Ny*Nz;
char id;
for ( n=0; n<N; n++){
id = ID[n];
if (id > 0){
// Retrieve the color gradient
nx = ColorGrad[3*n];
ny = ColorGrad[3*n+1];
nz = ColorGrad[3*n+2];
//...........Normalize the Color Gradient.................................
// When C is zero these divisions do not give usable values; nx, ny, nz are
// reset further down (see the C == 0.0 check) before they are used.
C = sqrt(nx*nx+ny*ny+nz*nz);
nx = nx/C;
ny = ny/C;
nz = nz/C;
//......No color gradient at z-boundary if pressure BC are set.............
// if (pBC && k==0) nx = ny = nz = 0.f;
// if (pBC && k==Nz-1) nx = ny = nz = 0.f;
//........................................................................
// READ THE DISTRIBUTIONS
// (read from opposite array due to previous swap operation)
//........................................................................
f2 = distodd[n];
f4 = distodd[N+n];
f6 = distodd[2*N+n];
f8 = distodd[3*N+n];
f10 = distodd[4*N+n];
f12 = distodd[5*N+n];
f14 = distodd[6*N+n];
f16 = distodd[7*N+n];
f18 = distodd[8*N+n];
//........................................................................
f0 = disteven[n];
f1 = disteven[N+n];
f3 = disteven[2*N+n];
f5 = disteven[3*N+n];
f7 = disteven[4*N+n];
f9 = disteven[5*N+n];
f11 = disteven[6*N+n];
f13 = disteven[7*N+n];
f15 = disteven[8*N+n];
f17 = disteven[9*N+n];
//........................................................................
// PERFORM RELAXATION PROCESS
//........................................................................
//....................compute the moments...............................................
// rho and (jx,jy,jz) are not relaxed; they are reused unchanged in the
// inverse transformation below.
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
m13 = f8+f7-f10-f9;
m14 = f16+f15-f18-f17;
m15 = f12+f11-f14-f13;
m16 = f7-f8+f9-f10-f11+f12-f13+f14;
m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
m18 = f11-f12-f13+f14-f15+f16+f17-f18;
//..........Toelke, Fruediger et. al. 2006...............
// With a zero gradient the normalized components are replaced by 1.0; every
// term that uses them below is also multiplied by C.
if (C == 0.0) nx = ny = nz = 1.0;
// Relax each moment toward its target: m <- m + rate*(target - m)
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m12 = m12 + rlx_setA*( -0.5*((jy*jy-jz*jz)/rho) - m12);
m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
//.................inverse transformation......................................................
f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10);
f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10);
f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
f11 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
f12 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
f13 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
f14 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
f15 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
f16 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
f17 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
f18 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
//.......................................................................................................
// incorporate external force
// NOTE(review): the axis coefficient 0.16666666 carries fewer digits than the
// diagonal coefficient 0.08333333333 - confirm whether the truncation is intended.
f1 += 0.16666666*Fx;
f2 -= 0.16666666*Fx;
f3 += 0.16666666*Fy;
f4 -= 0.16666666*Fy;
f5 += 0.16666666*Fz;
f6 -= 0.16666666*Fz;
f7 += 0.08333333333*(Fx+Fy);
f8 -= 0.08333333333*(Fx+Fy);
f9 += 0.08333333333*(Fx-Fy);
f10 -= 0.08333333333*(Fx-Fy);
f11 += 0.08333333333*(Fx+Fz);
f12 -= 0.08333333333*(Fx+Fz);
f13 += 0.08333333333*(Fx-Fz);
f14 -= 0.08333333333*(Fx-Fz);
f15 += 0.08333333333*(Fy+Fz);
f16 -= 0.08333333333*(Fy+Fz);
f17 += 0.08333333333*(Fy-Fz);
f18 -= 0.08333333333*(Fy-Fz);
//*********** WRITE UPDATED VALUES TO MEMORY ******************
// Write the updated distributions
// (even distributions go to disteven and odd ones to distodd, i.e. the
// swapped read convention used at the top of the loop is undone here)
//....EVEN.....................................
disteven[n] = f0;
disteven[N+n] = f2;
disteven[2*N+n] = f4;
disteven[3*N+n] = f6;
disteven[4*N+n] = f8;
disteven[5*N+n] = f10;
disteven[6*N+n] = f12;
disteven[7*N+n] = f14;
disteven[8*N+n] = f16;
disteven[9*N+n] = f18;
//....ODD......................................
distodd[n] = f1;
distodd[N+n] = f3;
distodd[2*N+n] = f5;
distodd[3*N+n] = f7;
distodd[4*N+n] = f9;
distodd[5*N+n] = f11;
distodd[6*N+n] = f13;
distodd[7*N+n] = f15;
distodd[8*N+n] = f17;
//...Store the Velocity..........................
// (the stored values are the momentum sums jx, jy, jz)
Velocity[3*n] = jx;
Velocity[3*n+1] = jy;
Velocity[3*n+2] = jz;
/* //...Store the Color Gradient....................
ColorGrad[3*n] = C*nx;
ColorGrad[3*n+1] = C*ny;
ColorGrad[3*n+2] = C*nz;
*/ //...............................................
//***************************************************************
} // check if n is in the solid
} // check if n is in the domain
}
//*************************************************************************
// Build the seven density "distributions" of both components from the saved
// densities, recolor them along the color gradient, and stream them into Den.
//   ID        - per-node label; a node is processed when ID > 0 and its saved
//               total density is positive. A neighbor with ID == 0 triggers
//               bounce-back; any other neighbor label receives the push
//   Den       - accumulator, two interleaved values per node; contributions
//               are ADDED to it, so it is expected to be zero on entry
//               (ComputePhi zeroes it for nodes with ID > 0)
//   Copy      - saved densities, two interleaved values per node (read only)
//   Phi       - not used in this function body
//   ColorGrad - color gradient, three interleaved components per node
//   Velocity  - flow velocity, three interleaved components per node
//   beta      - strength of the recoloring term
//   Nx, Ny, Nz - lattice dimensions; neighbors wrap periodically
//   pBC       - not used in this function body
extern void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
double beta, int Nx, int Ny, int Nz, bool pBC)
{
char id;
int idx;
int in,jn,kn,n,nn,N;
int q,Cqx,Cqy,Cqz;
// int sendLoc;
double na,nb; // density values
double ux,uy,uz; // flow velocity
double nx,ny,nz,C; // color gradient components
double a1,a2,b1,b2;
double sp,delta;
double feq[6]; // equilibrium distributions
// The three axis unit vectors; each one is used in both its + and - direction
// inside the loop below (the original comment called this the D3Q19 set)
int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}};
N = Nx*Ny*Nz;
for ( n=0; n<N; n++){
id = ID[n];
// Local Density Values
na = Copy[2*n];
nb = Copy[2*n+1];
if (id > 0 && na+nb > 0.0){
//.......Back out the 3-D indices for node n..............
int k = n/(Nx*Ny);
int j = (n-Nx*Ny*k)/Nx;
int i = n-Nx*Ny*k-Nx*j;
//.....Load the Color gradient.........
nx = ColorGrad[3*n];
ny = ColorGrad[3*n+1];
nz = ColorGrad[3*n+2];
// The normalized components are only used below when C > 0
C = sqrt(nx*nx+ny*ny+nz*nz);
nx = nx/C;
ny = ny/C;
nz = nz/C;
//....Load the flow velocity...........
ux = Velocity[3*n];
uy = Velocity[3*n+1];
uz = Velocity[3*n+2];
//....Instantiate the density distributions
// Generate Equilibrium Distributions and stream
// Stationary value - distribution 0
// (stays on this node; weight 1/3)
Den[2*n] += 0.3333333333333333*na;
Den[2*n+1] += 0.3333333333333333*nb;
// Non-Stationary equilibrium distributions
// (weight 1/9 each; feq[2*idx] is the + direction, feq[2*idx+1] the - direction)
feq[0] = 0.1111111111111111*(1+3*ux);
feq[1] = 0.1111111111111111*(1-3*ux);
feq[2] = 0.1111111111111111*(1+3*uy);
feq[3] = 0.1111111111111111*(1-3*uy);
feq[4] = 0.1111111111111111*(1+3*uz);
feq[5] = 0.1111111111111111*(1-3*uz);
// Construction and streaming for the components
for (idx=0; idx<3; idx++){
// Distribution index
q = 2*idx;
// Associated discrete velocity
Cqx = D3Q7[idx][0];
Cqy = D3Q7[idx][1];
Cqz = D3Q7[idx][2];
// Generate the Equilibrium Distribution
// (a1,b1 travel in the + direction, a2,b2 in the - direction)
a1 = na*feq[q];
b1 = nb*feq[q];
a2 = na*feq[q+1];
b2 = nb*feq[q+1];
// Recolor the distributions
// (a1+b1 and a2+b2 are unchanged: delta moves component a toward the
// gradient direction and component b away from it)
if (C > 0.0){
sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz);
//if (idx > 2) sp = 0.7071067811865475*sp;
//delta = sp*min( min(a1,a2), min(b1,b2) );
delta = na*nb/(na+nb)*0.1111111111111111*sp;
//if (a1>0 && b1>0){
a1 += beta*delta;
a2 -= beta*delta;
b1 -= beta*delta;
b2 += beta*delta;
}
// .......Get the neighbor node..............
//nn = n + Stride[idx];
in = i+Cqx;
jn = j+Cqy;
kn = k+Cqz;
// Adjust for periodic BC, if necessary
if (in<0) in+= Nx;
if (jn<0) jn+= Ny;
if (kn<0) kn+= Nz;
if (!(in<Nx)) in-= Nx;
if (!(jn<Ny)) jn-= Ny;
if (!(kn<Nz)) kn-= Nz;
// Perform streaming or bounce-back as needed
// (id is reused here for the neighbor's label; the label of node n was
// already tested at the top of the outer loop)
id = ID[kn*Nx*Ny+jn*Nx+in];
if (id == 0){ //.....Bounce-back Rule...........
Den[2*n] += a1;
Den[2*n+1] += b1;
// atomicAdd(&Den[2*n], a1);
// atomicAdd(&Den[2*n+1], b1);
}
else{
//......Push the "distribution" to neighboring node...........
// Index of the neighbor in the local process
//nn = (kn-zmin[rank]+1)*Nxp*Nyp + (jn-ymin[rank]+1)*Nxp + (in-xmin[rank]+1);
nn = kn*Nx*Ny+jn*Nx+in;
// Push to neighboring node
Den[2*nn] += a1;
Den[2*nn+1] += b1;
// atomicAdd(&Den[2*nn], a1);
// atomicAdd(&Den[2*nn+1], b1);
}
// .......Get the neighbor node..............
// (opposite direction; q is assigned here but not read afterwards)
q = 2*idx+1;
in = i-Cqx;
jn = j-Cqy;
kn = k-Cqz;
// Adjust for periodic BC, if necessary
if (in<0) in+= Nx;
if (jn<0) jn+= Ny;
if (kn<0) kn+= Nz;
if (!(in<Nx)) in-= Nx;
if (!(jn<Ny)) jn-= Ny;
if (!(kn<Nz)) kn-= Nz;
// Perform streaming or bounce-back as needed
id = ID[kn*Nx*Ny+jn*Nx+in];
if (id == 0){
//.....Bounce-back Rule...........
Den[2*n] += a2;
Den[2*n+1] += b2;
// atomicAdd(&Den[2*n], a2);
// atomicAdd(&Den[2*n+1], b2);
}
else{
//......Push the "distribution" to neighboring node...........
// Index of the neighbor in the local process
//nn = (kn-zmin[rank]+1)*Nxp*Nyp + (jn-ymin[rank]+1)*Nxp + (in-xmin[rank]+1);
nn = kn*Nx*Ny+jn*Nx+in;
// Push to neighboring node
Den[2*nn] += a2;
Den[2*nn+1] += b2;
// atomicAdd(&Den[2*nn], a2);
// atomicAdd(&Den[2*nn+1], b2);
}
}
}
}
}
// Update the phase indicator from the streamed densities and reset the
// density accumulator for the next streaming step.
//   ID   - per-node label; only nodes with ID > 0 are touched
//   Phi  - output, one value per node: (Na-Nb)/(Na+Nb)
//   Copy - output, receives the two densities that were just consumed
//   Den  - input/output, two interleaved values per node; zeroed after use
//   N    - number of nodes
extern void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N)
{
    for (int node = 0; node < N; ++node) {
        if (ID[node] <= 0) continue;

        double *accum = Den + 2*node;    // streamed densities of this node
        double *saved = Copy + 2*node;   // where the snapshot is kept

        // Streaming has already been performed, so these are the new densities
        const double densA = accum[0];
        const double densB = accum[1];
        Phi[node] = (densA-densB)/(densA+densB);

        // Keep a copy of the current densities
        saved[0] = densA;
        saved[1] = densB;

        // Clear the accumulator for the next streaming step
        accum[0] = 0.0;
        accum[1] = 0.0;
    }
}

756
cpu/Color.cpp~ Normal file
View File

@ -0,0 +1,756 @@
#include <math.h>
// Initialize the two component densities and the phase indicator on every node.
//   ID   - per-node label: 1 selects pure component A, 2 selects pure
//          component B, any other value selects the mixed state (das, dbs)
//   Den  - output, two interleaved values per node: Den[2*n], Den[2*n+1]
//   Phi  - output, one value per node: (A-B)/(A+B) of the densities written
//   das, dbs - densities written for nodes whose label is neither 1 nor 2
//   N    - number of nodes
inline void InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int N)
{
    for (int node = 0; node < N; ++node) {
        const char label = ID[node];
        double densA, densB, indicator;
        if (label == 1) {          // pure component A
            densA = 1.0;
            densB = 0.0;
            indicator = 1.0;
        }
        else if (label == 2) {     // pure component B
            densA = 0.0;
            densB = 1.0;
            indicator = -1.0;
        }
        else {                     // every other label gets the supplied densities
            densA = das;
            densB = dbs;
            indicator = (das-dbs)/(das+dbs);
        }
        Den[2*node] = densA;
        Den[2*node+1] = densB;
        Phi[node] = indicator;
    }
}
// Fill the 19 distributions of every node with their initial values.
//   ID      - per-node label; nodes with ID > 0 receive the weights below,
//             all other nodes are filled with the marker value -1.0
//   f_even  - 10 components per node, component q stored at f_even[q*N+n]
//   f_odd   -  9 components per node, component q stored at f_odd[q*N+n]
//   Nx, Ny, Nz - lattice dimensions, N = Nx*Ny*Nz
inline void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
    const double wRest = 0.3333333333333333;     // f_even component 0
    const double wAxis = 0.055555555555555555;   // f_odd 0..2 and f_even 1..3
    const double wDiag = 0.0277777777777778;     // f_odd 3..8 and f_even 4..9
    const int total = Nx*Ny*Nz;

    for (int node = 0; node < total; ++node) {
        const bool fluid = ID[node] > 0;

        // Component 0 exists only in the even array
        f_even[node] = fluid ? wRest : -1.0;

        // Remaining components come in (odd q, even q+1) pairs
        for (int q = 0; q < 9; ++q) {
            double value = -1.0;   // marker for non-fluid nodes
            if (fluid) value = (q < 3) ? wAxis : wDiag;
            f_odd[q*total+node] = value;
            f_even[(q+1)*total+node] = value;
        }
    }
}
// Compute the three first-order moments of the distributions on each node.
//   ID        - per-node label; only nodes with ID > 0 are processed, the
//               entries of vel for all other nodes are left untouched
//   disteven  - even-numbered distributions, f(2q) stored at disteven[q*N+n]
//   distodd   - odd-numbered distributions, f(2q+1) stored at distodd[q*N+n]
//   vel       - output, component-major: vel[n], vel[N+n], vel[2*N+n]
//   Nx, Ny, Nz - lattice dimensions, N = Nx*Ny*Nz
// The sums are not divided by the density.
inline void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz)
{
    const int total = Nx*Ny*Nz;
    double f[19];   // f[q] = distribution q at the current node (f[0] is not used)

    for (int node = 0; node < total; ++node) {
        if (!(ID[node] > 0)) continue;

        // Gather the 18 moving distributions of this node
        for (int c = 0; c < 9; ++c) {
            f[2*c+1] = distodd[c*total+node];
            f[2*c+2] = disteven[(c+1)*total+node];
        }

        // Same summation order as the reference expressions
        const double vx = f[1]-f[2]+f[7]-f[8]+f[9]-f[10]+f[11]-f[12]+f[13]-f[14];
        const double vy = f[3]-f[4]+f[7]-f[8]-f[9]+f[10]+f[15]-f[16]+f[17]-f[18];
        const double vz = f[5]-f[6]+f[11]-f[12]-f[13]+f[14]+f[15]-f[16]-f[17]+f[18];

        vel[node] = vx;
        vel[total+node] = vy;
        vel[2*total+node] = vz;
    }
}
//*************************************************************************
//*************************************************************************
inline void PressureBC_inlet(double *disteven, double *distodd, double din,
		int Nx, int Ny, int Nz)
{
	// Pressure (density) boundary update on the first z-layer, i.e. nodes 0 .. Nx*Ny-1.
	// din is the prescribed density; the five distributions 6, 12, 13, 16 and 17 are
	// reconstructed from the remaining ones and written back.  All other entries of
	// disteven / distodd are left unchanged.  No solid/fluid check is performed here.
	const int total = Nx*Ny*Nz;
	const int layer = Nx*Ny;
	double f[19];	// f[q] = distribution q at the current boundary node
	double uz;

	for (int site = 0; site < layer; ++site){
		// Gather: even distribution 2*q from disteven[q*N+n], odd 2*q-1 from distodd[(q-1)*N+n]
		f[0] = disteven[site];
		for (int q = 1; q <= 9; ++q){
			f[2*q-1] = distodd[(q-1)*total+site];
			f[2*q]   = disteven[q*total+site];
		}

		// Normal velocity consistent with the prescribed density
		uz = 1.0 - (f[0]+f[4]+f[3]+f[2]+f[1]+f[8]+f[7]+f[9]+ f[10] +
				2*(f[5]+ f[15]+f[18]+f[11]+f[14]))/din;

		// Reconstruct the unknown distributions (each line may depend on the previous ones)
		f[6]  = f[5] + 0.3333333333333333*din*uz;
		f[16] = f[15] + 0.1666666666666667*din*uz;
		f[17] = f[16] + f[4] - f[3]-f[15]+f[18]+f[8]-f[7] +f[9]-f[10];
		f[12] = (din*uz+f[5]+ f[15]+f[18]+f[11]+f[14]-f[6]-f[16]-f[17]-f[2]+f[1]-f[14]+f[11]-f[8]+f[7]+f[9]-f[10])*0.5;
		f[13] = din*uz+f[5]+ f[15]+f[18]+f[11]+f[14]-f[6]-f[16]-f[17]-f[12];

		// Scatter only the reconstructed values
		disteven[3*total+site] = f[6];
		disteven[6*total+site] = f[12];
		distodd[6*total+site]  = f[13];
		disteven[8*total+site] = f[16];
		distodd[8*total+site]  = f[17];
	}
}
inline void PressureBC_outlet(double *disteven, double *distodd, double dout,
		int Nx, int Ny, int Nz, int S, int outlet)
{
	// Pressure (density) boundary update for the outlet.
	//   disteven, distodd : D3Q19 distributions; distribution 2*q is at disteven[q*N+n],
	//                       distribution 2*q-1 at distodd[(q-1)*N+n], with N = Nx*Ny*Nz
	//   dout              : prescribed outlet density
	//   outlet            : index of the first node that belongs to the outlet boundary;
	//                       nodes outlet .. N-1 are updated (negative values are treated as 0)
	//   S                 : not used by this implementation
	// The five distributions 5, 11, 14, 15 and 18 are reconstructed from the others and
	// written back; everything else is left unchanged.
	//
	// The loop previously started at n=0, which applied the outlet condition to every node
	// of the domain and ignored the outlet argument.
	int n,N;
	// distributions
	double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
	double f10,f11,f12,f13,f14,f15,f16,f17,f18;
	double uz;
	N = Nx*Ny*Nz;
	// Loop over the boundary nodes only
	for (n = (outlet < 0 ? 0 : outlet); n<N; n++){
		//........................................................................
		// Read distributions from "opposite" memory convention
		//........................................................................
		f1 = distodd[n];
		f3 = distodd[N+n];
		f7 = distodd[3*N+n];
		f9 = distodd[4*N+n];
		f13 = distodd[6*N+n];
		f17 = distodd[8*N+n];
		//........................................................................
		f0 = disteven[n];
		f2 = disteven[N+n];
		f4 = disteven[2*N+n];
		f6 = disteven[3*N+n];
		f8 = disteven[4*N+n];
		f10 = disteven[5*N+n];
		f12 = disteven[6*N+n];
		f16 = disteven[8*N+n];
		//........Determine the outlet flow velocity.........
		uz = -1.0 + (f0+f4+f3+f2+f1+f8+f7+f9+f10 + 2*(f6+f16+f17+f12+f13))/dout;
		//........Set the unknown distributions..............
		// (each line may depend on the values computed just above it)
		f5 = f6 - 0.33333333333333338*dout* uz;
		f15 = f16 - 0.16666666666666678*dout* uz;
		f18 = f15 - f4 + f3-f16+f17-f8+f7-f9+f10;
		f11 = (-dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18+f2-f1-f13+f12+f8-f7-f9+f10)*0.5;
		f14 = -dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18-f11;
		//........Store in "opposite" memory location..........
		distodd[2*N+n] = f5;
		distodd[5*N+n] = f11;
		disteven[7*N+n] = f14;
		distodd[7*N+n] = f15;
		disteven[9*N+n] = f18;
		//...................................................
	}
}
//*************************************************************************
inline void ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz)
{
	// Evaluates a discrete gradient of phi at every node using the 18 neighbours of the
	// D3Q19 stencil (axis neighbours with weight 1, diagonal neighbours with weight 0.5).
	// All three directions are treated as periodic.  The un-normalised result is stored
	// interleaved: ColorGrad[3*n], ColorGrad[3*n+1], ColorGrad[3*n+2].
	// ID is not read by this routine.
	const int plane = Nx*Ny;
	const int total = Nx*Ny*Nz;
	double s[19];	// s[q] = phi sampled at the neighbour associated with direction q

	for (int site = 0; site < total; ++site){
		// 3-D indices of this node
		const int k = site/plane;
		const int j = (site - plane*k)/Nx;
		const int i = site - plane*k - Nx*j;

		// Periodic neighbour coordinates along each axis
		const int iLo = (i-1 < 0)  ? i-1+Nx : i-1;
		const int iHi = (i+1 < Nx) ? i+1    : i+1-Nx;
		const int jLo = (j-1 < 0)  ? j-1+Ny : j-1;
		const int jHi = (j+1 < Ny) ? j+1    : j+1-Ny;
		const int kLo = (k-1 < 0)  ? k-1+Nz : k-1;
		const int kHi = (k+1 < Nz) ? k+1    : k+1-Nz;

		// Row and slab offsets for centre / lower / upper neighbours
		const int yC = j*Nx,      yL = jLo*Nx,      yH = jHi*Nx;
		const int zC = k*plane,   zL = kLo*plane,   zH = kHi*plane;

		// Axis neighbours
		s[1]  = phi[zC+yC+iLo];
		s[2]  = phi[zC+yC+iHi];
		s[3]  = phi[zC+yL+i];
		s[4]  = phi[zC+yH+i];
		s[5]  = phi[zL+yC+i];
		s[6]  = phi[zH+yC+i];
		// xy-plane diagonals
		s[7]  = phi[zC+yL+iLo];
		s[8]  = phi[zC+yH+iHi];
		s[9]  = phi[zC+yH+iLo];
		s[10] = phi[zC+yL+iHi];
		// xz-plane diagonals
		s[11] = phi[zL+yC+iLo];
		s[12] = phi[zH+yC+iHi];
		s[13] = phi[zH+yC+iLo];
		s[14] = phi[zL+yC+iHi];
		// yz-plane diagonals
		s[15] = phi[zL+yL+i];
		s[16] = phi[zH+yH+i];
		s[17] = phi[zH+yL+i];
		s[18] = phi[zL+yH+i];

		// Weighted differences; the result is deliberately left un-normalised
		double *out = ColorGrad + 3*site;
		out[0] = -(s[1]-s[2]+0.5*(s[7]-s[8]+s[9]-s[10]+s[11]-s[12]+s[13]-s[14]));
		out[1] = -(s[3]-s[4]+0.5*(s[7]-s[8]-s[9]+s[10]+s[15]-s[16]+s[17]-s[18]));
		out[2] = -(s[5]-s[6]+0.5*(s[11]-s[12]-s[13]+s[14]+s[15]-s[16]-s[17]+s[18]));
	}
}
//*************************************************************************
// Multi-relaxation-time collision step for the two-phase "color" model on a D3Q19 lattice.
//
// For every node n with ID[n] > 0 the routine
//   1. loads the color gradient from ColorGrad[3*n .. 3*n+2] and normalises it,
//   2. reads the 19 distributions (odd and even arrays are read with swapped roles,
//      see the comment at the read section),
//   3. computes the moments, relaxes them with the rates rlx_setA / rlx_setB and adds the
//      color-gradient terms scaled by alpha,
//   4. transforms back to distributions, adds the body force (Fx,Fy,Fz),
//   5. writes the distributions back and stores (jx,jy,jz) in Velocity[3*n .. 3*n+2].
// Nodes with ID[n] <= 0 are not touched.
//
// Notes:
//   - beta and pBC are not read anywhere in the active code of this function.
//   - Velocity receives the momentum sums jx,jy,jz; they are not divided by rho.
//   - N = Nx*Ny*Nz; distribution arrays are indexed as [q*N+n].
inline void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB,
double alpha, double beta, double Fx, double Fy, double Fz, bool pBC)
{
int n,N;
// distributions
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
// non-conserved moments
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
// additional variables needed for computations
double rho,jx,jy,jz,C,nx,ny,nz;
N = Nx*Ny*Nz;
char id;
for ( n=0; n<N; n++){
id = ID[n];
if (id > 0){
// Retrieve the color gradient
nx = ColorGrad[3*n];
ny = ColorGrad[3*n+1];
nz = ColorGrad[3*n+2];
//...........Normalize the Color Gradient.................................
// C is the magnitude of the gradient. When C == 0 the divisions below produce
// non-finite values; nx,ny,nz are reset to 1.0 further down before they are used.
C = sqrt(nx*nx+ny*ny+nz*nz);
nx = nx/C;
ny = ny/C;
nz = nz/C;
//......No color gradient at z-boundary if pressure BC are set.............
// (disabled: k is not computed in this function)
// if (pBC && k==0) nx = ny = nz = 0.f;
// if (pBC && k==Nz-1) nx = ny = nz = 0.f;
//........................................................................
// READ THE DISTRIBUTIONS
// (read from opposite array due to previous swap operation)
//........................................................................
// Even-numbered distributions are taken from distodd ...
f2 = distodd[n];
f4 = distodd[N+n];
f6 = distodd[2*N+n];
f8 = distodd[3*N+n];
f10 = distodd[4*N+n];
f12 = distodd[5*N+n];
f14 = distodd[6*N+n];
f16 = distodd[7*N+n];
f18 = distodd[8*N+n];
//........................................................................
// ... and f0 plus the odd-numbered distributions from disteven
f0 = disteven[n];
f1 = disteven[N+n];
f3 = disteven[2*N+n];
f5 = disteven[3*N+n];
f7 = disteven[4*N+n];
f9 = disteven[5*N+n];
f11 = disteven[6*N+n];
f13 = disteven[7*N+n];
f15 = disteven[8*N+n];
f17 = disteven[9*N+n];
//........................................................................
// PERFORM RELAXATION PROCESS
//........................................................................
//....................compute the moments...............................................
// rho, jx, jy, jz are the conserved moments (density and momentum);
// m1..m18 are the non-conserved moments that get relaxed below.
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
m13 = f8+f7-f10-f9;
m14 = f16+f15-f18-f17;
m15 = f12+f11-f14-f13;
m16 = f7-f8+f9-f10-f11+f12-f13+f14;
m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
m18 = f11-f12-f13+f14-f15+f16+f17-f18;
//..........Toelke, Fruediger et. al. 2006...............
// Replace the non-finite unit vector produced by the division by C == 0;
// every term that uses nx,ny,nz below is multiplied by C, so it vanishes in that case.
if (C == 0.0) nx = ny = nz = 1.0;
// Relax each moment toward its equilibrium: m <- m + rate*(m_eq - m).
// rlx_setA is used for m1,m2,m9..m15, rlx_setB for m4,m6,m8,m16..m18.
// NOTE(review): rho appears as a divisor; rho == 0 is not guarded here.
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m12 = m12 + rlx_setA*( -0.5*((jy*jy-jz*jz)/rho) - m12);
m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
//.................inverse transformation......................................................
// Rebuild the 19 distributions from the (relaxed) moments.
f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10);
f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10);
f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
f11 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
f12 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
f13 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
f14 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
f15 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
f16 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
f17 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
f18 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
//.......................................................................................................
// incorporate external force
// Each opposite pair receives +/- the same increment, so the sum of all
// distributions (rho) is unchanged by the forcing.
// NOTE(review): the axis weights are written as 0.16666666 (8 digits) while the
// diagonal weights use 0.08333333333 - presumably 1/6 and 1/12; confirm intended precision.
f1 += 0.16666666*Fx;
f2 -= 0.16666666*Fx;
f3 += 0.16666666*Fy;
f4 -= 0.16666666*Fy;
f5 += 0.16666666*Fz;
f6 -= 0.16666666*Fz;
f7 += 0.08333333333*(Fx+Fy);
f8 -= 0.08333333333*(Fx+Fy);
f9 += 0.08333333333*(Fx-Fy);
f10 -= 0.08333333333*(Fx-Fy);
f11 += 0.08333333333*(Fx+Fz);
f12 -= 0.08333333333*(Fx+Fz);
f13 += 0.08333333333*(Fx-Fz);
f14 -= 0.08333333333*(Fx-Fz);
f15 += 0.08333333333*(Fy+Fz);
f16 -= 0.08333333333*(Fy+Fz);
f17 += 0.08333333333*(Fy-Fz);
f18 -= 0.08333333333*(Fy-Fz);
//*********** WRITE UPDATED VALUES TO MEMORY ******************
// Write the updated distributions
// (written in the normal convention: even distributions to disteven, odd to distodd)
//....EVEN.....................................
disteven[n] = f0;
disteven[N+n] = f2;
disteven[2*N+n] = f4;
disteven[3*N+n] = f6;
disteven[4*N+n] = f8;
disteven[5*N+n] = f10;
disteven[6*N+n] = f12;
disteven[7*N+n] = f14;
disteven[8*N+n] = f16;
disteven[9*N+n] = f18;
//....ODD......................................
distodd[n] = f1;
distodd[N+n] = f3;
distodd[2*N+n] = f5;
distodd[3*N+n] = f7;
distodd[4*N+n] = f9;
distodd[5*N+n] = f11;
distodd[6*N+n] = f13;
distodd[7*N+n] = f15;
distodd[8*N+n] = f17;
//...Store the Velocity..........................
// jx,jy,jz were computed from the pre-collision distributions (before forcing)
Velocity[3*n] = jx;
Velocity[3*n+1] = jy;
Velocity[3*n+2] = jz;
/* //...Store the Color Gradient....................
ColorGrad[3*n] = C*nx;
ColorGrad[3*n+1] = C*ny;
ColorGrad[3*n+2] = C*nz;
*/ //...............................................
//***************************************************************
} // check if n is in the solid
} // check if n is in the domain
}
//*************************************************************************
inline void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
		double beta, int Nx, int Ny, int Nz, bool pBC)
{
	// Streams the two component densities on a D3Q7 stencil (rest + six axis directions).
	//   Copy      : densities before streaming, interleaved [2*n]=A, [2*n+1]=B (read only)
	//   Den       : accumulator for the streamed densities, same layout (added to, never reset here)
	//   ColorGrad : interleaved gradient, normalised locally and used for recoloring
	//   Velocity  : interleaved flow velocity used in the equilibrium weights
	//   beta      : recoloring strength
	// A node is processed only if ID > 0 and the total density is positive.  Densities sent
	// toward a neighbour with ID == 0 are bounced back onto the sending node.  All three
	// directions are periodic.  Phi and pBC are not read by this routine.

	// Positive axis directions of the D3Q7 stencil; each is handled together with its opposite
	const int axisDir[3][3] = {{1,0,0},{0,1,0},{0,0,1}};
	const int plane = Nx*Ny;
	const int total = Nx*Ny*Nz;

	for (int node = 0; node < total; ++node){
		const double rhoA = Copy[2*node];
		const double rhoB = Copy[2*node+1];
		if (!(ID[node] > 0 && rhoA+rhoB > 0.0)) continue;

		// 3-D indices of this node
		const int k = node/plane;
		const int j = (node - plane*k)/Nx;
		const int i = node - plane*k - Nx*j;

		// Unit vector along the color gradient (only used when gradNorm > 0)
		double gx = ColorGrad[3*node];
		double gy = ColorGrad[3*node+1];
		double gz = ColorGrad[3*node+2];
		const double gradNorm = sqrt(gx*gx+gy*gy+gz*gz);
		gx = gx/gradNorm;
		gy = gy/gradNorm;
		gz = gz/gradNorm;

		const double ux = Velocity[3*node];
		const double uy = Velocity[3*node+1];
		const double uz = Velocity[3*node+2];

		// Rest population stays on the node
		Den[2*node]   += 0.3333333333333333*rhoA;
		Den[2*node+1] += 0.3333333333333333*rhoB;

		// Equilibrium weights: [2*axis] for the +direction, [2*axis+1] for the -direction
		double weight[6];
		weight[0] = 0.1111111111111111*(1+3*ux);
		weight[1] = 0.1111111111111111*(1-3*ux);
		weight[2] = 0.1111111111111111*(1+3*uy);
		weight[3] = 0.1111111111111111*(1-3*uy);
		weight[4] = 0.1111111111111111*(1+3*uz);
		weight[5] = 0.1111111111111111*(1-3*uz);

		for (int axis = 0; axis < 3; ++axis){
			const int cx = axisDir[axis][0];
			const int cy = axisDir[axis][1];
			const int cz = axisDir[axis][2];

			// amount[pass][0] = component A, amount[pass][1] = component B;
			// pass 0 travels along +c, pass 1 along -c
			double amount[2][2];
			amount[0][0] = rhoA*weight[2*axis];
			amount[0][1] = rhoB*weight[2*axis];
			amount[1][0] = rhoA*weight[2*axis+1];
			amount[1][1] = rhoB*weight[2*axis+1];

			// Recoloring: shift A along the gradient and B against it
			if (gradNorm > 0.0){
				const double proj  = gx*double(cx)+gy*double(cy)+gz*double(cz);
				const double shift = rhoA*rhoB/(rhoA+rhoB)*0.1111111111111111*proj;
				amount[0][0] += beta*shift;
				amount[1][0] -= beta*shift;
				amount[0][1] -= beta*shift;
				amount[1][1] += beta*shift;
			}

			for (int pass = 0; pass < 2; ++pass){
				const int sign = (pass == 0) ? 1 : -1;
				int in = i + sign*cx;
				int jn = j + sign*cy;
				int kn = k + sign*cz;
				// Periodic wrap
				if (in<0) in += Nx;
				if (jn<0) jn += Ny;
				if (kn<0) kn += Nz;
				if (!(in<Nx)) in -= Nx;
				if (!(jn<Ny)) jn -= Ny;
				if (!(kn<Nz)) kn -= Nz;

				// Neighbour with ID == 0: bounce back to the sender; otherwise push to the neighbour
				const int neighbor = kn*Nx*Ny+jn*Nx+in;
				const int dest = (ID[neighbor] == 0) ? node : neighbor;
				Den[2*dest]   += amount[pass][0];
				Den[2*dest+1] += amount[pass][1];
			}
		}
	}
}
inline void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N)
{
	// For every node with ID > 0:
	//   - set Phi to (A-B)/(A+B) using the two densities stored at Den[2*n], Den[2*n+1],
	//   - save those densities in Copy (same interleaved layout),
	//   - reset the entries in Den to zero so they can be accumulated into again.
	// Nodes with ID <= 0 are left untouched in all three arrays.
	for (int node = 0; node < N; ++node){
		if (!(ID[node] > 0)) continue;

		double *den  = Den  + 2*node;
		double *keep = Copy + 2*node;

		// Densities after streaming
		const double a = den[0];
		const double b = den[1];

		Phi[node] = (a-b)/(a+b);

		// Keep a copy of the current densities
		keep[0] = a;
		keep[1] = b;

		// Clear the accumulator for the next streaming step
		den[0] = 0.0;
		den[1] = 0.0;
	}
}

21
cpu/Color.h Normal file
View File

@ -0,0 +1,21 @@
// Declarations for the routines defined in Color.cpp.
// NOTE(review): several of the definitions in Color.cpp are marked `inline` while the
// declarations here are `extern` - confirm that the symbols are emitted and linked as intended.

// Initialise the interleaved component densities Den and the phase indicator Phi from the node ID.
extern void InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int N);
// Initialise the even/odd D3Q19 distribution arrays for a Nx*Ny*Nz grid.
extern void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
// Directional sums of the distributions for nodes with ID > 0; vel is stored component-major.
extern void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz);
//*************************************************************************
//*************************************************************************
// Pressure boundary update with prescribed density din on the first Nx*Ny nodes.
extern void PressureBC_inlet(double *disteven, double *distodd, double din,
int Nx, int Ny, int Nz);
// Pressure boundary update with prescribed density dout for the outlet.
extern void PressureBC_outlet(double *disteven, double *distodd, double dout,
int Nx, int Ny, int Nz, int S, int outlet);
//*************************************************************************
// Un-normalised gradient of phi on a periodic grid, stored interleaved in ColorGrad[3*n..3*n+2].
extern void ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz);
//*************************************************************************
// Collision step for nodes with ID > 0; also stores (jx,jy,jz) in Velocity[3*n..3*n+2].
extern void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB,
double alpha, double beta, double Fx, double Fy, double Fz, bool pBC);
//*************************************************************************
// Stream the component densities from Copy into the accumulator Den (D3Q7 stencil, periodic).
extern void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
double beta, int Nx, int Ny, int Nz, bool pBC);
// Update Phi from Den, save Den into Copy and zero Den, for nodes with ID > 0.
extern void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N);

21
cpu/Color.h~ Normal file
View File

@ -0,0 +1,21 @@
// NOTE(review): this file (Color.h~) looks like an editor backup of Color.h.
// Its ColorCollide declaration below carries an extra `int S` parameter that the
// definition in Color.cpp does not have, so it should not be included in place of Color.h.
extern void InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int N);
extern void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
extern void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz);
//*************************************************************************
//*************************************************************************
extern void PressureBC_inlet(double *disteven, double *distodd, double din,
int Nx, int Ny, int Nz);
extern void PressureBC_outlet(double *disteven, double *distodd, double dout,
int Nx, int Ny, int Nz, int S, int outlet);
//*************************************************************************
extern void ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz);
//*************************************************************************
// Stale signature: note the `int S` after Nz.
extern void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
double *Velocity, int Nx, int Ny, int Nz, int S,double rlx_setA, double rlx_setB,
double alpha, double beta, double Fx, double Fy, double Fz, bool pBC);
//*************************************************************************
extern void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
double beta, int Nx, int Ny, int Nz, bool pBC);
extern void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N);

BIN
cpu/Color.o Normal file

Binary file not shown.

BIN
cpu/ColorLBM-cpu Executable file

Binary file not shown.

190
cpu/D3Q19.cpp Normal file
View File

@ -0,0 +1,190 @@
extern void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
	//....................................................................................
	// Gather distribution q at the lattice sites list[0..count-1] into the send buffer.
	// Entry idx of the list is written to sendbuf[start+idx].
	// dist holds one block of N values per distribution (index q*N+n); it may be either
	// the even or the odd distribution array.
	//....................................................................................
	const int base = q*N;	// offset of distribution q inside dist
	int slot = 0;
	while (slot < count){
		const int site = list[slot];
		sendbuf[start+slot] = dist[base+site];
		++slot;
	}
}
extern void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
		double *recvbuf, double *dist, int Nx, int Ny, int Nz){
	//....................................................................................
	// Unpack distribution q from the receive buffer.
	//   q             : index of the distribution block inside dist (block size N = Nx*Ny*Nz)
	//   Cqx, Cqy, Cqz : lattice offset applied to each listed site before storing
	//   list          : count site indices (as numbered on the sending side)
	//   start         : offset of the first value for this distribution in recvbuf
	//   recvbuf       : received values; entry start+idx belongs to list[idx]
	//   dist          : even or odd distribution array that receives the values
	// Swap rule: the distributions in recvbuf are OPPOSITE of q.
	// No periodic wrap is applied to the shifted indices; the caller is expected to pass
	// sites for which (i+Cqx, j+Cqy, k+Cqz) stays inside the Nx*Ny*Nz block.
	//....................................................................................
	int i,j,k,n,nn,idx;
	int N = Nx*Ny*Nz;
	for (idx=0; idx<count; idx++){
		// Site index taken from the list (numbering of the sending process)
		n = list[idx];
		// Back out the 3-D indices. The x index must use the row stride Nx
		// (it previously used Nz, which is wrong whenever Nx != Nz and j != 0).
		k = n/(Nx*Ny);
		j = (n-Nx*Ny*k)/Nx;
		i = n-Nx*Ny*k-Nx*j;
		// Streaming for the non-local distribution
		i += Cqx;
		j += Cqy;
		k += Cqz;
		nn = k*Nx*Ny+j*Nx+i;
		// Unpack the distribution to the proper location
		dist[q*N+nn] = recvbuf[start+idx];
	}
}
//*************************************************************************
extern void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz)
{
int n,nn,N;
// distributions
double f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
N = Nx*Ny*Nz;
for (n=0; n<N; n++){
//.......Back out the 3-D indices for node n..............
int k = n/(Nx*Ny);
int j = (n-Nx*Ny*k)/Nx;
int i = n-Nx*Ny*k-Nz*j;
if (ID[n] > 0){
//........................................................................
// Retrieve even distributions from the local node (swap convention)
// f0 = disteven[n]; // Does not particupate in streaming
f1 = distodd[n];
f3 = distodd[N+n];
f5 = distodd[2*N+n];
f7 = distodd[3*N+n];
f9 = distodd[4*N+n];
f11 = distodd[5*N+n];
f13 = distodd[6*N+n];
f15 = distodd[7*N+n];
f17 = distodd[8*N+n];
//........................................................................
//........................................................................
// Retrieve odd distributions from neighboring nodes (swap convention)
//........................................................................
nn = n+1; // neighbor index (pull convention)
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
//if (i+1<Nx){
f2 = disteven[N+nn]; // pull neighbor for distribution 2
if (f2 > 0){
distodd[n] = f2;
disteven[N+nn] = f1;
}
//}
//........................................................................
nn = n+Nx; // neighbor index (pull convention)
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
//if (j+1<Ny){
f4 = disteven[2*N+nn]; // pull neighbor for distribution 4
if (f4 > 0){
distodd[N+n] = f4;
disteven[2*N+nn] = f3;
// }
}
//........................................................................
nn = n+Nx*Ny; // neighbor index (pull convention)
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
//if (k+1<Nz){
f6 = disteven[3*N+nn]; // pull neighbor for distribution 6
if (f6 > 0){
distodd[2*N+n] = f6;
disteven[3*N+nn] = f5;
// }
}
//........................................................................
nn = n+Nx+1; // neighbor index (pull convention)
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
//if ((i+1<Nx) && (j+1<Ny)){
f8 = disteven[4*N+nn]; // pull neighbor for distribution 8
if (f8 > 0){
distodd[3*N+n] = f8;
disteven[4*N+nn] = f7;
// }
}
//........................................................................
nn = n-Nx+1; // neighbor index (pull convention)
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
if (j-1<0) nn += Nx*Ny; // Perioidic BC along the y-boundary
//if (!(i-1<0) && (j+1<Ny)){
f10 = disteven[5*N+nn]; // pull neighbor for distribution 9
if (f10 > 0){
distodd[4*N+n] = f10;
disteven[5*N+nn] = f9;
// }
}
//........................................................................
nn = n+Nx*Ny+1; // neighbor index (pull convention)
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
//if ( !(i-1<0) && !(k-1<0)){
f12 = disteven[6*N+nn]; // pull distribution 11
if (f12 > 0){
distodd[5*N+n] = f12;
disteven[6*N+nn] = f11;
// }
}
//........................................................................
nn = n-Nx*Ny+1; // neighbor index (pull convention)
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
if (k-1<0) nn += Nx*Ny*Nz; // Perioidic BC along the z-boundary
//if (!(i-1<0) && (k+1<Nz)){
f14 = disteven[7*N+nn]; // pull neighbor for distribution 13
if (f14 > 0){
distodd[6*N+n] = f14;
disteven[7*N+nn] = f13;
// }
}
//........................................................................
nn = n+Nx*Ny+Nx; // neighbor index (pull convention)
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
//if (!(j-1<0) && !(k-1<0)){
f16 = disteven[8*N+nn]; // pull neighbor for distribution 15
if (f16 > 0){
distodd[7*N+n] = f16;
disteven[8*N+nn] = f15;
// }
}
//........................................................................
nn = n-Nx*Ny+Nx; // neighbor index (pull convention)
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
if (k-1<0) nn += Nx*Ny*Nz; // Perioidic BC along the z-boundary
//if (!(j-1<0) && (k+1<Nz)){
f18 = disteven[9*N+nn]; // pull neighbor for distribution 17
if (f18 > 0){
distodd[8*N+n] = f18;
disteven[9*N+nn] = f17;
// }
}
//........................................................................
}
}
}

189
cpu/D3Q19.cpp~ Normal file
View File

@ -0,0 +1,189 @@
inline void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
	//....................................................................................
	// Gather component q of dist at the listed sites into the send buffer:
	//     sendbuf[start+m] = dist[q*N + list[m]]   for m = 0 .. count-1
	// dist is indexed as (component, site) with N sites per component.
	//....................................................................................
	const int base = q*N;			// offset of component q inside dist
	int slot = 0;
	while (slot < count){
		sendbuf[start+slot] = dist[base+list[slot]];
		++slot;
	}
}
inline void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
		double *recvbuf, double *dist, int Nx, int Ny, int Nz){
	//....................................................................................
	// Unpack distribution component q from the recv buffer.
	//   q           - component of dist that receives the values
	//   Cqx,Cqy,Cqz - lattice shift applied to every listed site before storing
	//   list        - site indices (original note: these are the indices used by the
	//                 sending process), count entries
	//   start       - offset of the first value inside recvbuf
	//   dist        - even or odd distributions, indexed as (component, site)
	// Original note: under the swap rule the values in recvbuf are OPPOSITE of q.
	// The shifted indices are NOT wrapped periodically (the wrap was disabled in the
	// original code), so the shifted site must already lie inside the Nx*Ny*Nz array.
	//....................................................................................
	int i,j,k,n,nn,idx;
	int N = Nx*Ny*Nz;
	for (idx=0; idx<count; idx++){
		n = list[idx];
		// Back out the 3-D indices from n = k*Nx*Ny + j*Nx + i
		k = n/(Nx*Ny);
		j = (n-Nx*Ny*k)/Nx;
		i = n-Nx*Ny*k-Nx*j;	// row stride is Nx (was Nz, wrong when Nx != Nz)
		// Streaming for the non-local distribution
		i += Cqx;
		j += Cqy;
		k += Cqz;
		nn = k*Nx*Ny+j*Nx+i;
		// unpack the distribution to the proper location
		dist[q*N+nn] = recvbuf[start+idx];
	}
}
//*************************************************************************
inline void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz)
{
	//....................................................................................
	// In-place streaming step using the swap convention.
	// For every node n with ID[n] > 0 and for each of the nine direction pairs q=0..8,
	// the odd value stored at the node, distodd[q*N+n], is exchanged with the even value
	// stored at the periodic neighbor nn, disteven[(q+1)*N+nn], provided that the
	// neighbor's even value is > 0. Otherwise both values are left untouched.
	// Component 0 of disteven (the rest distribution) does not take part.
	//
	// The node index is decomposed as n = k*Nx*Ny + j*Nx + i. The previous version
	// used Nz instead of Nx when recovering i, which broke the x-boundary wrap on
	// domains with Nx != Nz.
	//....................................................................................
	// Neighbor offsets for pair q: (f1,f2) (f3,f4) (f5,f6) (f7,f8) (f9,f10)
	//                              (f11,f12) (f13,f14) (f15,f16) (f17,f18)
	static const int dx[9] = { 1, 0, 0, 1, 1, 1, 1, 0, 0};
	static const int dy[9] = { 0, 1, 0, 1,-1, 0, 0, 1, 1};
	static const int dz[9] = { 0, 0, 1, 0, 0, 1,-1, 1,-1};

	const int N = Nx*Ny*Nz;
	for (int n=0; n<N; n++){
		if (!(ID[n] > 0)) continue;
		//.......Back out the 3-D indices for node n..............
		const int k = n/(Nx*Ny);
		const int j = (n-Nx*Ny*k)/Nx;
		const int i = n-Nx*Ny*k-Nx*j;
		for (int q=0; q<9; q++){
			// neighbor coordinates with periodic wrap (dx is never negative)
			int ii = i+dx[q];
			int jj = j+dy[q];
			int kk = k+dz[q];
			if (!(ii<Nx)) ii -= Nx;
			if (!(jj<Ny)) jj -= Ny;
			else if (jj<0) jj += Ny;
			if (!(kk<Nz)) kk -= Nz;
			else if (kk<0) kk += Nz;
			const int nn = kk*Nx*Ny+jj*Nx+ii;
			const double fodd = distodd[q*N+n];
			const double feven = disteven[(q+1)*N+nn];
			if (feven > 0){
				distodd[q*N+n] = feven;
				disteven[(q+1)*N+nn] = fodd;
			}
		}
	}
}

6
cpu/D3Q19.h Normal file
View File

@ -0,0 +1,6 @@
extern void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N);
extern void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
double *recvbuf, double *dist, int Nx, int Ny, int Nz);
//*************************************************************************
extern void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);

7
cpu/D3Q19.h~ Normal file
View File

@ -0,0 +1,7 @@
extern void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N);
extern void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
double *recvbuf, double *dist, int Nx, int Ny, int Nz);
//*************************************************************************
extern void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);

BIN
cpu/D3Q19.o Normal file

Binary file not shown.

54
cpu/D3Q7.cpp Normal file
View File

@ -0,0 +1,54 @@
// CPU Functions for D3Q7 Lattice Boltzmann Methods
extern void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
	//....................................................................................
	// Gather the values of Data at the listed sites into the send buffer:
	//     sendbuf[m] = Data[list[m]]   for m = 0 .. count-1
	// N is not used by this routine.
	//....................................................................................
	int slot = 0;
	while (slot < count){
		sendbuf[slot] = Data[list[slot]];
		++slot;
	}
}
extern void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
	//....................................................................................
	// Scatter the recv buffer back to the listed sites of Data:
	//     Data[list[m]] = recvbuf[m]   for m = 0 .. count-1
	// N is not used by this routine.
	//....................................................................................
	int slot = 0;
	while (slot < count){
		Data[list[slot]] = recvbuf[slot];
		++slot;
	}
}
extern void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
	//....................................................................................
	// Pack `number` consecutive components per listed site into the send buffer:
	//     sendbuf[m*number+c] = Data[number*list[m]+c]
	// Each packed entry of Data is reset to zero once it has been copied.
	// N is not used by this routine.
	//....................................................................................
	for (int slot=0; slot<count; ++slot){
		const int site = number*list[slot];	// first component of this site in Data
		const int out = slot*number;		// first component of this site in sendbuf
		for (int c=0; c<number; ++c){
			sendbuf[out+c] = Data[site+c];
			Data[site+c] = 0.0;
		}
	}
}
extern void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
	//....................................................................................
	// Unpack `number` components per listed site from the recv buffer and ADD them
	// to the values already stored in Data:
	//     Data[number*list[m]+c] += recvbuf[m*number+c]
	// N is not used by this routine.
	//....................................................................................
	for (int slot=0; slot<count; ++slot){
		const int site = number*list[slot];	// first component of this site in Data
		const int in = slot*number;		// first component of this site in recvbuf
		for (int c=0; c<number; ++c){
			Data[site+c] += recvbuf[in+c];
		}
	}
}

53
cpu/D3Q7.cpp~ Normal file
View File

@ -0,0 +1,53 @@
// CPU Functions for D3Q7 Lattice Boltzmann Methods
inline void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
	//....................................................................................
	// Copy Data at each listed site into consecutive send-buffer slots,
	// i.e. sendbuf[m] receives Data[list[m]]. N is unused.
	//....................................................................................
	for (int m=0; m!=count && m<count; ++m){
		const int site = list[m];
		sendbuf[m] = Data[site];
	}
}
inline void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
	//....................................................................................
	// Store consecutive recv-buffer slots at the listed sites of Data,
	// i.e. Data[list[m]] receives recvbuf[m]. N is unused.
	//....................................................................................
	for (int m=0; m!=count && m<count; ++m){
		const int site = list[m];
		Data[site] = recvbuf[m];
	}
}
inline void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
	//....................................................................................
	// Pack `number` consecutive components per listed site into the send buffer:
	//     sendbuf[m*number+c] = Data[number*list[m]+c]
	// Data itself is left unchanged. N is unused.
	//....................................................................................
	for (int m=0; m<count; ++m){
		const int from = number*list[m];
		const int to = m*number;
		int c = 0;
		while (c < number){
			sendbuf[to+c] = Data[from+c];
			++c;
		}
	}
}
inline void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
	//....................................................................................
	// Accumulate `number` components per listed site from the recv buffer:
	//     Data[number*list[m]+c] += recvbuf[m*number+c]
	// N is unused.
	//....................................................................................
	for (int m=0; m<count; ++m){
		const int to = number*list[m];
		const int from = m*number;
		int c = 0;
		while (c < number){
			Data[to+c] += recvbuf[from+c];
			++c;
		}
	}
}

9
cpu/D3Q7.h Normal file
View File

@ -0,0 +1,9 @@
// CPU Functions for D3Q7 Lattice Boltzmann Methods
extern void PackValues(int *list, int count, double *sendbuf, double *Data, int N);
extern void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N);
extern void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N);
extern void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N);

53
cpu/D3Q7.h~ Normal file
View File

@ -0,0 +1,53 @@
// CPU Functions for D3Q7 Lattice Boltzmann Methods
inline void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
	//....................................................................................
	// Fill sendbuf[0..count-1] with the entries of Data selected by list.
	// The parameter N is accepted but not used.
	//....................................................................................
	int remaining = count;
	int pos = 0;
	while (remaining > 0){
		sendbuf[pos] = Data[list[pos]];
		++pos;
		--remaining;
	}
}
inline void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
	//....................................................................................
	// Write recvbuf[0..count-1] into the entries of Data selected by list.
	// The parameter N is accepted but not used.
	//....................................................................................
	int remaining = count;
	int pos = 0;
	while (remaining > 0){
		Data[list[pos]] = recvbuf[pos];
		++pos;
		--remaining;
	}
}
inline void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
	//....................................................................................
	// For each listed site copy its `number` components from Data to the send buffer.
	// Site m of the list occupies sendbuf[m*number .. m*number+number-1] and is read
	// from Data[number*list[m] .. number*list[m]+number-1]. Data is not modified.
	// The parameter N is accepted but not used.
	//....................................................................................
	int m = 0;
	while (m < count){
		const int src = number*list[m];
		const int dst = m*number;
		for (int c=0; c<number; c++) sendbuf[dst+c] = Data[src+c];
		m++;
	}
}
inline void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
	//....................................................................................
	// For each listed site add its `number` received components to Data.
	// Site m of the list is read from recvbuf[m*number .. m*number+number-1] and summed
	// into Data[number*list[m] .. number*list[m]+number-1].
	// The parameter N is accepted but not used.
	//....................................................................................
	int m = 0;
	while (m < count){
		const int dst = number*list[m];
		const int src = m*number;
		for (int c=0; c<number; c++) Data[dst+c] += recvbuf[src+c];
		m++;
	}
}

BIN
cpu/D3Q7.o Normal file

Binary file not shown.

BIN
cpu/LBC-MPI Executable file

Binary file not shown.

BIN
cpu/LBC-MPI.o Normal file

Binary file not shown.

BIN
cpu/MRT-MPI Executable file

Binary file not shown.

BIN
cpu/MRT-MPI.o Normal file

Binary file not shown.

312
cpu/MRT.cu Normal file
View File

@ -0,0 +1,312 @@
#include <cuda.h>
#include <stdio.h>
// CUDA kernels for single-phase MRT code
// James McClure
//*************************************************************************
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz, int S)
{
	// Set the initial D3Q19 distributions for every node of an Nx*Ny*Nz array.
	// Each thread handles S nodes: n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x.
	// Nodes with ID[n] > 0 receive the constants below (rest value, first six moving
	// components, remaining twelve components); all other nodes receive -1.0 in
	// every component. Indices n >= N are skipped.
	const int N = Nx*Ny*Nz;
	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
		if (!(n<N)) continue;

		const bool active = ID[n] > 0;
		// component 0 lives only in the even array
		f_even[n] = active ? 0.3333333333333333 : -1.0;
		// remaining components come in (odd q, even q+1) pairs
		for (int q=0; q<9; q++){
			double value;
			if (!active)	value = -1.0;
			else if (q<3)	value = 0.055555555555555555;
			else		value = 0.0277777777777778;
			f_odd[q*N+n] = value;
			f_even[(q+1)*N+n] = value;
		}
	}
}
__global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz, int S)
{
	// Compute the three velocity (momentum) sums of the D3Q19 distributions and store
	// them in vel as vel[n], vel[N+n], vel[2*N+n] for every node with ID[n] > 0.
	// Each thread handles S nodes: n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x.
	// Indices n >= N and nodes with ID[n] <= 0 are skipped (vel is not written there).
	const int N = Nx*Ny*Nz;
	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
		if (!(n<N)) continue;
		if (!(ID[n] > 0)) continue;

		// Odd-numbered distributions come from distodd, even-numbered from disteven
		const double f1 = distodd[n];
		const double f2 = disteven[N+n];
		const double f3 = distodd[1*N+n];
		const double f4 = disteven[2*N+n];
		const double f5 = distodd[2*N+n];
		const double f6 = disteven[3*N+n];
		const double f7 = distodd[3*N+n];
		const double f8 = disteven[4*N+n];
		const double f9 = distodd[4*N+n];
		const double f10 = disteven[5*N+n];
		const double f11 = distodd[5*N+n];
		const double f12 = disteven[6*N+n];
		const double f13 = distodd[6*N+n];
		const double f14 = disteven[7*N+n];
		const double f15 = distodd[7*N+n];
		const double f16 = disteven[8*N+n];
		const double f17 = distodd[8*N+n];
		const double f18 = disteven[9*N+n];

		//..................Write the velocity.....................................
		// (summation order kept exactly as in the reference formulas)
		vel[n] = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
		vel[N+n] = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
		vel[2*N+n] = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
	}
}
//*************************************************************************
__global__ void MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz, int S,
		double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz)
{
	// Multi-relaxation-time collision for the D3Q19 lattice, applied in place.
	// Each thread handles S nodes: n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x.
	// Only nodes with n < N and ID[n] > 0 are processed.
	//   rlx_setA, rlx_setB - relaxation rates applied to the two groups of moments below
	//   Fx, Fy, Fz         - external force components added after the inverse transform
	// Input is read in the swap convention (even-numbered f from distodd, odd-numbered
	// f from disteven) and written back un-swapped.
	int n,N;
	// distributions
	double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
	double f10,f11,f12,f13,f14,f15,f16,f17,f18;
	// conserved moments
	double rho,jx,jy,jz;
	// non-conserved moments
	double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
	N = Nx*Ny*Nz;
	char id;
	// S - number of threadblocks per grid block
	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
		if (n<N){
			// ID must only be read after the bounds check; reading it first was an
			// out-of-bounds access for threads with n >= N.
			id = ID[n];
			if (id > 0){
				//........................................................................
				// Registers to store the distributions - read based on swap convention
				//........................................................................
				f2 = distodd[n];
				f4 = distodd[N+n];
				f6 = distodd[2*N+n];
				f8 = distodd[3*N+n];
				f10 = distodd[4*N+n];
				f12 = distodd[5*N+n];
				f14 = distodd[6*N+n];
				f16 = distodd[7*N+n];
				f18 = distodd[8*N+n];
				//........................................................................
				f0 = disteven[n];
				f1 = disteven[N+n];
				f3 = disteven[2*N+n];
				f5 = disteven[3*N+n];
				f7 = disteven[4*N+n];
				f9 = disteven[5*N+n];
				f11 = disteven[6*N+n];
				f13 = disteven[7*N+n];
				f15 = disteven[8*N+n];
				f17 = disteven[9*N+n];
				//........................................................................
				//....................compute the moments...............................................
				rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
				m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
				m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
				jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
				m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
				jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
				m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
				jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
				m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
				m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
				m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
				m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
				m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
				m13 = f8+f7-f10-f9;
				m14 = f16+f15-f18-f17;
				m15 = f12+f11-f14-f13;
				m16 = f7-f8+f9-f10-f11+f12-f13+f14;
				m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
				m18 = f11-f12-f13+f14-f15+f16+f17-f18;
				//..............incorporate external force................................................
				//jx += 0.5*Fx;
				//jy += 0.5*Fy;
				//jz += 0.5*Fz;
				//..............carry out relaxation process...............................................
				m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1);
				m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2);
				m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
				m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
				m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
				m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9);
				m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
				m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11);
				m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12);
				m13 = m13 + rlx_setA*((jx*jy/rho) - m13);
				m14 = m14 + rlx_setA*((jy*jz/rho) - m14);
				m15 = m15 + rlx_setA*((jx*jz/rho) - m15);
				m16 = m16 + rlx_setB*( - m16);
				m17 = m17 + rlx_setB*( - m17);
				m18 = m18 + rlx_setB*( - m18);
				//.................inverse transformation......................................................
				f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
				f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jx-m4)+0.05555555555555555*(m9-m10);
				f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m4-jx)+0.05555555555555555*(m9-m10);
				f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
				f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
				f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
				f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
				f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
				f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
				f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
				f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
				f11 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
				f12 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
				f13 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
				f14 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
				f15 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
						-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
				f16 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
						-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
				f17 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
						-0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
				f18 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
						-0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
				//.......................................................................................................
				// incorporate external force
				f1 += 0.16666666*Fx;
				f2 -= 0.16666666*Fx;
				f3 += 0.16666666*Fy;
				f4 -= 0.16666666*Fy;
				f5 += 0.16666666*Fz;
				f6 -= 0.16666666*Fz;
				f7 += 0.08333333333*(Fx+Fy);
				f8 -= 0.08333333333*(Fx+Fy);
				f9 += 0.08333333333*(Fx-Fy);
				f10 -= 0.08333333333*(Fx-Fy);
				f11 += 0.08333333333*(Fx+Fz);
				f12 -= 0.08333333333*(Fx+Fz);
				f13 += 0.08333333333*(Fx-Fz);
				f14 -= 0.08333333333*(Fx-Fz);
				f15 += 0.08333333333*(Fy+Fz);
				f16 -= 0.08333333333*(Fy+Fz);
				f17 += 0.08333333333*(Fy-Fz);
				f18 -= 0.08333333333*(Fy-Fz);
				//.......................................................................................................
				// Write data based on un-swapped convention
				disteven[n] = f0;
				disteven[N+n] = f2;
				disteven[2*N+n] = f4;
				disteven[3*N+n] = f6;
				disteven[4*N+n] = f8;
				disteven[5*N+n] = f10;
				disteven[6*N+n] = f12;
				disteven[7*N+n] = f14;
				disteven[8*N+n] = f16;
				disteven[9*N+n] = f18;
				distodd[n] = f1;
				distodd[N+n] = f3;
				distodd[2*N+n] = f5;
				distodd[3*N+n] = f7;
				distodd[4*N+n] = f9;
				distodd[5*N+n] = f11;
				distodd[6*N+n] = f13;
				distodd[7*N+n] = f15;
				distodd[8*N+n] = f17;
				//.......................................................................................................
			}
		}
	}
}
// Host wrapper: launch the MRT collision kernel with nblocks blocks of nthreads
// threads; every thread processes S nodes. All pointers are passed straight to the
// kernel. A failed launch is reported on stderr instead of passing unnoticed;
// the error state is only peeked at, so the caller can still query it afterwards.
extern "C" void dvc_MRT(int nblocks, int nthreads, int S, char *ID,
		double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
		int Nx, int Ny, int Nz)
{
	MRT <<< nblocks, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S,rlxA, rlxB, Fx, Fy, Fz);
	cudaError_t err = cudaPeekAtLastError();
	if (err != cudaSuccess){
		fprintf(stderr, "dvc_MRT: kernel launch failed: %s\n", cudaGetErrorString(err));
	}
}
// Host wrapper: launch the INITIALIZE kernel with nblocks blocks of nthreads
// threads; every thread processes S nodes. All pointers are passed straight to the
// kernel. A failed launch is reported on stderr instead of passing unnoticed;
// the error state is only peeked at, so the caller can still query it afterwards.
extern "C" void dvc_InitD3Q19( int nblocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
	INITIALIZE <<< nblocks, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
	cudaError_t err = cudaPeekAtLastError();
	if (err != cudaSuccess){
		fprintf(stderr, "dvc_InitD3Q19: kernel launch failed: %s\n", cudaGetErrorString(err));
	}
}

28
cpu/Makefile Normal file
View File

@ -0,0 +1,28 @@
# Build the CPU version of the color lattice Boltzmann code with the MPI C++ wrapper.
CXX=mpicxx
FLAGS=-O3

ColorLBM-cpu:D3Q19.o D3Q7.o Color.o lb2_Color_mpi.o
	$(CXX) $(FLAGS) -o ColorLBM-cpu lb2_Color_mpi.o D3Q19.o D3Q7.o Color.o

D3Q19.o:D3Q19.cpp
	$(CXX) $(FLAGS) -c -o D3Q19.o D3Q19.cpp

D3Q7.o:D3Q7.cpp
	$(CXX) $(FLAGS) -c -o D3Q7.o D3Q7.cpp

Color.o:Color.cpp
	$(CXX) $(FLAGS) -c -o Color.o Color.cpp

lb2_Color_mpi.o:lb2_Color_mpi.cpp
	$(CXX) $(FLAGS) -c -o lb2_Color_mpi.o lb2_Color_mpi.cpp

#MRT-MPI.o:lb1_MRT_mpi.cpp
#	$(CXX) -c -o MRT-MPI.o lb1_MRT_mpi.cpp

#MRT-MPI:D3Q19.o MRT-MPI.o
#	$(CXX) -o MRT-MPI D3Q19.o MRT-MPI.o

# clean is not a file; -f keeps it from failing when no objects exist yet
.PHONY: clean
clean:
	rm -f *.o
#	rm bin/*
#	rm lib/*

28
cpu/Makefile~ Normal file
View File

@ -0,0 +1,28 @@
# Build the CPU version of the color lattice Boltzmann code.
# The MPI C++ wrapper is taken from the directory named by VT_MPI_BIN.
CXX=$(VT_MPI_BIN)/mpicxx
FLAGS=-O3

ColorLBM-cpu:D3Q19.o D3Q7.o Color.o lb2_Color_mpi.o
	$(CXX) $(FLAGS) -o ColorLBM-cpu lb2_Color_mpi.o D3Q19.o D3Q7.o Color.o

D3Q19.o:D3Q19.cpp
	$(CXX) $(FLAGS) -c -o D3Q19.o D3Q19.cpp

D3Q7.o:D3Q7.cpp
	$(CXX) $(FLAGS) -c -o D3Q7.o D3Q7.cpp

Color.o:Color.cpp
	$(CXX) $(FLAGS) -c -o Color.o Color.cpp

lb2_Color_mpi.o:lb2_Color_mpi.cpp
	$(CXX) $(FLAGS) -c -o lb2_Color_mpi.o lb2_Color_mpi.cpp

#MRT-MPI.o:lb1_MRT_mpi.cpp
#	$(CXX) -c -o MRT-MPI.o lb1_MRT_mpi.cpp

#MRT-MPI:D3Q19.o MRT-MPI.o
#	$(CXX) -o MRT-MPI D3Q19.o MRT-MPI.o

# clean is not a file; -f keeps it from failing when no objects exist yet
.PHONY: clean
clean:
	rm -f *.o
#	rm bin/*
#	rm lib/*

248
cpu/lb1_MRT-swap.cu Normal file
View File

@ -0,0 +1,248 @@
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cuda.h>
//#include <cutil.h>
using namespace std;
//*************************************************************************
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
// Dump an Nx*Ny*Nz array to the text file "dist.list" in the current directory.
// Every value is truncated to int and printed as "| <value>"; each row of Nx values
// ends with " | ", and each k-slice is followed by a line of asterisks.
// array is indexed as array[k*Nx*Ny + j*Nx + i].
// If the file cannot be opened, a message is printed and nothing is written
// (previously the NULL stream was passed on to fprintf).
void Write_Out(double *array, int Nx, int Ny, int Nz){
	int value;
	FILE *output;
	output = fopen("dist.list","w");
	if (output == NULL){
		perror("Write_Out: cannot open dist.list");
		return;
	}
	for (int k=0; k<Nz; k++){
		for (int j=0; j<Ny; j++){
			for (int i=0; i<Nx; i++){
				int index = k*Nx*Ny+j*Nx+i;
				value = int(array[index]);
				fprintf(output, "| %i",value);
			}
			fprintf(output, " | \n");
		}
		fprintf(output,"************************************** \n");
	}
	fclose(output);
}
//**************************************************************************
// MRT implementation of the LBM using CUDA
//**************************************************************************
// Single-GPU driver for the D3Q19 MRT lattice Boltzmann test case.
//
// Steps:
//   1. select a CUDA device
//   2. read the run parameters from "MRT.in"
//   3. load the binary phase-indicator file named in MRT.in (one char per node)
//   4. allocate the device arrays and call dvc_InitD3Q19
//   5. call dvc_SwapD3Q19 + dvc_MRT once per timestep, timed with CUDA events
//   6. copy the velocity buffer back and write one row of it to "velocity.out"
//
// Returns 0 on success and 1 when an input file or a device allocation fails.
int main(void)
{
	//...........select the device...........................................
	int deviceCount = 0;
	cudaGetDeviceCount(&deviceCount);
	// Device 1 is kept as the default; machines exposing fewer than two
	// devices fall back to device 0 instead of failing in cudaSetDevice.
	int device = 1;
	if (deviceCount < 2) device = 0;
	printf("Number of devices = %i \n", deviceCount);
	printf("Current device is = %i \n", device);
	cudaSetDevice(device);

	//...........model parameters............................................
	string FILENAME;
	unsigned int nBlocks = 0, nthreads = 0;
	int timestepMax = 0, interval = 0;
	double tau = 0.0, Fx = 0.0, Fy = 0.0, Fz = 0.0, tol = 0.0;
	// Domain variables
	int Nx, Ny, Nz = 0;

	ifstream input("MRT.in");
	input >> FILENAME;			// name of the input file
	input >> Nz;				// number of nodes per side
	input >> nBlocks;
	input >> nthreads;
	input >> tau;				// relaxation time
	input >> Fx;				// External force components (x,y,z)
	input >> Fy;
	input >> Fz;
	input >> timestepMax;		// max no. of timesteps
	// Everything read so far is used below; zero blocks or threads would
	// also divide by zero when the sweep count S is computed.
	if (!input || Nz <= 0 || nBlocks == 0 || nthreads == 0){
		fprintf(stderr, "Error: MRT.in is missing or holds invalid parameters \n");
		return 1;
	}
	// Trailing values are read for completeness but not used by this driver.
	input >> interval;			// error interval
	input >> tol;				// error tolerance

	double rlx_setA = 1.f/tau;
	double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
	printf("tau = %f \n", tau);
	printf("Set A = %f \n", rlx_setA);
	printf("Set B = %f \n", rlx_setB);
	printf("Force(x) = %f \n", Fx);
	printf("Force(y) = %f \n", Fy);
	printf("Force(z) = %f \n", Fz);

	Nx = Ny = Nz;	// Cubic domain
	int N = Nx*Ny*Nz;
	// Bytes in one double-precision field. size_t keeps 10*dist_mem_size
	// from overflowing int on larger domains.
	size_t dist_mem_size = N*sizeof(double);
	int S = N/nthreads/nBlocks;
	printf("Number of blocks = %i \n", nBlocks);
	printf("Threads per block = %i \n", nthreads);
	printf("Sweeps per thread = %i \n", S);
	printf("Number of nodes per side = %i \n", Nx);
	printf("Total Number of nodes = %i \n", N);

	//...........read the phase indicator file................................
	printf("Read input media... \n");
	char *id = new char[N];
	ifstream PM(FILENAME.c_str(),ios::binary);
	PM.read(id, N);
	if (!PM){
		fprintf(stderr, "Error: unable to read %i bytes from %s \n", N, FILENAME.c_str());
		delete [] id;
		return 1;
	}
	PM.close();
	// Count the nodes with a positive indicator value
	int sum = 0;
	for (int n=0; n<N; n++){
		if (id[n] > 0) sum++;
	}
	printf("File porosity = %f\n", double(sum)/N);

	//...........device arrays.................................................
	char *ID = NULL;
	double *f_even = NULL, *f_odd = NULL;
	if (cudaMalloc((void **) &ID, N) != cudaSuccess
		|| cudaMalloc((void **) &f_even, 10*dist_mem_size) != cudaSuccess
		|| cudaMalloc((void **) &f_odd, 9*dist_mem_size) != cudaSuccess){
		fprintf(stderr, "Error: device allocation failed: %s \n",
				cudaGetErrorString(cudaGetLastError()));
		cudaFree(ID); cudaFree(f_even); cudaFree(f_odd);
		delete [] id;
		return 1;
	}
	// Copy the phase indicator to the device
	cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice);

	printf("Setting the distributions, size = : %i\n", N);
	dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);

	//*************************************************************************
	int timestep = 0;
	printf("No. of timesteps: %i \n", timestepMax);
	//.......create a stream for the LB calculation.......
	cudaStream_t stream;
	cudaStreamCreate(&stream);
	//.......create and start timer............
	cudaEvent_t start, stop;
	float time;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord( start, 0 );

	//************ MAIN ITERATION LOOP ***************************************/
	while (timestep < timestepMax){
		//........ swap step ................................................
		dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
		//........ collision step ...........................................
		dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);
		// Iteration completed!
		timestep++;
	}
	//************************************************************************/
	// cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize
	cudaDeviceSynchronize();
	//.......... stop and destroy timer.............................
	// NOTE(review): start is recorded in stream 0 and stop in `stream`;
	// confirm that this is intended.
	cudaEventRecord( stop, stream);
	cudaEventSynchronize( stop );
	cudaEventElapsedTime( &time, start, stop );	// milliseconds
	printf("CPU time = %f \n", time);
	float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
	printf("MLUPS = %f \n", MLUPS);
	cudaStreamDestroy(stream);
	cudaEventDestroy( start );
	cudaEventDestroy( stop );

	//.........Copy the velocity buffer to the host ................
	double *velocity = new double[3*N]();
	double *vel = NULL;
	cudaMalloc((void **) &vel, 3*dist_mem_size);
	// NOTE(review): no kernel in this driver fills vel (the Compute_VELOCITY
	// launch was never enabled). The buffer is zeroed so that the values
	// written below are defined rather than uninitialized device memory.
	cudaMemset(vel, 0, 3*dist_mem_size);
	cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);

	//....Write the z-velocity to test poiseuille flow............
	// Only the first row (j = 0, k = 0) is written. The z-component is read
	// from the third block of N values.
	double vz,vz_avg;
	vz_avg = 0.0;
	FILE *output = fopen("velocity.out","w");
	if (output == NULL) fprintf(stderr, "Warning: unable to open velocity.out for writing \n");
	for (int i=0; i<Nx; i++){
		vz = velocity[2*N+i];
		vz_avg += vz;
		if (output != NULL) fprintf(output, " %e",vz);
	}
	if (output != NULL) fclose(output);
	vz = vz_avg/double(sum);
	printf("Average Velocity = %e\n", vz);

	// cleanup
	cudaFree(f_even); cudaFree(f_odd); cudaFree(vel); cudaFree(ID);
	// Both host arrays come from new[], so they are released with delete[]
	delete [] velocity;
	delete [] id;
	return 0;
}

246
cpu/lb1_MRT.cu Normal file
View File

@ -0,0 +1,246 @@
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cuda.h>
using namespace std;
//*************************************************************************
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
//*************************************************************************
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
//*************************************************************************
extern "C" void dvc_Barrier();
//*************************************************************************
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
// Dump a scalar field to the text file "dist.list" in the working directory.
//   array      - Nx*Ny*Nz values, x index varying fastest, then y, then z
//   Nx, Ny, Nz - grid dimensions
// Every value is truncated to int before printing. One text row is written
// per (j,k) pair and a row of asterisks closes each z-slice.
// If the file cannot be opened a message goes to stderr and nothing is written.
void Write_Out(double *array, int Nx, int Ny, int Nz){
	FILE *output = fopen("dist.list","w");
	if (output == NULL){
		// A failed fopen previously fell through to fprintf on a NULL stream.
		fprintf(stderr, "Write_Out: unable to open dist.list for writing \n");
		return;
	}
	for (int k=0; k<Nz; k++){
		for (int j=0; j<Ny; j++){
			for (int i=0; i<Nx; i++){
				// Same offset as k*Nx*Ny+j*Nx+i, evaluated in size_t so
				// large grids do not overflow int arithmetic.
				size_t index = ((size_t)k*Ny + j)*Nx + i;
				int value = int(array[index]);
				fprintf(output, "| %i",value);
			}
			fprintf(output, " | \n");
		}
		fprintf(output,"************************************** \n");
	}
	fclose(output);
}
//**************************************************************************
// MRT implementation of the LBM using CUDA
//**************************************************************************
// Driver for the D3Q19 MRT lattice Boltzmann test case, written against the
// dvc_* wrapper interface.
//
// Steps:
//   1. read the run parameters from "MRT.in"
//   2. load the binary phase-indicator file named in MRT.in (one char per node)
//   3. allocate and fill the device arrays through dvc_AllocateDeviceMemory /
//      dvc_CopyToDevice and call dvc_InitD3Q19
//   4. call dvc_SwapD3Q19 + dvc_MRT once per timestep, timed with CUDA events
//
// Returns 0 on success and 1 when an input file cannot be used.
int main(void)
{
	//...........model parameters............................................
	string FILENAME;
	unsigned int nBlocks = 0, nthreads = 0;
	int timestepMax = 0, interval = 0;
	double tau = 0.0, Fx = 0.0, Fy = 0.0, Fz = 0.0, tol = 0.0;
	// Domain variables
	int Nx, Ny, Nz = 0;

	ifstream input("MRT.in");
	input >> FILENAME;			// name of the input file
	input >> Nz;				// number of nodes per side
	input >> nBlocks;
	input >> nthreads;
	input >> tau;				// relaxation time
	input >> Fx;				// External force components (x,y,z)
	input >> Fy;
	input >> Fz;
	input >> timestepMax;		// max no. of timesteps
	// Everything read so far is used below; zero blocks or threads would
	// also divide by zero when the sweep count S is computed.
	if (!input || Nz <= 0 || nBlocks == 0 || nthreads == 0){
		fprintf(stderr, "Error: MRT.in is missing or holds invalid parameters \n");
		return 1;
	}
	// Trailing values are read for completeness but not used by this driver.
	input >> interval;			// error interval
	input >> tol;				// error tolerance

	double rlx_setA = 1.f/tau;
	double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
	printf("tau = %f \n", tau);
	printf("Set A = %f \n", rlx_setA);
	printf("Set B = %f \n", rlx_setB);
	printf("Force(x) = %f \n", Fx);
	printf("Force(y) = %f \n", Fy);
	printf("Force(z) = %f \n", Fz);

	Nx = Ny = Nz;	// Cubic domain
	int N = Nx*Ny*Nz;
	// Bytes in one double-precision field. size_t matches the wrapper's size
	// parameter and keeps 10*dist_mem_size from overflowing int.
	size_t dist_mem_size = N*sizeof(double);
	int S = N/nthreads/nBlocks;
	printf("Number of blocks = %i \n", nBlocks);
	printf("Threads per block = %i \n", nthreads);
	printf("Sweeps per thread = %i \n", S);
	printf("Number of nodes per side = %i \n", Nx);
	printf("Total Number of nodes = %i \n", N);

	//...........read the phase indicator file................................
	printf("Read input media... \n");
	char *id = new char[N];
	ifstream PM(FILENAME.c_str(),ios::binary);
	PM.read(id, N);
	if (!PM){
		fprintf(stderr, "Error: unable to read %i bytes from %s \n", N, FILENAME.c_str());
		delete [] id;
		return 1;
	}
	PM.close();
	// Count the nodes with a positive indicator value
	int sum = 0;
	for (int n=0; n<N; n++){
		if (id[n] > 0) sum++;
	}
	printf("File porosity = %f\n", double(sum)/N);

	//...........device phase ID..............................................
	char *ID;
	dvc_AllocateDeviceMemory((void **) &ID, N);
	dvc_CopyToDevice(ID, id, N);
	//...........device distributions.........................................
	double *f_even,*f_odd;
	dvc_AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size);
	dvc_AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size);

	printf("Setting the distributions, size = : %i\n", N);
	dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);

	//*************************************************************************
	int timestep = 0;
	printf("No. of timesteps: %i \n", timestepMax);
	//.......create a stream for the LB calculation.......
	cudaStream_t stream;
	cudaStreamCreate(&stream);
	//.......create and start timer............
	cudaEvent_t start, stop;
	float time;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord( start, 0 );

	//************ MAIN ITERATION LOOP ***************************************/
	while (timestep < timestepMax){
		//........ swap step ................................................
		dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
		//........ collision step ...........................................
		dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);
		// Iteration completed!
		timestep++;
	}
	//************************************************************************/
	dvc_Barrier();
	//.......... stop and destroy timer.............................
	// NOTE(review): start is recorded in stream 0 and stop in `stream`;
	// confirm that this is intended.
	cudaEventRecord( stop, stream);
	cudaEventSynchronize( stop );
	cudaEventElapsedTime( &time, start, stop );	// milliseconds
	printf("CPU time = %f \n", time);
	float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
	printf("MLUPS = %f \n", MLUPS);
	cudaStreamDestroy(stream);
	cudaEventDestroy( start );
	cudaEventDestroy( stop );

#if 0
	// Disabled velocity post-processing, kept for reference. It was commented
	// out in the original; the Compute_VELOCITY launch and the copy back to
	// the host were never enabled.
	double *velocity;
	velocity = new double[3*N];
	double *vel;
	dvc_AllocateDeviceMemory((void **) &vel, 3*dist_mem_size);
	// Compute_VELOCITY <<< grid, nthreads >>> (ID, f_even, f_odd, vel, Nx, Ny, Nz, S);
	// cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);
	//....Write the z-velocity to test poiseuille flow............
	double vz,vz_avg;
	vz_avg = 0.0;
	FILE *output;
	output = fopen("velocity.out","w");
	for (int k=0; k<1; k++){
		for (int j=0; j<1; j++){
			for (int i=0; i<Nx; i++){
				int n = k*Nx*Ny+j*Nx+i;
				vz = velocity[2*N+n];
				vz_avg += vz;
				fprintf(output, " %e",vz);
			}
		}
	}
	fclose(output);
	vz = vz_avg/double(sum);
	printf("Average Velocity = %e\n", vz);
#endif

	// cleanup
	// NOTE(review): ID, f_even and f_odd come from dvc_AllocateDeviceMemory and
	// no matching release wrapper is declared in this file, so they are left
	// to be reclaimed at process exit. Confirm how they should be freed.
	delete [] id;
	return 0;
}

1450
cpu/lb1_MRT_mpi.cpp Normal file

File diff suppressed because it is too large Load Diff

1836
cpu/lb1_MRT_mpi.cu Normal file

File diff suppressed because it is too large Load Diff

423
cpu/lb2_Color.cu Normal file
View File

@ -0,0 +1,423 @@
#ifdef useMPI
#include <mpi.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <math.h>
#include <cuda.h>
using namespace std;
//*************************************************************************
// HokieSpeed
//nvcc -Xcompiler -fopenmp -lgomp -O3 -arch sm_20 -o hybridATLKR lb2_ATLKR_hybrid.cu
// -I$VT_MPI_INC -L$VT_MPI_LIB -lmpi
//*************************************************************************
//*************************************************************************
// Implementation of Two-Phase Immiscible LBM using CUDA
//*************************************************************************
//*************************************************************************
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S,
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
//*************************************************************************
extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S,
char *ID, double *Den, double *Phi, double das, double dbs, int N);
//*************************************************************************
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz);
//*************************************************************************
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S,
char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity,
double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz,
int Nx, int Ny, int Nz, bool pBC);
//*************************************************************************
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
double beta, int Nx, int Ny, int Nz, bool pBC);
//*************************************************************************
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S,
char *ID, double *Phi, double *Copy, double *Den, int N);
//*************************************************************************
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
//*************************************************************************
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
//*************************************************************************
extern "C" void dvc_Barrier();
//*************************************************************************
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S,
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
//*************************************************************************
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
int sendCount, double *sendbuf, double *Dist, int N);
//*************************************************************************
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz);
//*************************************************************************
// Report a fatal error on stderr and stop the run. In an MPI build every rank
// is aborted. Returns the process exit status so that main can write
// `return AbortRun(...)`.
static int AbortRun(const char *message)
{
	fprintf(stderr, "Error: %s \n", message);
#ifdef useMPI
	MPI_Abort(MPI_COMM_WORLD, 1);
#endif
	return 1;
}

// Driver for the two-phase colour lattice Boltzmann model.
//
// Steps:
//   1. rank 0 reads the run parameters from "Color.in"; with useMPI defined
//      they are broadcast to every rank
//   2. rank 0 reads the binary phase-indicator file (one char per node) and
//      the domain is broadcast
//   3. device arrays are allocated and initialised through the dvc_* wrappers
//   4. each timestep calls, in order: dvc_ComputeColorGradient,
//      dvc_ColorCollide, dvc_DensityStreamD3Q7, dvc_SwapD3Q19, dvc_ComputePhi
//   5. the phase indicator and colour gradient are written to "Phase.out" and
//      "ColorGrad.out" as raw doubles
//
// Returns 0 on success and 1 after a fatal input or allocation error.
int main(int argc, char *argv[])
{
	//********** Initialize MPI ****************
	int numprocs,rank;
#ifdef useMPI
	MPI_Init(&argc,&argv);
	MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
	MPI_Comm_rank(MPI_COMM_WORLD,&rank);
#else
	numprocs = 1;
	rank = 0;
#endif
	//******************************************
	if (rank == 0){
		printf("********************************************************\n");
		printf("Running Hybrid Implementation of Color LBM	\n");
		printf("********************************************************\n");
	}

	// Color Model parameters (given defined values so that non-root ranks
	// never read indeterminate data before the broadcast)
	string FILENAME;
	unsigned int nBlocks = 0, nthreads = 0;
	int Nx, Ny, Nz = 0;
	int timestepMax = 0, interval = 0;
	double tau = 0.0, Fx = 0.0, Fy = 0.0, Fz = 0.0, tol = 0.0;
	double alpha = 0.0, beta = 0.0;
	double das = 0.0, dbs = 0.0;
	double din = 0.0, dout = 0.0;
	bool pBC = false;

	if (rank==0){
		//.............................................................
		//		READ SIMULATION PARAMETERS FROM INPUT FILE
		//.............................................................
		ifstream input("Color.in");
		// Line 1: Name of the phase indicator file (s=0,w=1,n=2)
		input >> FILENAME;
		// Line 2: domain size and launch configuration
		input >> Nz;				// number of nodes per side
		input >> nBlocks;
		input >> nthreads;
		// Line 3: model parameters (tau, alpha, beta, das, dbs)
		input >> tau;
		input >> alpha;
		input >> beta;
		input >> das;
		input >> dbs;
		// Line 4: External force components (Fx,Fy, Fz)
		input >> Fx;
		input >> Fy;
		input >> Fz;
		// Line 5: Pressure Boundary conditions
		input >> pBC;
		input >> din;
		input >> dout;
		// Line 6: time-stepping criteria
		input >> timestepMax;		// max no. of timesteps
		// Zero blocks or threads would divide by zero when S is computed.
		if (!input || Nz <= 0 || nBlocks == 0 || nthreads == 0)
			return AbortRun("Color.in is missing or holds invalid parameters");
		// Trailing values are broadcast but not otherwise used by this driver.
		input >> interval;			// error interval
		input >> tol;				// error tolerance
		//.............................................................
	}
#ifdef useMPI
	// **************************************************************
	// Broadcast simulation parameters from rank 0 to all other procs
	MPI_Barrier(MPI_COMM_WORLD);
	//.................................................
	MPI_Bcast(&Nz,1,MPI_INT,0,MPI_COMM_WORLD);
	// nBlocks / nthreads are unsigned int, so use the matching MPI type
	MPI_Bcast(&nBlocks,1,MPI_UNSIGNED,0,MPI_COMM_WORLD);
	MPI_Bcast(&nthreads,1,MPI_UNSIGNED,0,MPI_COMM_WORLD);
	MPI_Bcast(&Fx,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&Fy,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&Fz,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&tau,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&alpha,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&beta,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&das,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&dbs,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	// MPI_LOGICAL describes a Fortran LOGICAL, not a C++ bool, and the two
	// differ in size. The flag is sent through an int instead.
	int pBCflag = pBC ? 1 : 0;
	MPI_Bcast(&pBCflag,1,MPI_INT,0,MPI_COMM_WORLD);
	pBC = (pBCflag != 0);
	MPI_Bcast(&din,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&dout,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&timestepMax,1,MPI_INT,0,MPI_COMM_WORLD);
	MPI_Bcast(&interval,1,MPI_INT,0,MPI_COMM_WORLD);
	MPI_Bcast(&tol,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	//.................................................
	MPI_Barrier(MPI_COMM_WORLD);
	// **************************************************************
#endif

	double rlxA = 1.f/tau;
	double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA);

	if (pBC && rank == 0){
		printf("Assigning presusre boundary conditions \n");
		printf("Inlet density = %f \n", din);
		printf("Outlet density = %f \n", dout);
	}
	if (rank==0){
		printf("....Parameters................\n");
		printf("tau = %f \n", tau);
		printf("alpha = %f \n", alpha);
		printf("beta = %f \n", beta);
		printf("das = %f \n", das);
		printf("dbs = %f \n", dbs);
		printf("Force(x) = %f \n", Fx);
		printf("Force(y) = %f \n", Fy);
		printf("Force(z) = %f \n", Fz);
		printf("Nz = %i \n", Nz);
		printf("timestepMax = %i \n", timestepMax);
		printf("...............................\n");
	}

	// Identical cubic sub-domains
	Nx = Ny = Nz;
	int N = Nx*Ny*Nz;
	// Bytes in one double-precision field. size_t keeps 10*dist_mem_size
	// from overflowing int on larger domains.
	size_t dist_mem_size = N*sizeof(double);
	// Sweeps per thread, rounded up so that nBlocks*nthreads*S covers N
	int S = N/nthreads/nBlocks;
	if (nBlocks*nthreads*S < N) S++;

	// The test was rank==1, which never printed in a single-process run;
	// every other report in this driver comes from rank 0.
	if (rank==0){
		printf("Number of blocks = %i \n", nBlocks);
		printf("Threads per block = %i \n", nthreads);
		printf("Sweeps per thread = %i \n", S);
		printf("Number of nodes per side = %i \n", Nx);
		printf("Total Number of nodes = %i \n", N);
		printf("...............................\n");
	}

	//.......................................................................
	// .......... READ THE INPUT FILE .......................................
	int n;
	char value = 0;
	char *id = new char[N];
	int sum = 0;
	// RANK 0 READS THE INPUT FILE
	if (rank==0){
		printf("Read input media... \n");
		ifstream PM(FILENAME.c_str(),ios::binary);
		for (int k=0;k<Nz;k++){
			for (int j=0;j<Ny;j++){
				for (int i=0;i<Nx;i++){
					PM.read((char *) (&value), sizeof(value));
					n = k*Nx*Ny+j*Nx+i;
					if (value>0){
						if (pBC) value=2;	// Saturate with NWP
						if (k<8){
							value=1;
						}
					}
					id[n] = value;
					if (value > 0) sum++;
				}
			}
		}
		// A missing or short file leaves the stream in a failed state
		if (!PM){
			delete [] id;
			return AbortRun("unable to read the phase indicator file");
		}
		PM.close();
		printf("File porosity = %f\n", double(sum)/N);
	}

	//......... for pressure BC only............................
	// Void the first / last four z-layers if pressure BC are to be used.
	// Two bounded loops replace the original skip-ahead loop, which never
	// terminated for 5 <= Nz <= 8. For small domains the outlet assignment
	// still overrides the inlet, as before.
	if (pBC){
		int inletEnd = (Nz < 4) ? Nz : 4;
		int outletStart = (Nz > 4) ? Nz-4 : 0;
		for (n=0; n<inletEnd*Nx*Ny; n++) id[n] = 1;
		for (n=outletStart*Nx*Ny; n<N; n++) id[n] = 2;
	}

#ifdef useMPI	//............................................................
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Bcast(&id[0],N,MPI_CHAR,0,MPI_COMM_WORLD);
	MPI_Barrier(MPI_COMM_WORLD);
#endif
	if (rank == 0) printf("Domain set.\n");

	//...........................................................................
	if (pBC){
		int SBC = Nx*Ny/nthreads/nBlocks+1;
		printf("Number of sweeps for inlet / outlet: %i \n", SBC);
	}

	//...........device arrays....................................................
	char *ID = NULL;
	double *f_even = NULL, *f_odd = NULL;
	double *Phi = NULL, *Den = NULL, *Copy = NULL;
	double *ColorGrad = NULL, *Velocity = NULL;
	if (cudaMalloc((void **) &ID, N) != cudaSuccess
		|| cudaMalloc((void **) &f_even, 10*dist_mem_size) != cudaSuccess
		|| cudaMalloc((void **) &f_odd, 9*dist_mem_size) != cudaSuccess
		|| cudaMalloc((void **) &Phi, dist_mem_size) != cudaSuccess
		|| cudaMalloc((void **) &Den, 2*dist_mem_size) != cudaSuccess
		|| cudaMalloc((void **) &Copy, 2*dist_mem_size) != cudaSuccess
		|| cudaMalloc((void **) &Velocity, 3*dist_mem_size) != cudaSuccess
		|| cudaMalloc((void **) &ColorGrad, 3*dist_mem_size) != cudaSuccess){
		delete [] id;
		return AbortRun("device memory allocation failed");
	}
	// Copy the phase indicator to the device
	cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice);

	//...........................................................................
	if (rank==0) printf("Setting the distributions, size = : %i\n", N);
	//...........................................................................
	dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
	dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, N);
	dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);
	//...........................................................................

	int timestep = 0;
	if (rank==0) printf("No. of timesteps: %i \n", timestepMax);

	//.......create a stream, create and start timer............
	cudaEvent_t start, stop;
	float time;
	cudaStream_t stream;
	cudaStreamCreate(&stream);
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord( start, 0 );

	//************ MAIN TIMESTEP LOOP ***************************************/
	while (timestep < timestepMax){
		// Compute the color gradient
		dvc_ComputeColorGradient(nBlocks, nthreads, S,
				ID, Phi, ColorGrad, Nx, Ny, Nz);
		// Perform collision step for the momentum transport
		dvc_ColorCollide(nBlocks, nthreads, S,
				ID, f_even, f_odd, ColorGrad, Velocity,
				rlxA, rlxB,alpha, beta, Fx, Fy, Fz, Nx, Ny, Nz, pBC);
		// Carry out the density streaming step for mass transport
		dvc_DensityStreamD3Q7(nBlocks, nthreads, S,
				ID, Den, Copy, Phi, ColorGrad, Velocity,beta, Nx, Ny, Nz, pBC);
		// Swap the distributions for momentum transport
		dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
		// Compute the phase indicator field and reset Copy, Den
		dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);

		dvc_Barrier();
		timestep++;
	}
	//************************************************************************/
	dvc_Barrier();
	//.......... stop and destroy timer.............................
	// NOTE(review): start is recorded in stream 0 and stop in `stream`;
	// confirm that this is intended.
	cudaEventRecord( stop, stream);
	cudaEventSynchronize( stop );
	cudaEventElapsedTime( &time, start, stop );	// milliseconds
	printf("CPU time = %f \n", time);
	float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
	printf("MLUPS = %f \n", MLUPS);
	cudaEventDestroy( start );
	cudaEventDestroy( stop );
	cudaStreamDestroy(stream);	// the stream was previously never released

	//...........write results.....................................................
	// NOTE(review): every rank writes the same file names; confirm this is
	// intended for multi-process runs.
	double *Data = new double[3*N];

	// Write out the Phase Indicator Field
	cudaMemcpy(Data, Phi, dist_mem_size, cudaMemcpyDeviceToHost);
	FILE *phase = fopen("Phase.out","wb");
	if (phase != NULL){
		fwrite(Data,sizeof(double),N,phase);
		fclose(phase);
	}
	else fprintf(stderr, "Warning: unable to open Phase.out for writing \n");

	// Write out the Color Gradient
	cudaMemcpy(Data, ColorGrad, 3*dist_mem_size, cudaMemcpyDeviceToHost);
	FILE *CG = fopen("ColorGrad.out","wb");
	if (CG != NULL){
		fwrite(Data,sizeof(double),3*N,CG);
		fclose(CG);
	}
	else fprintf(stderr, "Warning: unable to open ColorGrad.out for writing \n");

	// cleanup: each device buffer is released exactly once (Phi used to be
	// freed twice), and the new[] host arrays are released with delete[]
	cudaFree(ID);
	cudaFree(f_even); cudaFree(f_odd);
	cudaFree(Velocity);
	cudaFree(ColorGrad);
	cudaFree(Den); cudaFree(Copy);
	cudaFree(Phi);
	delete [] Data;
	delete [] id;

	//***********Finish up!*********************************
#ifdef useMPI
	MPI_Finalize();
#endif
	return 0;
}

1464
cpu/lb2_Color_mpi.cpp Normal file

File diff suppressed because it is too large Load Diff

1487
cpu/lb2_Color_mpi.cpp~ Normal file

File diff suppressed because it is too large Load Diff

8
example-cpu/Color.in Normal file
View File

@ -0,0 +1,8 @@
ID
80 32 128
1.0
1.0e-2 0.9 0.1 0.9
0.0 0.0 0.0
0 1.0 1.0
500 500 1e-5

1
example-cpu/Domain.in Normal file
View File

@ -0,0 +1 @@
2 2 2

BIN
example-cpu/ID.00000 Normal file

Binary file not shown.

BIN
example-cpu/ID.00001 Normal file

Binary file not shown.

BIN
example-cpu/ID.00002 Normal file

Binary file not shown.

BIN
example-cpu/ID.00003 Normal file

Binary file not shown.

BIN
example-cpu/ID.00004 Normal file

Binary file not shown.

BIN
example-cpu/ID.00005 Normal file

Binary file not shown.

BIN
example-cpu/ID.00006 Normal file

Binary file not shown.

BIN
example-cpu/ID.00007 Normal file

Binary file not shown.

View File

@ -0,0 +1,27 @@
#!/bin/bash
# PBS job script: runs the CPU build of the Color LBM with one MPI rank per
# requested core ($PBS_NP) from the directory the job was submitted in.
#PBS -l walltime=00:10:00
#PBS -l nodes=1:ppn=8
# Access group, queue, and accounting project
#PBS -W group_list=hokiespeed
# Queue name. Replace normal_q with long_q to submit a job to the long queue.
#PBS -q normal_q
#PBS -A hokiespeed
echo "------------------------------------------"
echo "Running Color LBM using MPI!"
echo "Number of processors = " "$PBS_NP"
echo "------------------------------------------"
module purge
#module load intel mvapich2
module load gcc cuda mvapich2/1.9rc1
# Stop if the submission directory is unavailable, so that mpirun is not
# started from another directory; quoting keeps paths with spaces intact.
cd "$PBS_O_WORKDIR" || exit 1
export MV2_CPU_BINDING_POLICY=scatter
#export VIADEV_CPU_MAPPING=0:2:3:4:1:5:6:7
export MV2_SHOW_CPU_BINDING=1
mpirun -np "$PBS_NP" ~/LBPM-WIA/cpu/ColorLBM-cpu
# exit without an argument passes on the status of mpirun
exit

404
example-cpu/pack.out Normal file
View File

@ -0,0 +1,404 @@
Number of Spheres: 399
Domain Length (x,y,z): 1, 1, 1
Media porosity: 0.369
log(r) Normal with mean -2.47144, variance 0
Mean coordination No. 11.84
0.0358075 0.0887041 0.112468 0.0844631
0.118482 0.318651 0.473828 0.0844631
0.582752 0.0454491 0.529264 0.0844631
0.852622 0.828954 0.547441 0.0844631
0.807778 0.54718 0.595125 0.0844631
0.557502 0.0185902 0.694119 0.0844631
0.832159 0.72426 0.207901 0.0844631
0.84196 0.996673 0.530338 0.0844631
0.0811649 0.0792821 0.70289 0.0844631
0.994893 0.869664 0.237064 0.0844631
0.488065 0.221434 0.32131 0.0844631
0.170562 0.759156 0.532848 0.0844631
0.844254 0.0475049 0.812703 0.0844631
0.711717 0.835039 0.165973 0.0844631
0.545693 0.415679 0.814859 0.0844631
0.291362 0.221277 0.740625 0.0844631
0.625387 0.177012 0.665196 0.0844631
0.226285 0.692765 0.689361 0.0844631
0.891466 0.0957847 0.657861 0.0844631
0.517532 0.273905 0.902284 0.0844631
0.410399 0.682132 0.16299 0.0844631
0.0570839 0.629902 0.0756787 0.0844631
0.00559676 0.182636 0.975355 0.0844631
0.132737 0.531505 0.667699 0.0844631
0.662373 0.662491 0.762575 0.0844631
0.572523 0.557371 0.0770451 0.0844631
0.241216 0.554304 0.795169 0.0844631
0.404127 0.9559 0.727017 0.0844631
0.84986 0.285616 0.807583 0.0844631
0.622755 0.508751 0.695781 0.0844631
0.0647357 0.683784 0.64081 0.0844631
0.017351 0.0335981 0.271083 0.0844631
0.634026 0.657202 0.955442 0.0844631
0.665255 0.637861 0.595479 0.0844631
0.936277 0.0301754 0.9533 0.0844631
0.465166 0.519937 0.229393 0.0844631
0.27117 0.85172 0.724784 0.0844631
0.404611 0.539355 0.0728933 0.0844631
0.738345 0.774204 0.892481 0.0844631
0.784738 0.235691 0.659926 0.0844631
0.694499 0.92715 0.574585 0.0844631
0.203094 0.29767 0.618523 0.0844631
0.997892 0.98119 0.593439 0.0844631
0.51469 0.833673 0.950843 0.0844631
0.131958 0.596077 0.511604 0.0844631
0.429379 0.264579 0.0654981 0.0844631
0.98469 0.958944 0.771791 0.0844631
0.952019 0.25238 0.676499 0.0844631
0.0395681 0.440658 0.559985 0.0844631
0.663478 0.959076 0.270011 0.0844631
0.118073 0.821444 0.0698155 0.0844631
0.597588 0.270182 0.0509888 0.0844631
0.823071 0.555628 0.203843 0.0844631
0.710083 0.00668089 0.425246 0.0844631
0.708692 0.904726 0.788856 0.0844631
0.293722 0.292769 0.47605 0.0844631
0.33054 0.402152 0.352697 0.0844631
0.103216 0.475309 0.852193 0.0844631
0.54144 0.881093 0.790931 0.0844631
0.588392 0.889518 0.424822 0.0844631
0.54267 0.681877 0.268062 0.0844631
0.671421 0.784318 0.321985 0.0844631
0.893617 0.749839 0.954392 0.0844631
0.612948 0.108126 0.331386 0.0844631
0.40227 0.503457 0.798711 0.0844631
0.854573 0.0324028 0.101137 0.0844631
0.426354 0.122944 0.715253 0.0844631
0.973041 0.632152 0.222197 0.0844631
0.232692 0.0935994 0.176676 0.0844631
0.303504 0.575786 0.59508 0.0844631
0.365845 0.667468 0.781117 0.0844631
0.764071 0.282961 0.49907 0.0844631
0.31194 0.763128 0.995006 0.0844631
0.16544 0.598159 0.950031 0.0844631
0.988502 0.421201 0.400142 0.0844631
0.0904952 0.804367 0.790716 0.0844631
0.247648 0.450753 0.942993 0.0844631
0.269592 0.0566081 0.709864 0.0844631
0.251452 0.261402 0.171533 0.0844631
0.701449 0.298528 0.342954 0.0844631
0.613014 0.423649 0.982098 0.0844631
0.408804 0.0300075 0.575286 0.0844631
0.299217 0.816215 0.42207 0.0844631
0.576412 0.321367 0.588916 0.0844631
0.519125 0.705171 0.841281 0.0844631
0.200777 0.684171 0.384522 0.0844631
0.553896 0.522574 0.542147 0.0844631
0.128416 0.958809 0.168028 0.0844631
0.724615 0.0432676 0.69352 0.0844631
0.770004 0.00074753 0.958129 0.0844631
0.207296 0.447809 0.541212 0.0844631
0.903614 0.193834 0.109557 0.0844631
0.215384 0.415782 0.229858 0.0844631
0.666485 0.318737 0.747423 0.0844631
0.745934 0.349421 0.0668263 0.0844631
0.766981 0.0877808 0.234542 0.0844631
0.315324 0.397626 0.0947259 0.0844631
0.610816 0.534216 0.313762 0.0844631
0.687703 0.775593 0.500285 0.0844631
0.43313 0.83717 0.0998827 0.0844631
0.561228 0.135274 0.816213 0.0844631
0.213419 0.355151 0.807995 0.0844631
0.975543 0.153798 0.811644 0.0844631
0.161034 0.342383 0.0537506 0.0844631
0.725965 0.170735 0.804364 0.0844631
0.91098 0.16578 0.275974 0.0844631
0.275188 0.74229 0.244446 0.0844631
0.389144 0.64072 0.325376 0.0844631
0.554735 0.372235 0.282359 0.0844631
0.104658 0.191284 0.227677 0.0844631
0.499806 0.985717 0.302229 0.0844631
0.470468 0.486829 0.394958 0.0844631
0.587684 0.750647 0.08832 0.0844631
0.299257 0.432828 0.684974 0.0844631
0.424695 0.92756 0.441914 0.0844631
0.504459 0.272912 0.733867 0.0844631
0.807186 0.287712 0.211658 0.0844631
0.460013 0.682085 0.00151398 0.0844631
0.860599 0.886291 0.860442 0.0844631
0.53648 0.087021 0.172119 0.0844631
0.82388 0.678662 0.475869 0.0844631
0.196931 0.186369 0.876273 0.0844631
0.463229 0.765994 0.411131 0.0844631
0.987373 0.930252 0.0795568 0.0844631
0.513417 0.544658 0.919064 0.0844631
0.185582 0.164945 0.453624 0.0844631
0.0924416 0.635728 0.800369 0.0844631
0.889638 0.614201 0.0598228 0.0844631
0.941545 0.724731 0.793543 0.0844631
0.731609 0.123715 0.545145 0.0844631
0.679436 0.640664 0.177882 0.0844631
0.218729 0.671364 0.101905 0.0844631
0.334539 0.739691 0.568468 0.0844631
0.215164 0.00844407 0.0318398 0.0844631
0.669247 0.522968 0.85756 0.0844631
0.299222 0.158529 0.578448 0.0844631
0.919157 0.140163 0.485759 0.0844631
0.591358 0.657196 0.427926 0.0844631
0.268799 0.869125 0.133054 0.0844631
0.612547 0.990973 0.897731 0.0844631
0.15907 0.958743 0.791981 0.0844631
0.548442 0.844609 0.223106 0.0844631
0.888657 0.308625 0.986192 0.0844631
0.795381 0.439411 0.932689 0.0844631
0.635763 0.80385 0.674 0.0844631
0.0371748 0.202381 0.38214 0.0844631
0.386407 0.16184 0.192518 0.0844631
0.758207 0.719974 0.0512279 0.0844631
0.457535 0.0925071 0.426104 0.0844631
0.972372 0.903228 0.445768 0.0844631
0.0607607 0.730332 0.939615 0.0844631
0.981198 0.828728 0.664238 0.0844631
0.739213 0.53735 0.0583449 0.0844631
0.82734 0.158482 0.938934 0.0844631
0.879107 0.0201421 0.367227 0.0844631
0.687105 0.0110493 0.106996 0.0844631
0.0818178 0.316873 0.906747 0.0844631
0.200611 0.873239 0.931621 0.0844631
0.106137 0.24103 0.744726 0.0844631
0.435493 0.793483 0.692769 0.0844631
0.528055 0.731786 0.56564 0.0844631
0.0571424 0.0492633 0.45064 0.0844631
0.135322 0.891012 0.632386 0.0844631
0.328101 0.134057 0.0302443 0.0844631
0.869313 0.301052 0.368178 0.0844631
0.961967 0.765697 0.108057 0.0844631
0.668496 0.446111 0.181676 0.0844631
0.919568 0.756006 0.348926 0.0844631
0.991666 0.744856 0.501286 0.0844631
0.295511 0.290355 0.965744 0.0844631
0.458841 0.605759 0.654311 0.0844631
0.794554 0.61734 0.920233 0.0844631
0.0019474 0.33119 0.109448 0.0844631
0.46369 0.309833 0.463181 0.0844631
0.296394 0.904238 0.566223 0.0844631
0.890565 0.405413 0.63493 0.0844631
0.144195 0.575774 0.262804 0.0844631
0.210047 0.888592 0.29819 0.0844631
0.617947 0.229081 0.213569 0.0844631
0.495406 0.122105 0.00322827 0.0844631
0.831011 0.872402 0.0523457 0.0844631
0.279675 0.0391166 0.878759 0.0844631
0.433519 0.406039 0.973258 0.0844631
0.702773 0.535258 0.463338 0.0844631
0.842781 0.922694 0.687626 0.0844631
0.703096 0.29956 0.911215 0.0844631
0.0380851 0.639181 0.37794 0.0844631
0.379016 0.820676 0.851045 0.0844631
0.723109 0.401954 0.611759 0.0844631
0.955443 0.579247 0.512529 0.0844631
0.296401 0.037199 0.449389 0.0844631
0.881657 0.447792 0.087761 0.0844631
0.656098 0.143532 0.95574 0.0844631
0.965936 0.546205 0.690001 0.0844631
0.334146 0.0146719 0.286282 0.0844631
0.174903 0.0508213 0.329537 0.0844631
0.668068 0.875575 0.00789851 0.0844631
0.517478 0.402511 0.119801 0.0844631
0.0335784 0.895984 0.920728 0.0844631
0.353489 0.926216 0.98045 0.0844631
0.515858 0.899496 0.581788 0.0844631
0.0214488 0.483245 0.999798 0.0844631
0.836999 0.433238 0.473884 0.0844631
0.381474 0.299744 0.621214 0.0844631
0.893548 0.554202 0.357359 0.0844631
0.385287 0.00679129 0.125476 0.0844631
0.382082 0.176473 0.875897 0.0844631
0.375338 0.439285 0.526206 0.0844631
0.333229 0.595868 0.930602 0.0844631
0.533036 0.965661 0.0546621 0.0844631
0.0348509 0.392455 0.721819 0.0844631
0.613491 0.216291 0.46143 0.0844631
0.17733 0.0419469 0.56912 0.0844631
0.185888 0.506077 0.0901699 0.0844631
0.166015 0.914118 0.459266 0.0844631
0.746306 0.633286 0.332833 0.0844631
0.225214 0.724578 0.855262 0.0844631
0.820434 0.603526 0.753874 0.0844631
0.16378 0.173693 0.0414764 0.0844631
0.0995727 0.0546697 0.917656 0.0844631
0.447591 0.020769 0.876813 0.0844631
0.0623956 0.183214 0.56894 0.0844631
0.94763 0.410834 0.865314 0.0844631
0.953689 0.579147 0.878359 0.0844631
0.404145 0.327586 0.22019 0.0844631
0.800574 0.868069 0.391566 0.0844631
0.287223 0.546206 0.429562 0.0844631
0.606297 0.397675 0.441199 0.0844631
0.380349 0.33851 0.827832 0.0844631
0.202245 0.28176 0.331846 0.0844631
0.47476 0.434332 0.662686 0.0844631
0.081255 0.802026 0.365527 0.0844631
0.790427 0.768306 0.731892 0.0844631
0.952141 0.303167 0.515389 0.0844631
0.735282 0.184001 0.0993647 0.0844631
0.108137 0.733294 0.213575 0.0844631
0.0626769 0.350381 0.265908 0.0844631
0.328084 0.17067 0.350811 0.0844631
0.0373343 0.485658 0.167958 0.0844631
0.487331 0.179389 0.567878 0.0844631
0.782697 0.157339 0.387679 0.0844631
0.392496 0.856145 0.287012 0.0844631
0.78666 0.43832 0.763992 0.0844631
0.836323 0.927719 0.232454 0.0844631
0.152772 0.455916 0.381533 0.0844631
0.302341 0.558622 0.205956 0.0844631
0.426957 0.620411 0.488758 0.0844631
0.760088 0.455904 0.324774 0.0844631
0.907569 0.417477 0.251912 0.0844631
0.896312 0.687184 0.628241 0.0844631
1.03581 0.0887041 0.112468 0.0844631
0.582752 1.04545 0.529264 0.0844631
0.557502 1.01859 0.694119 0.0844631
0.84196 -0.003327 0.530338 0.0844631
1.08116 0.0792821 0.70289 0.0844631
0.0811649 1.07928 0.70289 0.0844631
1.08116 1.07928 0.70289 0.0844631
-0.005107 0.869664 0.237064 0.0844631
0.844254 1.0475 0.812703 0.0844631
1.05708 0.629902 0.0756787 0.0844631
0.0570839 0.629902 1.07568 0.0844631
1.05708 0.629902 1.07568 0.0844631
0.00559676 0.182636 -0.024645 0.0844631
1.0056 0.182636 0.975355 0.0844631
1.0056 0.182636 -0.024645 0.0844631
0.572523 0.557371 1.07705 0.0844631
0.404127 -0.0441 0.727017 0.0844631
1.06474 0.683784 0.64081 0.0844631
1.01735 0.0335981 0.271083 0.0844631
0.017351 1.0336 0.271083 0.0844631
1.01735 1.0336 0.271083 0.0844631
0.634026 0.657202 -0.044558 0.0844631
-0.063723 0.0301754 0.9533 0.0844631
0.936277 0.0301754 -0.0467 0.0844631
0.936277 1.03018 0.9533 0.0844631
-0.063723 0.0301754 -0.0467 0.0844631
-0.063723 1.03018 0.9533 0.0844631
0.936277 1.03018 -0.0467 0.0844631
-0.063723 1.03018 -0.0467 0.0844631
0.404611 0.539355 1.07289 0.0844631
0.694499 -0.07285 0.574585 0.0844631
-0.002108 0.98119 0.593439 0.0844631
0.997892 -0.01881 0.593439 0.0844631
-0.002108 -0.01881 0.593439 0.0844631
0.51469 0.833673 -0.049157 0.0844631
0.429379 0.264579 1.0655 0.0844631
-0.01531 0.958944 0.771791 0.0844631
0.98469 -0.041056 0.771791 0.0844631
-0.01531 -0.041056 0.771791 0.0844631
-0.047981 0.25238 0.676499 0.0844631
1.03957 0.440658 0.559985 0.0844631
0.663478 -0.040924 0.270011 0.0844631
0.118073 0.821444 1.06982 0.0844631
0.597588 0.270182 1.05099 0.0844631
0.710083 1.00668 0.425246 0.0844631
0.893617 0.749839 -0.045608 0.0844631
0.854573 1.0324 0.101137 0.0844631
-0.026959 0.632152 0.222197 0.0844631
0.31194 0.763128 -0.004994 0.0844631
0.16544 0.598159 -0.049969 0.0844631
-0.011498 0.421201 0.400142 0.0844631
0.247648 0.450753 -0.057007 0.0844631
0.269592 1.05661 0.709864 0.0844631
0.613014 0.423649 -0.017902 0.0844631
0.408804 1.03001 0.575286 0.0844631
0.128416 -0.041191 0.168028 0.0844631
0.724615 1.04327 0.69352 0.0844631
0.770004 0.00074753 -0.041871 0.0844631
0.770004 1.00075 0.958129 0.0844631
0.770004 1.00075 -0.041871 0.0844631
0.745934 0.349421 1.06683 0.0844631
-0.024457 0.153798 0.811644 0.0844631
0.161034 0.342383 1.05375 0.0844631
0.499806 -0.014283 0.302229 0.0844631
0.424695 -0.07244 0.441914 0.0844631
0.460013 0.682085 1.00151 0.0844631
-0.012627 0.930252 0.0795568 0.0844631
0.987373 -0.069748 0.0795568 0.0844631
0.987373 0.930252 1.07956 0.0844631
-0.012627 -0.069748 0.0795568 0.0844631
-0.012627 0.930252 1.07956 0.0844631
0.987373 -0.069748 1.07956 0.0844631
-0.012627 -0.069748 1.07956 0.0844631
0.513417 0.544658 -0.080936 0.0844631
0.889638 0.614201 1.05982 0.0844631
-0.058455 0.724731 0.793543 0.0844631
0.215164 1.00844 0.0318398 0.0844631
0.215164 0.00844407 1.03184 0.0844631
0.215164 1.00844 1.03184 0.0844631
-0.080843 0.140163 0.485759 0.0844631
0.612547 -0.009027 0.897731 0.0844631
0.15907 -0.041257 0.791981 0.0844631
0.888657 0.308625 -0.013808 0.0844631
0.795381 0.439411 -0.067311 0.0844631
1.03717 0.202381 0.38214 0.0844631
0.758207 0.719974 1.05123 0.0844631
-0.027628 0.903228 0.445768 0.0844631
0.0607607 0.730332 -0.060385 0.0844631
1.06076 0.730332 0.939615 0.0844631
1.06076 0.730332 -0.060385 0.0844631
-0.018802 0.828728 0.664238 0.0844631
0.739213 0.53735 1.05834 0.0844631
0.82734 0.158482 -0.061066 0.0844631
0.879107 1.02014 0.367227 0.0844631
0.687105 1.01105 0.106996 0.0844631
1.08182 0.316873 0.906747 0.0844631
0.200611 0.873239 -0.068379 0.0844631
1.05714 0.0492633 0.45064 0.0844631
0.0571424 1.04926 0.45064 0.0844631
1.05714 1.04926 0.45064 0.0844631
0.328101 0.134057 1.03024 0.0844631
-0.038033 0.765697 0.108057 0.0844631
-0.080432 0.756006 0.348926 0.0844631
-0.008334 0.744856 0.501286 0.0844631
0.295511 0.290355 -0.034256 0.0844631
0.794554 0.61734 -0.079767 0.0844631
1.00195 0.33119 0.109448 0.0844631
0.495406 0.122105 1.00323 0.0844631
0.831011 0.872402 1.05235 0.0844631
0.279675 1.03912 0.878759 0.0844631
0.433519 0.406039 -0.026742 0.0844631
0.842781 -0.077306 0.687626 0.0844631
1.03809 0.639181 0.37794 0.0844631
-0.044557 0.579247 0.512529 0.0844631
0.296401 1.0372 0.449389 0.0844631
0.656098 0.143532 -0.04426 0.0844631
-0.034064 0.546205 0.690001 0.0844631
0.334146 1.01467 0.286282 0.0844631
0.174903 1.05082 0.329537 0.0844631
0.668068 0.875575 1.0079 0.0844631
0.0335784 0.895984 -0.079272 0.0844631
1.03358 0.895984 0.920728 0.0844631
1.03358 0.895984 -0.079272 0.0844631
0.353489 -0.073784 0.98045 0.0844631
0.353489 0.926216 -0.01955 0.0844631
0.353489 -0.073784 -0.01955 0.0844631
0.0214488 0.483245 -0.000202 0.0844631
1.02145 0.483245 0.999798 0.0844631
1.02145 0.483245 -0.000202 0.0844631
0.385287 1.00679 0.125476 0.0844631
0.333229 0.595868 -0.069398 0.0844631
0.533036 -0.034339 0.0546621 0.0844631
0.533036 0.965661 1.05466 0.0844631
0.533036 -0.034339 1.05466 0.0844631
1.03485 0.392455 0.721819 0.0844631
0.17733 1.04195 0.56912 0.0844631
0.16378 0.173693 1.04148 0.0844631
0.0995727 0.0546697 -0.082344 0.0844631
0.0995727 1.05467 0.917656 0.0844631
0.0995727 1.05467 -0.082344 0.0844631
0.447591 1.02077 0.876813 0.0844631
1.0624 0.183214 0.56894 0.0844631
-0.05237 0.410834 0.865314 0.0844631
-0.046311 0.579147 0.878359 0.0844631
1.08126 0.802026 0.365527 0.0844631
-0.047859 0.303167 0.515389 0.0844631
1.06268 0.350381 0.265908 0.0844631
1.03733 0.485658 0.167958 0.0844631
0.836323 -0.072281 0.232454 0.0844631

8
example-gpu/Color.in Normal file
View File

@ -0,0 +1,8 @@
ID
80 32 128
1.0
1.0e-2 0.9 0.1 0.9
0.0 0.0 0.0
0 1.0 1.0
500 500 1e-5

1
example-gpu/Domain.in Normal file
View File

@ -0,0 +1 @@
2 2 2

BIN
example-gpu/ID.00000 Normal file

Binary file not shown.

BIN
example-gpu/ID.00001 Normal file

Binary file not shown.

BIN
example-gpu/ID.00002 Normal file

Binary file not shown.

BIN
example-gpu/ID.00003 Normal file

Binary file not shown.

BIN
example-gpu/ID.00004 Normal file

Binary file not shown.

BIN
example-gpu/ID.00005 Normal file

Binary file not shown.

BIN
example-gpu/ID.00006 Normal file

Binary file not shown.

BIN
example-gpu/ID.00007 Normal file

Binary file not shown.

22
example-gpu/RunColor.hokiespeed Executable file
View File

@ -0,0 +1,22 @@
#!/bin/bash
#PBS -l walltime=00:10:00
# Set the number of nodes, and the number of processors per node (generally should be 6)
# NOTE(review): the request below uses ppn=1, not 6 - confirm which is intended.
#PBS -l nodes=8:ppn=1
#PBS -A hokiespeed
# Access group, queue, and accounting project
#PBS -W group_list=hokiespeed
# PBS batch script that launches the GPU build of the Color LBM solver with
# $PBS_NP MPI ranks placed according to the PBS node file.
# Start from a clean module environment, then load the toolchain.
module purge
module load gcc cuda mvapich2/1.9rc1
# Run from the submission directory so the solver finds its input files there.
# Abort instead of running the solver from an unexpected directory if cd fails.
cd "$PBS_O_WORKDIR" || { echo "cannot cd to PBS_O_WORKDIR='$PBS_O_WORKDIR'" >&2; exit 1; }
echo "------------------------------------------"
echo "Running LBM using MPI!"
echo "Number of processors = " $PBS_NP
echo "------------------------------------------"
# MV2_USE_CUDA=1 is passed to every rank through the mpirun_rsh command line.
mpirun_rsh -np "$PBS_NP" -hostfile "$PBS_NODEFILE" MV2_USE_CUDA=1 ~/LBPM-WIA/bin/ColorLBM
exit;

View File

@ -0,0 +1,96 @@
hs060
hs060
hs060
hs060
hs060
hs060
hs060
hs060
hs060
hs060
hs060
hs060
hs061
hs061
hs061
hs061
hs061
hs061
hs061
hs061
hs061
hs061
hs061
hs061
hs064
hs064
hs064
hs064
hs064
hs064
hs064
hs064
hs064
hs064
hs064
hs064
hs065
hs065
hs065
hs065
hs065
hs065
hs065
hs065
hs065
hs065
hs065
hs065
hs066
hs066
hs066
hs066
hs066
hs066
hs066
hs066
hs066
hs066
hs066
hs066
hs067
hs067
hs067
hs067
hs067
hs067
hs067
hs067
hs067
hs067
hs067
hs067
hs068
hs068
hs068
hs068
hs068
hs068
hs068
hs068
hs068
hs068
hs068
hs068
hs069
hs069
hs069
hs069
hs069
hs069
hs069
hs069
hs069
hs069
hs069
hs069

404
example-gpu/pack.out Normal file
View File

@ -0,0 +1,404 @@
Number of Spheres: 399
Domain Length (x,y,z): 1, 1, 1
Media porosity: 0.369
log(r) Normal with mean -2.47144, variance 0
Mean coordination No. 11.84
0.0358075 0.0887041 0.112468 0.0844631
0.118482 0.318651 0.473828 0.0844631
0.582752 0.0454491 0.529264 0.0844631
0.852622 0.828954 0.547441 0.0844631
0.807778 0.54718 0.595125 0.0844631
0.557502 0.0185902 0.694119 0.0844631
0.832159 0.72426 0.207901 0.0844631
0.84196 0.996673 0.530338 0.0844631
0.0811649 0.0792821 0.70289 0.0844631
0.994893 0.869664 0.237064 0.0844631
0.488065 0.221434 0.32131 0.0844631
0.170562 0.759156 0.532848 0.0844631
0.844254 0.0475049 0.812703 0.0844631
0.711717 0.835039 0.165973 0.0844631
0.545693 0.415679 0.814859 0.0844631
0.291362 0.221277 0.740625 0.0844631
0.625387 0.177012 0.665196 0.0844631
0.226285 0.692765 0.689361 0.0844631
0.891466 0.0957847 0.657861 0.0844631
0.517532 0.273905 0.902284 0.0844631
0.410399 0.682132 0.16299 0.0844631
0.0570839 0.629902 0.0756787 0.0844631
0.00559676 0.182636 0.975355 0.0844631
0.132737 0.531505 0.667699 0.0844631
0.662373 0.662491 0.762575 0.0844631
0.572523 0.557371 0.0770451 0.0844631
0.241216 0.554304 0.795169 0.0844631
0.404127 0.9559 0.727017 0.0844631
0.84986 0.285616 0.807583 0.0844631
0.622755 0.508751 0.695781 0.0844631
0.0647357 0.683784 0.64081 0.0844631
0.017351 0.0335981 0.271083 0.0844631
0.634026 0.657202 0.955442 0.0844631
0.665255 0.637861 0.595479 0.0844631
0.936277 0.0301754 0.9533 0.0844631
0.465166 0.519937 0.229393 0.0844631
0.27117 0.85172 0.724784 0.0844631
0.404611 0.539355 0.0728933 0.0844631
0.738345 0.774204 0.892481 0.0844631
0.784738 0.235691 0.659926 0.0844631
0.694499 0.92715 0.574585 0.0844631
0.203094 0.29767 0.618523 0.0844631
0.997892 0.98119 0.593439 0.0844631
0.51469 0.833673 0.950843 0.0844631
0.131958 0.596077 0.511604 0.0844631
0.429379 0.264579 0.0654981 0.0844631
0.98469 0.958944 0.771791 0.0844631
0.952019 0.25238 0.676499 0.0844631
0.0395681 0.440658 0.559985 0.0844631
0.663478 0.959076 0.270011 0.0844631
0.118073 0.821444 0.0698155 0.0844631
0.597588 0.270182 0.0509888 0.0844631
0.823071 0.555628 0.203843 0.0844631
0.710083 0.00668089 0.425246 0.0844631
0.708692 0.904726 0.788856 0.0844631
0.293722 0.292769 0.47605 0.0844631
0.33054 0.402152 0.352697 0.0844631
0.103216 0.475309 0.852193 0.0844631
0.54144 0.881093 0.790931 0.0844631
0.588392 0.889518 0.424822 0.0844631
0.54267 0.681877 0.268062 0.0844631
0.671421 0.784318 0.321985 0.0844631
0.893617 0.749839 0.954392 0.0844631
0.612948 0.108126 0.331386 0.0844631
0.40227 0.503457 0.798711 0.0844631
0.854573 0.0324028 0.101137 0.0844631
0.426354 0.122944 0.715253 0.0844631
0.973041 0.632152 0.222197 0.0844631
0.232692 0.0935994 0.176676 0.0844631
0.303504 0.575786 0.59508 0.0844631
0.365845 0.667468 0.781117 0.0844631
0.764071 0.282961 0.49907 0.0844631
0.31194 0.763128 0.995006 0.0844631
0.16544 0.598159 0.950031 0.0844631
0.988502 0.421201 0.400142 0.0844631
0.0904952 0.804367 0.790716 0.0844631
0.247648 0.450753 0.942993 0.0844631
0.269592 0.0566081 0.709864 0.0844631
0.251452 0.261402 0.171533 0.0844631
0.701449 0.298528 0.342954 0.0844631
0.613014 0.423649 0.982098 0.0844631
0.408804 0.0300075 0.575286 0.0844631
0.299217 0.816215 0.42207 0.0844631
0.576412 0.321367 0.588916 0.0844631
0.519125 0.705171 0.841281 0.0844631
0.200777 0.684171 0.384522 0.0844631
0.553896 0.522574 0.542147 0.0844631
0.128416 0.958809 0.168028 0.0844631
0.724615 0.0432676 0.69352 0.0844631
0.770004 0.00074753 0.958129 0.0844631
0.207296 0.447809 0.541212 0.0844631
0.903614 0.193834 0.109557 0.0844631
0.215384 0.415782 0.229858 0.0844631
0.666485 0.318737 0.747423 0.0844631
0.745934 0.349421 0.0668263 0.0844631
0.766981 0.0877808 0.234542 0.0844631
0.315324 0.397626 0.0947259 0.0844631
0.610816 0.534216 0.313762 0.0844631
0.687703 0.775593 0.500285 0.0844631
0.43313 0.83717 0.0998827 0.0844631
0.561228 0.135274 0.816213 0.0844631
0.213419 0.355151 0.807995 0.0844631
0.975543 0.153798 0.811644 0.0844631
0.161034 0.342383 0.0537506 0.0844631
0.725965 0.170735 0.804364 0.0844631
0.91098 0.16578 0.275974 0.0844631
0.275188 0.74229 0.244446 0.0844631
0.389144 0.64072 0.325376 0.0844631
0.554735 0.372235 0.282359 0.0844631
0.104658 0.191284 0.227677 0.0844631
0.499806 0.985717 0.302229 0.0844631
0.470468 0.486829 0.394958 0.0844631
0.587684 0.750647 0.08832 0.0844631
0.299257 0.432828 0.684974 0.0844631
0.424695 0.92756 0.441914 0.0844631
0.504459 0.272912 0.733867 0.0844631
0.807186 0.287712 0.211658 0.0844631
0.460013 0.682085 0.00151398 0.0844631
0.860599 0.886291 0.860442 0.0844631
0.53648 0.087021 0.172119 0.0844631
0.82388 0.678662 0.475869 0.0844631
0.196931 0.186369 0.876273 0.0844631
0.463229 0.765994 0.411131 0.0844631
0.987373 0.930252 0.0795568 0.0844631
0.513417 0.544658 0.919064 0.0844631
0.185582 0.164945 0.453624 0.0844631
0.0924416 0.635728 0.800369 0.0844631
0.889638 0.614201 0.0598228 0.0844631
0.941545 0.724731 0.793543 0.0844631
0.731609 0.123715 0.545145 0.0844631
0.679436 0.640664 0.177882 0.0844631
0.218729 0.671364 0.101905 0.0844631
0.334539 0.739691 0.568468 0.0844631
0.215164 0.00844407 0.0318398 0.0844631
0.669247 0.522968 0.85756 0.0844631
0.299222 0.158529 0.578448 0.0844631
0.919157 0.140163 0.485759 0.0844631
0.591358 0.657196 0.427926 0.0844631
0.268799 0.869125 0.133054 0.0844631
0.612547 0.990973 0.897731 0.0844631
0.15907 0.958743 0.791981 0.0844631
0.548442 0.844609 0.223106 0.0844631
0.888657 0.308625 0.986192 0.0844631
0.795381 0.439411 0.932689 0.0844631
0.635763 0.80385 0.674 0.0844631
0.0371748 0.202381 0.38214 0.0844631
0.386407 0.16184 0.192518 0.0844631
0.758207 0.719974 0.0512279 0.0844631
0.457535 0.0925071 0.426104 0.0844631
0.972372 0.903228 0.445768 0.0844631
0.0607607 0.730332 0.939615 0.0844631
0.981198 0.828728 0.664238 0.0844631
0.739213 0.53735 0.0583449 0.0844631
0.82734 0.158482 0.938934 0.0844631
0.879107 0.0201421 0.367227 0.0844631
0.687105 0.0110493 0.106996 0.0844631
0.0818178 0.316873 0.906747 0.0844631
0.200611 0.873239 0.931621 0.0844631
0.106137 0.24103 0.744726 0.0844631
0.435493 0.793483 0.692769 0.0844631
0.528055 0.731786 0.56564 0.0844631
0.0571424 0.0492633 0.45064 0.0844631
0.135322 0.891012 0.632386 0.0844631
0.328101 0.134057 0.0302443 0.0844631
0.869313 0.301052 0.368178 0.0844631
0.961967 0.765697 0.108057 0.0844631
0.668496 0.446111 0.181676 0.0844631
0.919568 0.756006 0.348926 0.0844631
0.991666 0.744856 0.501286 0.0844631
0.295511 0.290355 0.965744 0.0844631
0.458841 0.605759 0.654311 0.0844631
0.794554 0.61734 0.920233 0.0844631
0.0019474 0.33119 0.109448 0.0844631
0.46369 0.309833 0.463181 0.0844631
0.296394 0.904238 0.566223 0.0844631
0.890565 0.405413 0.63493 0.0844631
0.144195 0.575774 0.262804 0.0844631
0.210047 0.888592 0.29819 0.0844631
0.617947 0.229081 0.213569 0.0844631
0.495406 0.122105 0.00322827 0.0844631
0.831011 0.872402 0.0523457 0.0844631
0.279675 0.0391166 0.878759 0.0844631
0.433519 0.406039 0.973258 0.0844631
0.702773 0.535258 0.463338 0.0844631
0.842781 0.922694 0.687626 0.0844631
0.703096 0.29956 0.911215 0.0844631
0.0380851 0.639181 0.37794 0.0844631
0.379016 0.820676 0.851045 0.0844631
0.723109 0.401954 0.611759 0.0844631
0.955443 0.579247 0.512529 0.0844631
0.296401 0.037199 0.449389 0.0844631
0.881657 0.447792 0.087761 0.0844631
0.656098 0.143532 0.95574 0.0844631
0.965936 0.546205 0.690001 0.0844631
0.334146 0.0146719 0.286282 0.0844631
0.174903 0.0508213 0.329537 0.0844631
0.668068 0.875575 0.00789851 0.0844631
0.517478 0.402511 0.119801 0.0844631
0.0335784 0.895984 0.920728 0.0844631
0.353489 0.926216 0.98045 0.0844631
0.515858 0.899496 0.581788 0.0844631
0.0214488 0.483245 0.999798 0.0844631
0.836999 0.433238 0.473884 0.0844631
0.381474 0.299744 0.621214 0.0844631
0.893548 0.554202 0.357359 0.0844631
0.385287 0.00679129 0.125476 0.0844631
0.382082 0.176473 0.875897 0.0844631
0.375338 0.439285 0.526206 0.0844631
0.333229 0.595868 0.930602 0.0844631
0.533036 0.965661 0.0546621 0.0844631
0.0348509 0.392455 0.721819 0.0844631
0.613491 0.216291 0.46143 0.0844631
0.17733 0.0419469 0.56912 0.0844631
0.185888 0.506077 0.0901699 0.0844631
0.166015 0.914118 0.459266 0.0844631
0.746306 0.633286 0.332833 0.0844631
0.225214 0.724578 0.855262 0.0844631
0.820434 0.603526 0.753874 0.0844631
0.16378 0.173693 0.0414764 0.0844631
0.0995727 0.0546697 0.917656 0.0844631
0.447591 0.020769 0.876813 0.0844631
0.0623956 0.183214 0.56894 0.0844631
0.94763 0.410834 0.865314 0.0844631
0.953689 0.579147 0.878359 0.0844631
0.404145 0.327586 0.22019 0.0844631
0.800574 0.868069 0.391566 0.0844631
0.287223 0.546206 0.429562 0.0844631
0.606297 0.397675 0.441199 0.0844631
0.380349 0.33851 0.827832 0.0844631
0.202245 0.28176 0.331846 0.0844631
0.47476 0.434332 0.662686 0.0844631
0.081255 0.802026 0.365527 0.0844631
0.790427 0.768306 0.731892 0.0844631
0.952141 0.303167 0.515389 0.0844631
0.735282 0.184001 0.0993647 0.0844631
0.108137 0.733294 0.213575 0.0844631
0.0626769 0.350381 0.265908 0.0844631
0.328084 0.17067 0.350811 0.0844631
0.0373343 0.485658 0.167958 0.0844631
0.487331 0.179389 0.567878 0.0844631
0.782697 0.157339 0.387679 0.0844631
0.392496 0.856145 0.287012 0.0844631
0.78666 0.43832 0.763992 0.0844631
0.836323 0.927719 0.232454 0.0844631
0.152772 0.455916 0.381533 0.0844631
0.302341 0.558622 0.205956 0.0844631
0.426957 0.620411 0.488758 0.0844631
0.760088 0.455904 0.324774 0.0844631
0.907569 0.417477 0.251912 0.0844631
0.896312 0.687184 0.628241 0.0844631
1.03581 0.0887041 0.112468 0.0844631
0.582752 1.04545 0.529264 0.0844631
0.557502 1.01859 0.694119 0.0844631
0.84196 -0.003327 0.530338 0.0844631
1.08116 0.0792821 0.70289 0.0844631
0.0811649 1.07928 0.70289 0.0844631
1.08116 1.07928 0.70289 0.0844631
-0.005107 0.869664 0.237064 0.0844631
0.844254 1.0475 0.812703 0.0844631
1.05708 0.629902 0.0756787 0.0844631
0.0570839 0.629902 1.07568 0.0844631
1.05708 0.629902 1.07568 0.0844631
0.00559676 0.182636 -0.024645 0.0844631
1.0056 0.182636 0.975355 0.0844631
1.0056 0.182636 -0.024645 0.0844631
0.572523 0.557371 1.07705 0.0844631
0.404127 -0.0441 0.727017 0.0844631
1.06474 0.683784 0.64081 0.0844631
1.01735 0.0335981 0.271083 0.0844631
0.017351 1.0336 0.271083 0.0844631
1.01735 1.0336 0.271083 0.0844631
0.634026 0.657202 -0.044558 0.0844631
-0.063723 0.0301754 0.9533 0.0844631
0.936277 0.0301754 -0.0467 0.0844631
0.936277 1.03018 0.9533 0.0844631
-0.063723 0.0301754 -0.0467 0.0844631
-0.063723 1.03018 0.9533 0.0844631
0.936277 1.03018 -0.0467 0.0844631
-0.063723 1.03018 -0.0467 0.0844631
0.404611 0.539355 1.07289 0.0844631
0.694499 -0.07285 0.574585 0.0844631
-0.002108 0.98119 0.593439 0.0844631
0.997892 -0.01881 0.593439 0.0844631
-0.002108 -0.01881 0.593439 0.0844631
0.51469 0.833673 -0.049157 0.0844631
0.429379 0.264579 1.0655 0.0844631
-0.01531 0.958944 0.771791 0.0844631
0.98469 -0.041056 0.771791 0.0844631
-0.01531 -0.041056 0.771791 0.0844631
-0.047981 0.25238 0.676499 0.0844631
1.03957 0.440658 0.559985 0.0844631
0.663478 -0.040924 0.270011 0.0844631
0.118073 0.821444 1.06982 0.0844631
0.597588 0.270182 1.05099 0.0844631
0.710083 1.00668 0.425246 0.0844631
0.893617 0.749839 -0.045608 0.0844631
0.854573 1.0324 0.101137 0.0844631
-0.026959 0.632152 0.222197 0.0844631
0.31194 0.763128 -0.004994 0.0844631
0.16544 0.598159 -0.049969 0.0844631
-0.011498 0.421201 0.400142 0.0844631
0.247648 0.450753 -0.057007 0.0844631
0.269592 1.05661 0.709864 0.0844631
0.613014 0.423649 -0.017902 0.0844631
0.408804 1.03001 0.575286 0.0844631
0.128416 -0.041191 0.168028 0.0844631
0.724615 1.04327 0.69352 0.0844631
0.770004 0.00074753 -0.041871 0.0844631
0.770004 1.00075 0.958129 0.0844631
0.770004 1.00075 -0.041871 0.0844631
0.745934 0.349421 1.06683 0.0844631
-0.024457 0.153798 0.811644 0.0844631
0.161034 0.342383 1.05375 0.0844631
0.499806 -0.014283 0.302229 0.0844631
0.424695 -0.07244 0.441914 0.0844631
0.460013 0.682085 1.00151 0.0844631
-0.012627 0.930252 0.0795568 0.0844631
0.987373 -0.069748 0.0795568 0.0844631
0.987373 0.930252 1.07956 0.0844631
-0.012627 -0.069748 0.0795568 0.0844631
-0.012627 0.930252 1.07956 0.0844631
0.987373 -0.069748 1.07956 0.0844631
-0.012627 -0.069748 1.07956 0.0844631
0.513417 0.544658 -0.080936 0.0844631
0.889638 0.614201 1.05982 0.0844631
-0.058455 0.724731 0.793543 0.0844631
0.215164 1.00844 0.0318398 0.0844631
0.215164 0.00844407 1.03184 0.0844631
0.215164 1.00844 1.03184 0.0844631
-0.080843 0.140163 0.485759 0.0844631
0.612547 -0.009027 0.897731 0.0844631
0.15907 -0.041257 0.791981 0.0844631
0.888657 0.308625 -0.013808 0.0844631
0.795381 0.439411 -0.067311 0.0844631
1.03717 0.202381 0.38214 0.0844631
0.758207 0.719974 1.05123 0.0844631
-0.027628 0.903228 0.445768 0.0844631
0.0607607 0.730332 -0.060385 0.0844631
1.06076 0.730332 0.939615 0.0844631
1.06076 0.730332 -0.060385 0.0844631
-0.018802 0.828728 0.664238 0.0844631
0.739213 0.53735 1.05834 0.0844631
0.82734 0.158482 -0.061066 0.0844631
0.879107 1.02014 0.367227 0.0844631
0.687105 1.01105 0.106996 0.0844631
1.08182 0.316873 0.906747 0.0844631
0.200611 0.873239 -0.068379 0.0844631
1.05714 0.0492633 0.45064 0.0844631
0.0571424 1.04926 0.45064 0.0844631
1.05714 1.04926 0.45064 0.0844631
0.328101 0.134057 1.03024 0.0844631
-0.038033 0.765697 0.108057 0.0844631
-0.080432 0.756006 0.348926 0.0844631
-0.008334 0.744856 0.501286 0.0844631
0.295511 0.290355 -0.034256 0.0844631
0.794554 0.61734 -0.079767 0.0844631
1.00195 0.33119 0.109448 0.0844631
0.495406 0.122105 1.00323 0.0844631
0.831011 0.872402 1.05235 0.0844631
0.279675 1.03912 0.878759 0.0844631
0.433519 0.406039 -0.026742 0.0844631
0.842781 -0.077306 0.687626 0.0844631
1.03809 0.639181 0.37794 0.0844631
-0.044557 0.579247 0.512529 0.0844631
0.296401 1.0372 0.449389 0.0844631
0.656098 0.143532 -0.04426 0.0844631
-0.034064 0.546205 0.690001 0.0844631
0.334146 1.01467 0.286282 0.0844631
0.174903 1.05082 0.329537 0.0844631
0.668068 0.875575 1.0079 0.0844631
0.0335784 0.895984 -0.079272 0.0844631
1.03358 0.895984 0.920728 0.0844631
1.03358 0.895984 -0.079272 0.0844631
0.353489 -0.073784 0.98045 0.0844631
0.353489 0.926216 -0.01955 0.0844631
0.353489 -0.073784 -0.01955 0.0844631
0.0214488 0.483245 -0.000202 0.0844631
1.02145 0.483245 0.999798 0.0844631
1.02145 0.483245 -0.000202 0.0844631
0.385287 1.00679 0.125476 0.0844631
0.333229 0.595868 -0.069398 0.0844631
0.533036 -0.034339 0.0546621 0.0844631
0.533036 0.965661 1.05466 0.0844631
0.533036 -0.034339 1.05466 0.0844631
1.03485 0.392455 0.721819 0.0844631
0.17733 1.04195 0.56912 0.0844631
0.16378 0.173693 1.04148 0.0844631
0.0995727 0.0546697 -0.082344 0.0844631
0.0995727 1.05467 0.917656 0.0844631
0.0995727 1.05467 -0.082344 0.0844631
0.447591 1.02077 0.876813 0.0844631
1.0624 0.183214 0.56894 0.0844631
-0.05237 0.410834 0.865314 0.0844631
-0.046311 0.579147 0.878359 0.0844631
1.08126 0.802026 0.365527 0.0844631
-0.047859 0.303167 0.515389 0.0844631
1.06268 0.350381 0.265908 0.0844631
1.03733 0.485658 0.167958 0.0844631
0.836323 -0.072281 0.232454 0.0844631

813
gpu/Color.cu Normal file
View File

@ -0,0 +1,813 @@
#include <cuda.h>
// Software double-precision atomicAdd built on a 64-bit compare-and-swap loop.
//   address - location of the double to update
//   val     - value to add
// Returns the value stored at address before the addition.
//
// Devices of compute capability 6.0 and newer provide a native
// atomicAdd(double*, double); defining another one there is a redefinition
// error, so this fallback is only compiled for older architectures.
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 600
__device__ double atomicAdd(double* address, double val)
{
    // Reinterpret the double as a 64-bit integer so atomicCAS can be used on it
    unsigned long long int* address_as_ull =
        (unsigned long long int*)address;
    unsigned long long int old = *address_as_ull, assumed;
    do {
        assumed = old;
        // Try to install (assumed + val); atomicCAS returns what was actually stored
        old = atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val +
                        __longlong_as_double(assumed)));
        // Retry if another thread changed the value in between. The comparison is
        // done on the integer bit patterns, which avoids an endless loop on NaN.
    } while (assumed != old);
    return __longlong_as_double(old);
}
#endif
// Initialise the two component densities and the phase indicator from the node labels.
//   ID   - per-node label (1 and 2 select the two pure phases, anything else uses das/dbs)
//   Den  - interleaved densities, written as Den[2*n] and Den[2*n+1]
//   Phi  - phase indicator, one value per node
//   das, dbs - densities assigned to nodes whose label is neither 1 nor 2
//   N    - number of nodes
//   S    - number of consecutive blockDim.x-sized chunks handled by each thread block
__global__ void InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int N, int S)
{
    for (int chunk=0; chunk<S; chunk++){
        // Node handled by this thread in the current chunk
        const int node = S*blockIdx.x*blockDim.x + chunk*blockDim.x + threadIdx.x;
        if (!(node<N)) continue;

        double *den = &Den[2*node];
        if ( ID[node] == 1){
            // pure first component
            den[0] = 1.0;
            den[1] = 0.0;
            Phi[node] = 1.0;
            continue;
        }
        if ( ID[node] == 2){
            // pure second component
            den[0] = 0.0;
            den[1] = 1.0;
            Phi[node] = -1.0;
            continue;
        }
        // every other label receives the user-supplied densities
        den[0] = das;
        den[1] = dbs;
        Phi[node] = (das-dbs)/(das+dbs);
    }
}
// Compute the first moments of the 19 distributions for every node with ID > 0.
//   ID        - per-node label; nodes with ID <= 0 are skipped
//   disteven  - even-numbered distributions, component q stored at disteven[q*N+n]
//   distodd   - odd-numbered distributions, component q stored at distodd[q*N+n]
//   vel       - output, component-major: vel[n], vel[N+n], vel[2*N+n]
//   Nx,Ny,Nz  - grid dimensions (N = Nx*Ny*Nz)
//   S         - number of consecutive blockDim.x-sized chunks handled by each thread block
// The sums are written as-is; they are not divided by the density.
__global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz, int S)
{
    const int N = Nx*Ny*Nz;
    for (int s=0; s<S; s++){
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (!(n<N)) continue;       // outside the domain
        if (!(ID[n] > 0)) continue; // node is skipped

        // Odd-numbered distributions
        const double f1  = distodd[n];
        const double f3  = distodd[1*N+n];
        const double f5  = distodd[2*N+n];
        const double f7  = distodd[3*N+n];
        const double f9  = distodd[4*N+n];
        const double f11 = distodd[5*N+n];
        const double f13 = distodd[6*N+n];
        const double f15 = distodd[7*N+n];
        const double f17 = distodd[8*N+n];
        // Even-numbered distributions (slot 0 holds f0, which is not needed here)
        const double f2  = disteven[N+n];
        const double f4  = disteven[2*N+n];
        const double f6  = disteven[3*N+n];
        const double f8  = disteven[4*N+n];
        const double f10 = disteven[5*N+n];
        const double f12 = disteven[6*N+n];
        const double f14 = disteven[7*N+n];
        const double f16 = disteven[8*N+n];
        const double f18 = disteven[9*N+n];

        // Write the three components directly
        vel[n]     = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
        vel[N+n]   = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
        vel[2*N+n] = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
    }
}
//*************************************************************************
//*************************************************************************
// Density (pressure) boundary condition applied to the nodes with index n < Nx*Ny.
//   disteven, distodd - distribution arrays, component q stored at [q*N+n]
//   din               - density value imposed at these nodes
//   Nx,Ny,Nz          - grid dimensions (N = Nx*Ny*Nz)
//   S                 - number of consecutive blockDim.x-sized chunks handled by each thread block
// Five distributions (f6, f12, f13, f16, f17) are recomputed from the others and
// written back; all other distributions are left untouched.
__global__ void PressureBC_inlet(double *disteven, double *distodd, double din,
int Nx, int Ny, int Nz, int S)
{
int n,N;
// distributions
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
double uz;
N = Nx*Ny*Nz;
// Loop over the S chunks assigned to this thread block
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
// Only the first Nx*Ny nodes are processed
if (n<Nx*Ny){
//........................................................................
// Read distributions from "opposite" memory convention
//........................................................................
//........................................................................
f1 = distodd[n];
f3 = distodd[N+n];
f5 = distodd[2*N+n];
f7 = distodd[3*N+n];
f9 = distodd[4*N+n];
f11 = distodd[5*N+n];
f13 = distodd[6*N+n];
f15 = distodd[7*N+n];
f17 = distodd[8*N+n];
//........................................................................
f0 = disteven[n];
f2 = disteven[N+n];
f4 = disteven[2*N+n];
f6 = disteven[3*N+n];
f8 = disteven[4*N+n];
f10 = disteven[5*N+n];
f12 = disteven[6*N+n];
f14 = disteven[7*N+n];
f16 = disteven[8*N+n];
f18 = disteven[9*N+n];
//...................................................
// Disabled earlier formulation, kept for reference:
//........Determine the inlet flow velocity.........
// uz = -1 + (f0+f3+f4+f1+f2+f7+f8+f10+f9
// + 2*(f5+f15+f18+f11+f14))/din;
//........Set the unknown distributions..............
// f6 = f5 - 0.3333333333333333*din*uz;
// f16 = f15 - 0.1666666666666667*din*uz;
// f17 = f16 - f3 + f4-f15+f18-f7+f8-f10+f9;
// f12= 0.5*(-din*uz+f5+f15+f18+f11+f14-f6-f16-
// f17+f1-f2-f14+f11+f7-f8-f10+f9);
// f13= -din*uz+f5+f15+f18+f11+f14-f6-f16-f17-f12;
// Determine the inlet flow velocity from the imposed density din
uz = 1.0 - (f0+f4+f3+f2+f1+f8+f7+f9+ f10 +
2*(f5+ f15+f18+f11+f14))/din;
// Set the unknown distributions (each line uses the values assigned above it,
// so the order of these statements matters):
f6 = f5 + 0.3333333333333333*din*uz;
f16 = f15 + 0.1666666666666667*din*uz;
f17 = f16 + f4 - f3-f15+f18+f8-f7 +f9-f10;
f12= (din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f2+f1-f14+f11-f8+f7+f9-f10)*0.5;
f13= din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f12;
//........Store in "opposite" memory location..........
disteven[3*N+n] = f6;
disteven[6*N+n] = f12;
distodd[6*N+n] = f13;
disteven[8*N+n] = f16;
distodd[8*N+n] = f17;
//...................................................
}
}
}
// Density (pressure) boundary condition applied to nodes starting at index "outlet".
//   disteven, distodd - distribution arrays, component q stored at [q*N+n]
//   dout              - density value imposed at these nodes
//   Nx,Ny,Nz          - grid dimensions (N = Nx*Ny*Nz)
//   S                 - number of consecutive blockDim.x-sized chunks handled by each thread block
//   outlet            - index of the first node to process; nodes outlet <= n < N are updated
//                       (NOTE(review): presumably the start of the last z-plane - confirm with caller)
// Five distributions (f5, f11, f14, f15, f18) are recomputed from the others and
// written back; all other distributions are left untouched.
__global__ void PressureBC_outlet(double *disteven, double *distodd, double dout,
int Nx, int Ny, int Nz, int S, int outlet)
{
int n,N;
// distributions
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
double uz;
N = Nx*Ny*Nz;
// Loop over the S chunks assigned to this thread block
for (int s=0; s<S; s++){
//........Get 1-D index for this thread (offset by the first outlet node)....
n = outlet + S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (n<N){
//........................................................................
// Read distributions from "opposite" memory convention
//........................................................................
f1 = distodd[n];
f3 = distodd[N+n];
f5 = distodd[2*N+n];
f7 = distodd[3*N+n];
f9 = distodd[4*N+n];
f11 = distodd[5*N+n];
f13 = distodd[6*N+n];
f15 = distodd[7*N+n];
f17 = distodd[8*N+n];
//........................................................................
f0 = disteven[n];
f2 = disteven[N+n];
f4 = disteven[2*N+n];
f6 = disteven[3*N+n];
f8 = disteven[4*N+n];
f10 = disteven[5*N+n];
f12 = disteven[6*N+n];
f14 = disteven[7*N+n];
f16 = disteven[8*N+n];
f18 = disteven[9*N+n];
// Disabled earlier formulation, kept for reference:
//........Determine the outlet flow velocity.........
// uz = 1 - (f0+f3+f4+f1+f2+f7+f8+f10+f9+
// 2*(f6+f16+f17+f12+f13))/dout;
//...................................................
//........Set the Unknown Distributions..............
// f5 = f6 + 0.33333333333333338*dout*uz;
// f15 = f16 + 0.16666666666666678*dout*uz;
// f18 = f15+f3-f4-f16+f17+f7-f8+f10-f9;
// f11= 0.5*(dout*uz+f6+ f16+f17+f12+f13-f5
// -f15-f18-f1+f2-f13+f12-f7+f8+f10-f9);
// f14= dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18-f11;
// Determine the outlet flow velocity from the imposed density dout
uz = -1.0 + (f0+f4+f3+f2+f1+f8+f7+f9+f10 + 2*(f6+f16+f17+f12+f13))/dout;
// Set the unknown distributions (each line uses the values assigned above it,
// so the order of these statements matters)
f5 = f6 - 0.33333333333333338*dout* uz;
f15 = f16 - 0.16666666666666678*dout* uz;
f18 = f15 - f4 + f3-f16+f17-f8+f7-f9+f10;
f11 = (-dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18+f2-f1-f13+f12+f8-f7-f9+f10)*0.5;
f14 = -dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18-f11;
//........Store in "opposite" memory location..........
distodd[2*N+n] = f5;
distodd[5*N+n] = f11;
disteven[7*N+n] = f14;
distodd[7*N+n] = f15;
disteven[9*N+n] = f18;
//...................................................
}
}
}
//*************************************************************************
// Compute the (un-normalised) color gradient of the phase indicator at every node,
// using the 18 nearest lattice neighbours with periodic wrap-around in x, y and z.
//   ID         - per-node label (not used by this kernel)
//   phi        - phase indicator, one value per node, index n = k*Nx*Ny + j*Nx + i
//   ColorGrad  - output, interleaved: ColorGrad[3*n], [3*n+1], [3*n+2]
//   Nx,Ny,Nz   - grid dimensions (N = Nx*Ny*Nz)
//   S          - number of consecutive blockDim.x-sized chunks handled by each thread block
__global__ void ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz, int S)
{
    const int N = Nx*Ny*Nz;
    for (int s=0; s<S; s++){
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (!(n<N)) continue;

        // Recover the 3-D coordinates of node n
        const int k = n/(Nx*Ny);
        const int j = (n-Nx*Ny*k)/Nx;
        const int i = n-Nx*Ny*k-Nx*j;

        // Periodic neighbour coordinates along each axis
        const int im = (i-1<0)  ? i-1+Nx : i-1;
        const int ip = (i+1<Nx) ? i+1    : i+1-Nx;
        const int jm = (j-1<0)  ? j-1+Ny : j-1;
        const int jp = (j+1<Ny) ? j+1    : j+1-Ny;
        const int km = (k-1<0)  ? k-1+Nz : k-1;
        const int kp = (k+1<Nz) ? k+1    : k+1-Nz;

        // Row and plane offsets for the centre node and its neighbours
        const int rowM = Nx*jm,    row0 = Nx*j,    rowP = Nx*jp;
        const int plnM = Nx*Ny*km, pln0 = Nx*Ny*k, plnP = Nx*Ny*kp;

        // Axis-aligned neighbours
        const double f1  = phi[pln0+row0+im];   // (i-1, j,   k  )
        const double f2  = phi[pln0+row0+ip];   // (i+1, j,   k  )
        const double f3  = phi[pln0+rowM+i ];   // (i,   j-1, k  )
        const double f4  = phi[pln0+rowP+i ];   // (i,   j+1, k  )
        const double f5  = phi[plnM+row0+i ];   // (i,   j,   k-1)
        const double f6  = phi[plnP+row0+i ];   // (i,   j,   k+1)
        // Diagonal neighbours in the x-y plane
        const double f7  = phi[pln0+rowM+im];   // (i-1, j-1, k  )
        const double f8  = phi[pln0+rowP+ip];   // (i+1, j+1, k  )
        const double f9  = phi[pln0+rowP+im];   // (i-1, j+1, k  )
        const double f10 = phi[pln0+rowM+ip];   // (i+1, j-1, k  )
        // Diagonal neighbours in the x-z plane
        const double f11 = phi[plnM+row0+im];   // (i-1, j,   k-1)
        const double f12 = phi[plnP+row0+ip];   // (i+1, j,   k+1)
        const double f13 = phi[plnP+row0+im];   // (i-1, j,   k+1)
        const double f14 = phi[plnM+row0+ip];   // (i+1, j,   k-1)
        // Diagonal neighbours in the y-z plane
        const double f15 = phi[plnM+rowM+i ];   // (i,   j-1, k-1)
        const double f16 = phi[plnP+rowP+i ];   // (i,   j+1, k+1)
        const double f17 = phi[plnP+rowM+i ];   // (i,   j-1, k+1)
        const double f18 = phi[plnM+rowP+i ];   // (i,   j+1, k-1)

        // Weighted differences; normalisation is left to the consumers of ColorGrad
        double *grad = &ColorGrad[3*n];
        grad[0] = -(f1-f2+0.5*(f7-f8+f9-f10+f11-f12+f13-f14));
        grad[1] = -(f3-f4+0.5*(f7-f8-f9+f10+f15-f16+f17-f18));
        grad[2] = -(f5-f6+0.5*(f11-f12-f13+f14+f15-f16-f17+f18));
    }
}
//*************************************************************************
// Multi-relaxation-time collision step for the 19-velocity color model
// (equilibrium moments after "Toelke, Fruediger et. al. 2006", as noted below).
//   ID               - per-node label; only nodes with ID > 0 are updated
//   disteven,distodd - distribution arrays, component q stored at [q*N+n].
//                      They are READ from the opposite array (previous swap operation)
//                      and WRITTEN back in the regular even/odd layout.
//   ColorGrad        - color gradient, interleaved [3*n .. 3*n+2] (read only)
//   Velocity         - output, interleaved [3*n .. 3*n+2]; receives jx,jy,jz
//                      (the first moments, not divided by rho)
//   Nx,Ny,Nz         - grid dimensions (N = Nx*Ny*Nz)
//   S                - number of consecutive blockDim.x-sized chunks handled by each thread block
//   rlx_setA/B       - relaxation rates for the two groups of moments
//   alpha            - coefficient of the color-gradient terms in the equilibrium moments
//   beta, pBC        - currently unused by this kernel
//   Fx,Fy,Fz         - external body force
__global__ void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
        double *Velocity, int Nx, int Ny, int Nz, int S,double rlx_setA, double rlx_setB,
        double alpha, double beta, double Fx, double Fy, double Fz, bool pBC)
{
    int n,N;
    // distributions
    double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
    double f10,f11,f12,f13,f14,f15,f16,f17,f18;
    // non-conserved moments
    double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
    // additional variables needed for computations
    double rho,jx,jy,jz,C,nx,ny,nz;
    N = Nx*Ny*Nz;
    char id;
    // S - number of threadblocks per grid block
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (n<N){
            // Read the label only after the bounds check: threads in the tail of
            // the grid have n >= N and must not touch ID[n].
            id = ID[n];
            if (id > 0){
                // Retrieve the color gradient
                nx = ColorGrad[3*n];
                ny = ColorGrad[3*n+1];
                nz = ColorGrad[3*n+2];
                //...........Normalize the Color Gradient.................................
                // (C == 0 produces NaN here; this is repaired before nx,ny,nz are used)
                C = sqrt(nx*nx+ny*ny+nz*nz);
                nx = nx/C;
                ny = ny/C;
                nz = nz/C;
                //......No color gradient at z-boundary if pressure BC are set.............
                //	if (pBC && k==0)	nx = ny = nz = 0.f;
                //	if (pBC && k==Nz-1) nx = ny = nz = 0.f;
                //........................................................................
                //					READ THE DISTRIBUTIONS
                //		(read from opposite array due to previous swap operation)
                //........................................................................
                f2 = distodd[n];
                f4 = distodd[N+n];
                f6 = distodd[2*N+n];
                f8 = distodd[3*N+n];
                f10 = distodd[4*N+n];
                f12 = distodd[5*N+n];
                f14 = distodd[6*N+n];
                f16 = distodd[7*N+n];
                f18 = distodd[8*N+n];
                //........................................................................
                f0 = disteven[n];
                f1 = disteven[N+n];
                f3 = disteven[2*N+n];
                f5 = disteven[3*N+n];
                f7 = disteven[4*N+n];
                f9 = disteven[5*N+n];
                f11 = disteven[6*N+n];
                f13 = disteven[7*N+n];
                f15 = disteven[8*N+n];
                f17 = disteven[9*N+n];
                //........................................................................
                //					PERFORM RELAXATION PROCESS
                //........................................................................
                //....................compute the moments...............................................
                rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
                m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
                m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
                jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
                m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
                jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
                m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
                jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
                m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
                m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
                m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
                m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
                m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
                m13 = f8+f7-f10-f9;
                m14 = f16+f15-f18-f17;
                m15 = f12+f11-f14-f13;
                m16 = f7-f8+f9-f10-f11+f12-f13+f14;
                m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
                m18 = f11-f12-f13+f14-f15+f16+f17-f18;
                //..........Toelke, Fruediger et. al. 2006...............
                // With a vanishing gradient the normal is undefined (NaN after the division
                // above); any finite value works because every use is multiplied by C == 0.
                if (C == 0.0)	nx = ny = nz = 1.0;
                m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1);
                m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2);
                m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
                m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
                m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
                m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
                m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
                m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
                m12 = m12 + rlx_setA*( -0.5*((jy*jy-jz*jz)/rho) - m12);
                m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13);
                m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14);
                m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15);
                m16 = m16 + rlx_setB*( - m16);
                m17 = m17 + rlx_setB*( - m17);
                m18 = m18 + rlx_setB*( - m18);
                //.................inverse transformation......................................................
                f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
                f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
                    +0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10);
                f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
                    +0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10);
                f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
                    +0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
                f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
                    +0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
                f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
                    +0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
                f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
                    +0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
                f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
                    +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
                    +0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
                f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
                    +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
                    +0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
                f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
                    +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
                    +0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
                f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
                    +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
                    +0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
                f11 = 0.05263157894736842*rho+0.003341687552213868*m1
                    +0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
                    +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
                    -0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
                f12 = 0.05263157894736842*rho+0.003341687552213868*m1
                    +0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
                    +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
                    -0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
                f13 = 0.05263157894736842*rho+0.003341687552213868*m1
                    +0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
                    +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
                    -0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
                f14 = 0.05263157894736842*rho+0.003341687552213868*m1
                    +0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
                    +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
                    -0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
                f15 = 0.05263157894736842*rho+0.003341687552213868*m1
                    +0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
                    -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
                f16 = 0.05263157894736842*rho+0.003341687552213868*m1
                    +0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
                    -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
                f17 = 0.05263157894736842*rho+0.003341687552213868*m1
                    +0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
                    -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
                f18 = 0.05263157894736842*rho+0.003341687552213868*m1
                    +0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
                    -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
                //.......................................................................................................
                // incorporate external force
                f1 += 0.16666666*Fx;
                f2 -= 0.16666666*Fx;
                f3 += 0.16666666*Fy;
                f4 -= 0.16666666*Fy;
                f5 += 0.16666666*Fz;
                f6 -= 0.16666666*Fz;
                f7 += 0.08333333333*(Fx+Fy);
                f8 -= 0.08333333333*(Fx+Fy);
                f9 += 0.08333333333*(Fx-Fy);
                f10 -= 0.08333333333*(Fx-Fy);
                f11 += 0.08333333333*(Fx+Fz);
                f12 -= 0.08333333333*(Fx+Fz);
                f13 += 0.08333333333*(Fx-Fz);
                f14 -= 0.08333333333*(Fx-Fz);
                f15 += 0.08333333333*(Fy+Fz);
                f16 -= 0.08333333333*(Fy+Fz);
                f17 += 0.08333333333*(Fy-Fz);
                f18 -= 0.08333333333*(Fy-Fz);
                //*********** WRITE UPDATED VALUES TO MEMORY ******************
                // Write the updated distributions
                //....EVEN.....................................
                disteven[n] = f0;
                disteven[N+n] = f2;
                disteven[2*N+n] = f4;
                disteven[3*N+n] = f6;
                disteven[4*N+n] = f8;
                disteven[5*N+n] = f10;
                disteven[6*N+n] = f12;
                disteven[7*N+n] = f14;
                disteven[8*N+n] = f16;
                disteven[9*N+n] = f18;
                //....ODD......................................
                distodd[n] = f1;
                distodd[N+n] = f3;
                distodd[2*N+n] = f5;
                distodd[3*N+n] = f7;
                distodd[4*N+n] = f9;
                distodd[5*N+n] = f11;
                distodd[6*N+n] = f13;
                distodd[7*N+n] = f15;
                distodd[8*N+n] = f17;
                //...Store the Velocity..........................
                Velocity[3*n] = jx;
                Velocity[3*n+1] = jy;
                Velocity[3*n+2] = jz;
                /*	//...Store the Color Gradient....................
                ColorGrad[3*n] = C*nx;
                ColorGrad[3*n+1] = C*ny;
                ColorGrad[3*n+2] = C*nz;
                */	//...............................................
                //***************************************************************
            } // check if n is in the solid
        } // check if n is in the domain
    } // loop over s
}
//*************************************************************************
// Build the equilibrium density "distributions" of both components on a 7-velocity
// lattice, recolor them along the color gradient and push them to the neighbouring
// nodes (periodic in x, y and z). A contribution aimed at a node with ID == 0 is
// bounced back onto the source node instead.
//   ID         - per-node label; source nodes need ID > 0, targets with ID == 0 bounce back
//   Den        - accumulated output densities, interleaved [2*n], [2*n+1]
//   Copy       - densities of the previous step (read only), same layout as Den
//   Phi        - unused by this kernel
//   ColorGrad  - color gradient, interleaved [3*n .. 3*n+2]
//   Velocity   - flow velocity, interleaved [3*n .. 3*n+2]
//   beta       - strength of the recoloring term
//   Nx,Ny,Nz   - grid dimensions (N = Nx*Ny*Nz)
//   pBC        - unused by this kernel
//   S          - number of consecutive blockDim.x-sized chunks handled by each thread block
// Every update of Den goes through atomicAdd because several threads accumulate
// into the same node concurrently.
__global__ void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
        double beta, int Nx, int Ny, int Nz, bool pBC, int S)
{
    int in,jn,kn,n,nn,N;
    int q,Cqx,Cqy,Cqz;
    double na,nb;           // density values
    double ux,uy,uz;        // flow velocity
    double nx,ny,nz,C;      // color gradient components
    double a1,a2,b1,b2;
    double sp,delta;
    double feq[6];          // equilibrium distributions
    // Positive discrete velocities of the D3Q7 model (the negative ones are their mirror images)
    int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}};
    N = Nx*Ny*Nz;
    // S - number of threadblocks per grid block
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (n<N){
            // Local Density Values
            na = Copy[2*n];
            nb = Copy[2*n+1];
            if (ID[n] > 0 && na+nb > 0.0){
                //.......Back out the 3-D indices for node n..............
                int k = n/(Nx*Ny);
                int j = (n-Nx*Ny*k)/Nx;
                int i = n-Nx*Ny*k-Nx*j;
                //.....Load the Color gradient.........
                nx = ColorGrad[3*n];
                ny = ColorGrad[3*n+1];
                nz = ColorGrad[3*n+2];
                C = sqrt(nx*nx+ny*ny+nz*nz);
                // The unit normal is only used when C > 0 (see the recoloring step)
                nx = nx/C;
                ny = ny/C;
                nz = nz/C;
                //....Load the flow velocity...........
                ux = Velocity[3*n];
                uy = Velocity[3*n+1];
                uz = Velocity[3*n+2];
                // Stationary value - distribution 0.
                // This must be atomic as well: neighbouring threads may be pushing
                // into Den[2*n] / Den[2*n+1] at the same time, and a plain "+="
                // could overwrite their contributions.
                atomicAdd(&Den[2*n], 0.3333333333333333*na);
                atomicAdd(&Den[2*n+1], 0.3333333333333333*nb);
                // Non-Stationary equilibrium distributions
                feq[0] = 0.1111111111111111*(1+3*ux);
                feq[1] = 0.1111111111111111*(1-3*ux);
                feq[2] = 0.1111111111111111*(1+3*uy);
                feq[3] = 0.1111111111111111*(1-3*uy);
                feq[4] = 0.1111111111111111*(1+3*uz);
                feq[5] = 0.1111111111111111*(1-3*uz);
                // Construction and streaming for the components
                for (int idx=0; idx<3; idx++){
                    // Distribution index of the positive direction (q+1 is its opposite)
                    q = 2*idx;
                    // Associated discrete velocity
                    Cqx = D3Q7[idx][0];
                    Cqy = D3Q7[idx][1];
                    Cqz = D3Q7[idx][2];
                    // Generate the Equilibrium Distribution
                    a1 = na*feq[q];
                    b1 = nb*feq[q];
                    a2 = na*feq[q+1];
                    b2 = nb*feq[q+1];
                    // Recolor the distributions
                    if (C > 0.0){
                        sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz);
                        delta = na*nb/(na+nb)*0.1111111111111111*sp;
                        a1 += beta*delta;
                        a2 -= beta*delta;
                        b1 -= beta*delta;
                        b2 += beta*delta;
                    }
                    // .......Neighbor in the positive direction..............
                    in = i+Cqx;
                    jn = j+Cqy;
                    kn = k+Cqz;
                    // Adjust for periodic BC, if necessary
                    if (in<0) in+= Nx;
                    if (jn<0) jn+= Ny;
                    if (kn<0) kn+= Nz;
                    if (!(in<Nx)) in-= Nx;
                    if (!(jn<Ny)) jn-= Ny;
                    if (!(kn<Nz)) kn-= Nz;
                    nn = kn*Nx*Ny+jn*Nx+in;
                    // Bounce-back: a target with ID == 0 returns the contribution to node n
                    if (ID[nn] == 0) nn = n;
                    atomicAdd(&Den[2*nn], a1);
                    atomicAdd(&Den[2*nn+1], b1);
                    // .......Neighbor in the negative direction..............
                    in = i-Cqx;
                    jn = j-Cqy;
                    kn = k-Cqz;
                    // Adjust for periodic BC, if necessary
                    if (in<0) in+= Nx;
                    if (jn<0) jn+= Ny;
                    if (kn<0) kn+= Nz;
                    if (!(in<Nx)) in-= Nx;
                    if (!(jn<Ny)) jn-= Ny;
                    if (!(kn<Nz)) kn-= Nz;
                    nn = kn*Nx*Ny+jn*Nx+in;
                    // Bounce-back: a target with ID == 0 returns the contribution to node n
                    if (ID[nn] == 0) nn = n;
                    atomicAdd(&Den[2*nn], a2);
                    atomicAdd(&Den[2*nn+1], b2);
                }
            }
        }
    }
}
// Update the phase indicator from the streamed densities and reset the accumulator.
//   ID    - per-node label; only nodes with ID > 0 are updated
//   Phi   - output phase indicator, Phi[n] = (Na-Nb)/(Na+Nb)
//   Copy  - receives a copy of the current densities, interleaved [2*n], [2*n+1]
//   Den   - streamed densities, same layout; zeroed after being copied
//   N     - number of nodes
//   S     - number of consecutive blockDim.x-sized chunks handled by each thread block
__global__ void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N, int S)
{
    int n;
    double Na,Nb;
    //...................................................................
    // Update Phi
    // S - number of threadblocks per grid block
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        // The bounds check must come first so that ID[n] is never read for n >= N
        if (n<N && ID[n] > 0){
            // Get the density value (Streaming already performed)
            Na = Den[2*n];
            Nb = Den[2*n+1];
            Phi[n] = (Na-Nb)/(Na+Nb);
            // Store the copy of the current density
            Copy[2*n] = Na;
            Copy[2*n+1] = Nb;
            // Zero the Density value to get ready for the next streaming
            Den[2*n] = 0.0;
            Den[2*n+1] = 0.0;
        }
    }
    //...................................................................
}
//*************************************************************************
// Host-side entry point: launches InitDenColor on a 1-D grid of nblocks blocks with
// nthreads threads each; S is forwarded as the per-block chunk count.
extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S,
        char *ID, double *Den, double *Phi, double das, double dbs, int N)
{
    const dim3 grid(nblocks);
    const dim3 block(nthreads);
    InitDenColor<<<grid, block>>>(ID, Den, Phi, das, dbs, N, S);
}
//*************************************************************************
// Host-side entry point: launches ComputeColorGradient on a 1-D grid of nBlocks blocks
// with nthreads threads each; S is forwarded as the per-block chunk count.
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
        char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz)
{
    const dim3 grid(nBlocks);
    const dim3 block(nthreads);
    ComputeColorGradient<<<grid, block>>>(ID, Phi, ColorGrad, Nx, Ny, Nz, S);
}
//*************************************************************************
// Host-side entry point: launches ColorCollide on a 1-D grid of nBlocks blocks with
// nthreads threads each. Note that the kernel takes (Nx,Ny,Nz,S) before the
// relaxation parameters, so the arguments are reordered here.
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S,
        char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity,
        double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz,
        int Nx, int Ny, int Nz, bool pBC)
{
    const dim3 grid(nBlocks);
    const dim3 block(nthreads);
    ColorCollide<<<grid, block>>>(ID, f_even, f_odd, ColorGrad, Velocity,
                                  Nx, Ny, Nz, S,
                                  rlxA, rlxB, alpha, beta,
                                  Fx, Fy, Fz, pBC);
}
//*************************************************************************
// Host-side entry point: launches DensityStreamD3Q7 on a 1-D grid of nBlocks blocks
// with nthreads threads each; S is passed as the kernel's last argument.
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
        char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
        double beta, int Nx, int Ny, int Nz, bool pBC)
{
    const dim3 grid(nBlocks);
    const dim3 block(nthreads);
    DensityStreamD3Q7<<<grid, block>>>(ID, Den, Copy, Phi, ColorGrad, Velocity,
                                       beta, Nx, Ny, Nz, pBC, S);
}
//*************************************************************************
// Host-side entry point: launches ComputePhi on a 1-D grid of nBlocks blocks with
// nthreads threads each; S is forwarded as the per-block chunk count.
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S,
        char *ID, double *Phi, double *Copy, double *Den, int N)
{
    const dim3 grid(nBlocks);
    const dim3 block(nthreads);
    ComputePhi<<<grid, block>>>(ID, Phi, Copy, Den, N, S);
}
//*************************************************************************

21
gpu/Color.h Normal file
View File

@ -0,0 +1,21 @@
//
//*************************************************************************
// Host-callable launch wrappers for the color-model kernels.
// Common arguments:
//   nblocks / nBlocks - number of thread blocks in the 1-D launch grid
//   nthreads          - number of threads per block
//   S                 - number of consecutive nthreads-sized chunks handled by each block
// All pointer arguments are passed straight through to the kernels.
// NOTE(review): presumably they must therefore be device pointers - confirm with callers.
//*************************************************************************
// Launches InitDenColor: fills Den and Phi from the node labels in ID,
// using das/dbs for nodes whose label is neither 1 nor 2. N is the node count.
extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S,
char *ID, double *Den, double *Phi, double das, double dbs, int N);
//*************************************************************************
// Launches ComputeColorGradient: writes three gradient components per node
// into ColorGrad from the phase indicator Phi on an Nx x Ny x Nz grid.
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz);
//*************************************************************************
// Launches ColorCollide: relaxes the distributions in f_even/f_odd with rates
// rlxA/rlxB, applies the body force (Fx,Fy,Fz) and stores three values per node
// in Velocity.
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S,
char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity,
double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz,
int Nx, int Ny, int Nz, bool pBC);
//*************************************************************************
// Launches DensityStreamD3Q7: reads the densities in Copy and accumulates the
// streamed, recolored contributions into Den.
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
double beta, int Nx, int Ny, int Nz, bool pBC);
//*************************************************************************
// Launches ComputePhi: computes Phi from Den, saves Den into Copy and zeroes Den.
// N is the node count.
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S,
char *ID, double *Phi, double *Copy, double *Den, int N);
//*************************************************************************

19
gpu/CudaExtras.cu Normal file
View File

@ -0,0 +1,19 @@
// Basic cuda functions callable from C/C++ code
#include <cuda.h>
#include <stdio.h>
// Allocate size bytes of device memory with cudaMalloc and store the pointer in *address.
// A failure is reported on stderr instead of being silently dropped; the function
// still returns normally, so callers keep their existing control flow.
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size){
    cudaError_t err = cudaMalloc(address,size);
    if (err != cudaSuccess){
        fprintf(stderr,"dvc_AllocateDeviceMemory: cudaMalloc of %lu bytes failed: %s\n",
                (unsigned long) size, cudaGetErrorString(err));
    }
}
// Copy size bytes from host memory (source) to device memory (dest) with a blocking cudaMemcpy.
// A failure is reported on stderr instead of being silently dropped.
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size){
    cudaError_t err = cudaMemcpy(dest,source,size,cudaMemcpyHostToDevice);
    if (err != cudaSuccess){
        fprintf(stderr,"dvc_CopyToDevice: cudaMemcpy of %lu bytes failed: %s\n",
                (unsigned long) size, cudaGetErrorString(err));
    }
}
// Copy size bytes from device memory (source) to host memory (dest) with a blocking cudaMemcpy.
// A failure is reported on stderr instead of being silently dropped.
extern "C" void dvc_CopyToHost(void* dest, void* source, size_t size){
    cudaError_t err = cudaMemcpy(dest,source,size,cudaMemcpyDeviceToHost);
    if (err != cudaSuccess){
        fprintf(stderr,"dvc_CopyToHost: cudaMemcpy of %lu bytes failed: %s\n",
                (unsigned long) size, cudaGetErrorString(err));
    }
}
// Block the host until all previously issued device work has finished.
// cudaDeviceSynchronize is also where errors raised inside earlier kernels are
// returned, so the result is reported on stderr rather than discarded.
extern "C" void dvc_Barrier(){
    cudaError_t err = cudaDeviceSynchronize();
    if (err != cudaSuccess){
        fprintf(stderr,"dvc_Barrier: cudaDeviceSynchronize failed: %s\n",
                cudaGetErrorString(err));
    }
}

12
gpu/CudaExtras.h Normal file
View File

@ -0,0 +1,12 @@
//*************************************************************************
// A few basic cuda functions callable from C / C++ code
//************************************************************************
//*************************************************************************
// NOTE(review): size_t is used below but this header includes nothing itself;
// the including file must already have a declaration of size_t in scope.
//
// Allocates `size` bytes with cudaMalloc and stores the device pointer in
// *address. Returns nothing, so no status is handed back to the caller.
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
//*************************************************************************
// Copies `size` bytes from host `source` to device `dest`
// (cudaMemcpy, cudaMemcpyHostToDevice).
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
//*************************************************************************
// Copies `size` bytes from device `source` to host `dest`
// (cudaMemcpy, cudaMemcpyDeviceToHost).
extern "C" void dvc_CopyToHost(void* dest, void* source, size_t size);
//*************************************************************************
// Waits for outstanding device work by calling cudaDeviceSynchronize().
extern "C" void dvc_Barrier();
//*************************************************************************

359
gpu/D3Q19.cu Normal file
View File

@ -0,0 +1,359 @@
#include <cuda.h>
// Initialise the 19 distributions of every lattice site.
//
// Launch layout: 1-D grid of 1-D blocks. Each thread visits S sites; on pass s
// it handles site  S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x.
// Sites with index >= Nx*Ny*Nz are skipped.
//
// Storage: component q of a distribution array lives at [q*N + site], with
// f_even holding 10 components and f_odd holding 9.
//
// Sites with ID > 0 receive 0.3333333333333333 in f_even component 0,
// 0.055555555555555555 in the next six components (f_odd 0..2, f_even 1..3)
// and 0.0277777777777778 in the remaining twelve (f_odd 3..8, f_even 4..9).
// All other sites have every component set to -1.0.
__global__ void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz, int S)
{
    const int N = Nx*Ny*Nz;
    for (int pass=0; pass<S; pass++){
        const int site = S*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (!(site<N)) continue;

        if (ID[site] > 0){
            f_even[site] = 0.3333333333333333;
            // first group of six components
            for (int q=0; q<3; q++){
                f_odd[q*N+site] = 0.055555555555555555;
                f_even[(q+1)*N+site] = 0.055555555555555555;
            }
            // remaining twelve components
            for (int q=3; q<9; q++){
                f_odd[q*N+site] = 0.0277777777777778;
                f_even[(q+1)*N+site] = 0.0277777777777778;
            }
        }
        else{
            // mark every component of this site with -1.0
            for (int q=0; q<10; q++){
                f_even[q*N+site] = -1.0;
                if (q<9) f_odd[q*N+site] = -1.0;
            }
        }
    }
}
// Gather component q of a distribution array into a send buffer.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per list entry; threads
// whose index is >= count do nothing.
//   list[idx]  - lattice site to read
//   dist       - even or odd distribution array, component q at [q*N + site]
//   sendbuf    - destination; entry idx is written at [start + idx]
__global__ void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
    const int slot = blockIdx.x*blockDim.x + threadIdx.x;
    if (!(slot<count)) return;

    const int site = list[slot];
    sendbuf[start+slot] = dist[q*N+site];
}
// Unpack one distribution component from a receive buffer.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per list entry; threads
// whose index is >= count do nothing.
//
//   q             - component of `dist` to write (stored at [q*N + site])
//   Cqx,Cqy,Cqz   - lattice offset applied to each listed site before writing
//   list[idx]     - site index n = k*Nx*Ny + j*Nx + i of the listed site
//   recvbuf       - source; entry idx is read from [start + idx]
//   dist          - even or odd distribution array
//
// No periodic wrap or bounds check is applied to the shifted (i,j,k): the
// list must only contain sites for which (i+Cqx, j+Cqy, k+Cqz) stays inside
// the Nx x Ny x Nz block.
__global__ void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
        double *recvbuf, double *dist, int Nx, int Ny, int Nz){
    int i,j,k,n,nn,idx;
    int N = Nx*Ny*Nz;
    idx = blockIdx.x*blockDim.x + threadIdx.x;
    if (idx<count){
        // Site index as stored in the list
        n = list[idx];
        // Recover the 3-D indices. The row stride is Nx (the layout is
        // n = k*Nx*Ny + j*Nx + i); using Nz here was only correct for Nx == Nz.
        k = n/(Nx*Ny);
        j = (n-Nx*Ny*k)/Nx;
        i = n-Nx*Ny*k-Nx*j;
        // Streaming shift for this distribution
        i += Cqx;
        j += Cqy;
        k += Cqz;
        nn = k*Nx*Ny+j*Nx+i;
        // Unpack the received value to the shifted site
        dist[q*N+nn] = recvbuf[start+idx];
    }
}
//*************************************************************************
// Exchange one odd/even distribution pair between a site and its neighbour.
// The swap is performed only when the neighbour's even value is > 0
// (InitD3Q19 stores -1.0 at sites whose ID is not > 0).
__device__ inline void SwapPairD3Q19(double *distodd, double *disteven, int oddIdx, int evenIdx)
{
    double fe = disteven[evenIdx];
    if (fe > 0){
        double fo = distodd[oddIdx];
        distodd[oddIdx] = fe;
        disteven[evenIdx] = fo;
    }
}

// Streaming step by pairwise swapping.
//
// Launch layout: 1-D grid of 1-D blocks. Each thread visits S sites; on pass s
// it handles site  n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x.
// Sites with n >= Nx*Ny*Nz or ID[n] <= 0 are skipped.
//
// For a site n = k*Nx*Ny + j*Nx + i, odd component m (distodd[m*N + n]) is
// exchanged with even component m+1 at the neighbouring site
// (disteven[(m+1)*N + nn]), for m = 0..8. The neighbour offsets are
//   m=0: (+1, 0, 0)   m=1: ( 0,+1, 0)   m=2: ( 0, 0,+1)
//   m=3: (+1,+1, 0)   m=4: (+1,-1, 0)   m=5: (+1, 0,+1)
//   m=6: (+1, 0,-1)   m=7: ( 0,+1,+1)   m=8: ( 0,+1,-1)
// with periodic wrap in every direction. disteven component 0 is untouched.
__global__ void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz, int S)
{
    int N = Nx*Ny*Nz;
    // S - number of sites handled per thread
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (n<N && ID[n] > 0){
            //.......Back out the 3-D indices for node n..............
            // The row stride is Nx; decoding i with Nz was only correct
            // for Nx == Nz and misplaced the periodic x-wrap otherwise.
            int k = n/(Nx*Ny);
            int j = (n-Nx*Ny*k)/Nx;
            int i = n-Nx*Ny*k-Nx*j;
            //.......Index increments to each neighbour, periodic wrap included
            int xp = (i+1<Nx) ? 1      : 1-Nx;          // i+1
            int yp = (j+1<Ny) ? Nx     : Nx-Nx*Ny;      // j+1
            int ym = (j-1<0)  ? Nx*Ny-Nx : -Nx;         // j-1
            int zp = (k+1<Nz) ? Nx*Ny  : Nx*Ny-N;       // k+1
            int zm = (k-1<0)  ? N-Nx*Ny : -Nx*Ny;       // k-1
            //........................................................................
            SwapPairD3Q19(distodd, disteven,     n,   N+n+xp);      // 1  <-> 2
            SwapPairD3Q19(distodd, disteven,   N+n, 2*N+n+yp);      // 3  <-> 4
            SwapPairD3Q19(distodd, disteven, 2*N+n, 3*N+n+zp);      // 5  <-> 6
            SwapPairD3Q19(distodd, disteven, 3*N+n, 4*N+n+xp+yp);   // 7  <-> 8
            SwapPairD3Q19(distodd, disteven, 4*N+n, 5*N+n+xp+ym);   // 9  <-> 10
            SwapPairD3Q19(distodd, disteven, 5*N+n, 6*N+n+xp+zp);   // 11 <-> 12
            SwapPairD3Q19(distodd, disteven, 6*N+n, 7*N+n+xp+zm);   // 13 <-> 14
            SwapPairD3Q19(distodd, disteven, 7*N+n, 8*N+n+yp+zp);   // 15 <-> 16
            SwapPairD3Q19(distodd, disteven, 8*N+n, 9*N+n+yp+zm);   // 17 <-> 18
            //........................................................................
        }
    }
}
//*************************************************************************
// Pack every distribution that has to be communicated into the 18 send
// buffers (6 faces, 12 edges) by launching PackDist once per component.
//
//   faceGrid, edgeGrid - grid sizes used for the face and edge launches
//   threads            - block size used for every launch
//   f_even, f_odd      - distribution arrays, component q at [q*N + site]
//   dvcSendList_*      - site lists, one per face/edge
//   sendCount_*        - number of entries in the matching list
//   sendbuf_*          - destination buffers; each face buffer receives five
//                        components back to back (5*sendCount entries), each
//                        edge buffer receives one component
//
// The numbers in the comments are the D3Q19 distribution numbers: odd number
// 2m+1 is f_odd component m, even number 2m is f_even component m.
// All pointers are handed directly to kernel launches. Launch errors are not
// checked here.
extern "C" void dvc_PackD3Q19(int faceGrid, int edgeGrid, int threads,double *f_even, double *f_odd, int N,
        int *dvcSendList_x, int *dvcSendList_y, int *dvcSendList_z, int *dvcSendList_X, int *dvcSendList_Y, int *dvcSendList_Z,
        int *dvcSendList_xy, int *dvcSendList_XY, int *dvcSendList_xY, int *dvcSendList_Xy,
        int *dvcSendList_xz, int *dvcSendList_XZ, int *dvcSendList_xZ, int *dvcSendList_Xz,
        int *dvcSendList_yz, int *dvcSendList_YZ, int *dvcSendList_yZ, int *dvcSendList_Yz,
        int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X, int sendCount_Y, int sendCount_Z,
        int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy,
        int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz,
        int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz,
        double *sendbuf_x, double *sendbuf_y, double *sendbuf_z, double *sendbuf_X, double *sendbuf_Y, double *sendbuf_Z,
        double *sendbuf_xy, double *sendbuf_XY, double *sendbuf_xY, double *sendbuf_Xy,
        double *sendbuf_xz, double *sendbuf_XZ, double *sendbuf_xZ, double *sendbuf_Xz,
        double *sendbuf_yz, double *sendbuf_YZ, double *sendbuf_yZ, double *sendbuf_Yz)
{
    //...Packing for x face (2,8,10,12,14)................................
    PackDist<<<faceGrid,threads>>>(1,dvcSendList_x,0,sendCount_x,sendbuf_x,f_even,N);
    PackDist<<<faceGrid,threads>>>(4,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,f_even,N);
    PackDist<<<faceGrid,threads>>>(5,dvcSendList_x,2*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
    PackDist<<<faceGrid,threads>>>(6,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
    PackDist<<<faceGrid,threads>>>(7,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
    //...Packing for X face (1,7,9,11,13)................................
    PackDist<<<faceGrid,threads>>>(0,dvcSendList_X,0,sendCount_X,sendbuf_X,f_odd,N);
    PackDist<<<faceGrid,threads>>>(3,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
    PackDist<<<faceGrid,threads>>>(4,dvcSendList_X,2*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
    PackDist<<<faceGrid,threads>>>(5,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
    PackDist<<<faceGrid,threads>>>(6,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
    //...Packing for y face (4,8,9,16,18).................................
    PackDist<<<faceGrid,threads>>>(2,dvcSendList_y,0,sendCount_y,sendbuf_y,f_even,N);
    PackDist<<<faceGrid,threads>>>(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,f_even,N);
    PackDist<<<faceGrid,threads>>>(4,dvcSendList_y,2*sendCount_y,sendCount_y,sendbuf_y,f_odd,N);
    PackDist<<<faceGrid,threads>>>(8,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,f_even,N);
    PackDist<<<faceGrid,threads>>>(9,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,f_even,N);
    //...Packing for Y face (3,7,10,15,17).................................
    PackDist<<<faceGrid,threads>>>(1,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,f_odd,N);
    PackDist<<<faceGrid,threads>>>(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
    PackDist<<<faceGrid,threads>>>(5,dvcSendList_Y,2*sendCount_Y,sendCount_Y,sendbuf_Y,f_even,N);
    PackDist<<<faceGrid,threads>>>(7,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
    PackDist<<<faceGrid,threads>>>(8,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
    //...Packing for z face (6,12,13,16,17)................................
    PackDist<<<faceGrid,threads>>>(3,dvcSendList_z,0,sendCount_z,sendbuf_z,f_even,N);
    PackDist<<<faceGrid,threads>>>(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,f_even,N);
    PackDist<<<faceGrid,threads>>>(6,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,f_odd,N);
    PackDist<<<faceGrid,threads>>>(8,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,f_even,N);
    PackDist<<<faceGrid,threads>>>(8,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,f_odd,N);
    //...Packing for Z face (5,11,14,15,18)................................
    PackDist<<<faceGrid,threads>>>(2,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,f_odd,N);
    PackDist<<<faceGrid,threads>>>(5,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N);
    PackDist<<<faceGrid,threads>>>(7,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N);
    PackDist<<<faceGrid,threads>>>(7,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N);
    PackDist<<<faceGrid,threads>>>(9,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N);
    //...Pack the xy edge (8)................................
    PackDist<<<edgeGrid,threads>>>(4,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,f_even,N);
    //...Pack the Xy edge (9)................................
    PackDist<<<edgeGrid,threads>>>(4,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,f_odd,N);
    //...Pack the xY edge (10)................................
    PackDist<<<edgeGrid,threads>>>(5,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,f_even,N);
    //...Pack the XY edge (7)................................
    PackDist<<<edgeGrid,threads>>>(3,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,f_odd,N);
    // The four xz-plane edges are packed exactly once; a second, identical
    // set of launches would only rewrite the same buffer contents.
    //...Pack the xz edge (12)................................
    PackDist<<<edgeGrid,threads>>>(6,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,f_even,N);
    //...Pack the xZ edge (14)................................
    PackDist<<<edgeGrid,threads>>>(7,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,f_even,N);
    //...Pack the Xz edge (13)................................
    PackDist<<<edgeGrid,threads>>>(6,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,f_odd,N);
    //...Pack the XZ edge (11)................................
    PackDist<<<edgeGrid,threads>>>(5,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,f_odd,N);
    //...Pack the yz edge (16)................................
    PackDist<<<edgeGrid,threads>>>(8,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,f_even,N);
    //...Pack the yZ edge (18)................................
    PackDist<<<edgeGrid,threads>>>(9,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,f_even,N);
    //...Pack the Yz edge (17)................................
    PackDist<<<edgeGrid,threads>>>(8,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,f_odd,N);
    //...Pack the YZ edge (15)................................
    PackDist<<<edgeGrid,threads>>>(7,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,f_odd,N);
}
//...................................................................................
//*************************************************************************
// Host-callable wrapper: launches PackDist on a 1-D grid of `grid` blocks with
// `threads` threads each, forwarding all remaining arguments unchanged.
// The launch status is not checked.
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
        int sendCount, double *sendbuf, double *Dist, int N)
{
    const dim3 launchGrid(grid);
    const dim3 launchBlock(threads);
    PackDist<<<launchGrid, launchBlock>>>(q, SendList, start, sendCount, sendbuf, Dist, N);
}
//*************************************************************************
// Host-callable wrapper: launches MapRecvDist on a 1-D grid of `grid` blocks
// with `threads` threads each, forwarding all remaining arguments unchanged.
// The launch status is not checked.
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
        int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz)
{
    const dim3 launchGrid(grid);
    const dim3 launchBlock(threads);
    MapRecvDist<<<launchGrid, launchBlock>>>(q, Cqx, Cqy, Cqz, RecvList, start,
                                             recvCount, recvbuf, Dist, Nx, Ny, Nz);
}
//*************************************************************************
// Host-callable wrapper: launches SwapD3Q19 with `nblocks` blocks of
// `nthreads` threads; S is passed to the kernel as its last argument.
// The launch status is not checked.
extern "C" void dvc_SwapD3Q19( int nblocks, int nthreads, int S,
        char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
    const dim3 launchGrid(nblocks);
    const dim3 launchBlock(nthreads);
    SwapD3Q19<<<launchGrid, launchBlock>>>(ID, f_even, f_odd, Nx, Ny, Nz, S);
}
//*************************************************************************
// Host-callable wrapper: launches InitD3Q19 with `nblocks` blocks of
// `nthreads` threads; S is passed to the kernel as its last argument.
// The launch status is not checked.
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd, int Nx,
        int Ny, int Nz)
{
    const dim3 launchGrid(nblocks);
    const dim3 launchBlock(nthreads);
    InitD3Q19<<<launchGrid, launchBlock>>>(ID, f_even, f_odd, Nx, Ny, Nz, S);
}
//*************************************************************************

15
gpu/D3Q19.h Normal file
View File

@ -0,0 +1,15 @@
//*************************************************************************
//*************************************************************************
// Host-callable launch wrappers for the kernels in D3Q19.cu. Pointer
// arguments are handed directly to kernel launches.
// NOTE(review): D3Q19.cu also defines dvc_PackD3Q19, which is not declared here.
//
// Launches InitD3Q19 with nblocks blocks of nthreads threads; each thread
// visits S sites. Sites with ID > 0 get fixed initial distribution values,
// all other sites get -1.0 in every component.
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz);
//*************************************************************************
// Launches SwapD3Q19 with nblocks blocks of nthreads threads; each thread
// visits S sites and, for sites with ID > 0, exchanges f_odd entries with
// the paired f_even entries of neighbouring sites (periodic wrap).
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S,
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
//*************************************************************************
// Launches PackDist: for idx < sendCount,
// sendbuf[start+idx] = Dist[q*N + SendList[idx]].
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
int sendCount, double *sendbuf, double *Dist, int N);
//*************************************************************************
// Launches MapRecvDist: for idx < recvCount, recvbuf[start+idx] is written
// to component q of Dist at the site obtained by shifting RecvList[idx] by
// (Cqx, Cqy, Cqz). No periodic wrap is applied to the shifted site.
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz);
//*************************************************************************

89
gpu/D3Q7.cu Normal file
View File

@ -0,0 +1,89 @@
// GPU Functions for D3Q7 Lattice Boltzmann Methods
// Gather Data values at the listed sites into a contiguous send buffer.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per list entry; threads
// whose index is >= count do nothing.
//   sendbuf[idx] = Data[list[idx]]
// N is accepted but not used by the kernel.
__global__ void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
    const int slot = blockIdx.x*blockDim.x + threadIdx.x;
    if (!(slot<count)) return;

    const int site = list[slot];
    sendbuf[slot] = Data[site];
}
// Scatter a contiguous receive buffer into Data at the listed sites.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per list entry; threads
// whose index is >= count do nothing.
//   Data[list[idx]] = recvbuf[idx]
// N is accepted but not used by the kernel.
__global__ void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
    const int slot = blockIdx.x*blockDim.x + threadIdx.x;
    if (!(slot<count)) return;

    const int site = list[slot];
    Data[site] = recvbuf[slot];
}
// Move `number` interleaved values per listed site from Data into the send
// buffer, zeroing the source afterwards.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per list entry; threads
// whose index is >= count do nothing.
// For each component c in [0, number):
//   sendbuf[idx*number + c] = Data[number*list[idx] + c];
//   Data[number*list[idx] + c] = 0.0;
// N is accepted but not used by the kernel.
__global__ void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
    const int slot = blockIdx.x*blockDim.x + threadIdx.x;
    if (!(slot<count)) return;

    const int site = list[slot];
    for (int c=0; c<number; c++){
        const int src = number*site+c;
        sendbuf[slot*number+c] = Data[src];
        // the value now lives in the buffer; clear it at the source
        Data[src] = 0.0;
    }
}
// Add `number` interleaved values per listed site from the receive buffer
// onto the existing contents of Data.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per list entry; threads
// whose index is >= count do nothing.
// For each component c in [0, number):
//   Data[number*list[idx] + c] += recvbuf[idx*number + c];
// The update is a plain (non-atomic) read-modify-write.
// N is accepted but not used by the kernel.
__global__ void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
    const int slot = blockIdx.x*blockDim.x + threadIdx.x;
    if (!(slot<count)) return;

    const int site = list[slot];
    for (int c=0; c<number; c++){
        Data[number*site+c] += recvbuf[slot*number+c];
    }
}
//***************************************************************************************
// Host-callable wrapper: launches PackDenD3Q7 on a 1-D grid of `grid` blocks
// with `threads` threads each, forwarding all remaining arguments unchanged.
// The launch status is not checked.
extern "C" void dvc_PackDenD3Q7(int grid, int threads, int *list, int count, double *sendbuf,
        int number, double *Data, int N)
{
    const dim3 launchGrid(grid);
    const dim3 launchBlock(threads);
    PackDenD3Q7<<<launchGrid, launchBlock>>>(list, count, sendbuf, number, Data, N);
}
//***************************************************************************************
// Host-callable wrapper: launches UnpackDenD3Q7 on a 1-D grid of `grid` blocks
// with `threads` threads each, forwarding all remaining arguments unchanged.
// The launch status is not checked.
extern "C" void dvc_UnpackDenD3Q7(int grid, int threads, int *list, int count, double *recvbuf,
        int number, double *Data, int N)
{
    const dim3 launchGrid(grid);
    const dim3 launchBlock(threads);
    UnpackDenD3Q7<<<launchGrid, launchBlock>>>(list, count, recvbuf, number, Data, N);
}
//***************************************************************************************
// Host-callable wrapper: launches PackValues on a 1-D grid of `grid` blocks
// with `threads` threads each, forwarding all remaining arguments unchanged.
// The launch status is not checked.
extern "C" void dvc_PackValues(int grid, int threads, int *list, int count, double *sendbuf,
        double *Data, int N)
{
    const dim3 launchGrid(grid);
    const dim3 launchBlock(threads);
    PackValues<<<launchGrid, launchBlock>>>(list, count, sendbuf, Data, N);
}
//***************************************************************************************
// Host-callable wrapper: launches UnpackValues on a 1-D grid of `grid` blocks
// with `threads` threads each, forwarding all remaining arguments unchanged.
// The launch status is not checked.
extern "C" void dvc_UnpackValues(int grid, int threads, int *list, int count, double *recvbuf,
        double *Data, int N)
{
    const dim3 launchGrid(grid);
    const dim3 launchBlock(threads);
    UnpackValues<<<launchGrid, launchBlock>>>(list, count, recvbuf, Data, N);
}
//***************************************************************************************

15
gpu/D3Q7.h Normal file
View File

@ -0,0 +1,15 @@
//
//***************************************************************************************
//***************************************************************************************
// Host-callable launch wrappers for the kernels in D3Q7.cu. Each launches its
// kernel with `grid` blocks of `threads` threads, one thread per list entry;
// pointer arguments are handed directly to the kernel launch. N is forwarded
// but not used by the kernels.
//
// For idx < count and c < number:
//   sendbuf[idx*number + c] = Data[number*list[idx] + c], then that Data
//   entry is set to 0.0.
extern "C" void dvc_PackDenD3Q7(int grid, int threads, int *list, int count, double *sendbuf,
int number, double *Data, int N);
//***************************************************************************************
// For idx < count and c < number:
//   Data[number*list[idx] + c] += recvbuf[idx*number + c].
extern "C" void dvc_UnpackDenD3Q7(int grid, int threads, int *list, int count, double *recvbuf,
int number, double *Data, int N);
//***************************************************************************************
// For idx < count: sendbuf[idx] = Data[list[idx]].
extern "C" void dvc_PackValues(int grid, int threads, int *list, int count, double *sendbuf,
double *Data, int N);
//***************************************************************************************
// For idx < count: Data[list[idx]] = recvbuf[idx].
extern "C" void dvc_UnpackValues(int grid, int threads, int *list, int count, double *recvbuf,
double *Data, int N);
//***************************************************************************************

312
gpu/MRT.cu Normal file
View File

@ -0,0 +1,312 @@
#include <cuda.h>
// CUDA kernels for single-phase MRT code
// James McClure
//*************************************************************************
// Initialise the 19 distributions of every lattice site.
//
// Launch layout: 1-D grid of 1-D blocks. Each thread visits S sites; on pass s
// it handles site  S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x.
// Sites with index >= Nx*Ny*Nz are skipped.
//
// Storage: component q of a distribution array lives at [q*N + site], with
// f_even holding 10 components and f_odd holding 9.
//
// Sites with ID > 0 receive 0.3333333333333333 in f_even component 0,
// 0.055555555555555555 in f_odd 0..2 and f_even 1..3, and 0.0277777777777778
// in f_odd 3..8 and f_even 4..9. Every other site has all components set
// to -1.0.
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz, int S)
{
    const int N = Nx*Ny*Nz;
    for (int pass=0; pass<S; pass++){
        const int site = S*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (!(site<N)) continue;

        if (ID[site] > 0){
            f_even[site] = 0.3333333333333333;
            // first group of six components
            for (int q=0; q<3; q++){
                f_odd[q*N+site] = 0.055555555555555555;
                f_even[(q+1)*N+site] = 0.055555555555555555;
            }
            // remaining twelve components
            for (int q=3; q<9; q++){
                f_odd[q*N+site] = 0.0277777777777778;
                f_even[(q+1)*N+site] = 0.0277777777777778;
            }
        }
        else{
            // mark every component of this site with -1.0
            for (int q=0; q<10; q++){
                f_even[q*N+site] = -1.0;
                if (q<9) f_odd[q*N+site] = -1.0;
            }
        }
    }
}
// Compute three signed sums of the distributions at every site with ID > 0
// and store them in vel (x at [n], y at [N+n], z at [2*N+n]).
//
// Launch layout: 1-D grid of 1-D blocks. Each thread visits S sites; on pass s
// it handles site  S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x.
// Sites with index >= Nx*Ny*Nz or ID <= 0 are left untouched.
//
// Distribution 2m+1 is read from distodd[m*N + n] and distribution 2m from
// disteven[m*N + n]. The sums are not divided by the density.
// NOTE(review): the MRT kernel in this file reads the same arrays with the
// odd/even roles exchanged ("swap convention"); confirm which layout is in
// effect at the point this kernel is called.
__global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz, int S)
{
    const int N = Nx*Ny*Nz;
    for (int pass=0; pass<S; pass++){
        const int site = S*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (!(site<N)) continue;
        if (!(ID[site] > 0)) continue;

        // even-numbered distributions
        const double f2  = disteven[N+site];
        const double f4  = disteven[2*N+site];
        const double f6  = disteven[3*N+site];
        const double f8  = disteven[4*N+site];
        const double f10 = disteven[5*N+site];
        const double f12 = disteven[6*N+site];
        const double f14 = disteven[7*N+site];
        const double f16 = disteven[8*N+site];
        const double f18 = disteven[9*N+site];
        // odd-numbered distributions
        const double f1  = distodd[site];
        const double f3  = distodd[1*N+site];
        const double f5  = distodd[2*N+site];
        const double f7  = distodd[3*N+site];
        const double f9  = distodd[4*N+site];
        const double f11 = distodd[5*N+site];
        const double f13 = distodd[6*N+site];
        const double f15 = distodd[7*N+site];
        const double f17 = distodd[8*N+site];

        // term order is kept fixed so the floating-point result is unchanged
        vel[site]     = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
        vel[N+site]   = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
        vel[2*N+site] = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
    }
}
//*************************************************************************
__global__ void MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz, int S,
double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz)
{
int n,N;
// distributions
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
// conserved momemnts
double rho,jx,jy,jz;
// non-conserved moments
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
N = Nx*Ny*Nz;
char id;
// S - number of threadblocks per grid block
for (int s=0; s<S; s++){
// for (int n=0; n<N; n++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
id = ID[n];
if (n<N){
if (id > 0){
//........................................................................
// Registers to store the distributions - read based on swap convention
//........................................................................
f2 = distodd[n];
f4 = distodd[N+n];
f6 = distodd[2*N+n];
f8 = distodd[3*N+n];
f10 = distodd[4*N+n];
f12 = distodd[5*N+n];
f14 = distodd[6*N+n];
f16 = distodd[7*N+n];
f18 = distodd[8*N+n];
//........................................................................
f0 = disteven[n];
f1 = disteven[N+n];
f3 = disteven[2*N+n];
f5 = disteven[3*N+n];
f7 = disteven[4*N+n];
f9 = disteven[5*N+n];
f11 = disteven[6*N+n];
f13 = disteven[7*N+n];
f15 = disteven[8*N+n];
f17 = disteven[9*N+n];
//........................................................................
//....................compute the moments...............................................
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
m13 = f8+f7-f10-f9;
m14 = f16+f15-f18-f17;
m15 = f12+f11-f14-f13;
m16 = f7-f8+f9-f10-f11+f12-f13+f14;
m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
m18 = f11-f12-f13+f14-f15+f16+f17-f18;
//..............incorporate external force................................................
//jx += 0.5*Fx;
//jy += 0.5*Fy;
//jz += 0.5*Fz;
//..............carry out relaxation process...............................................
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9);
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11);
m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12);
m13 = m13 + rlx_setA*((jx*jy/rho) - m13);
m14 = m14 + rlx_setA*((jy*jz/rho) - m14);
m15 = m15 + rlx_setA*((jx*jz/rho) - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
//.................inverse transformation......................................................
f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jx-m4)+0.05555555555555555*(m9-m10);
f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m4-jx)+0.05555555555555555*(m9-m10);
f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
f11 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
f12 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
f13 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
f14 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
f15 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
f16 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
f17 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
-0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
f18 = 0.05263157894736842*rho+0.003341687552213868*m1
+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
-0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
//.......................................................................................................
// incorporate external force
f1 += 0.16666666*Fx;
f2 -= 0.16666666*Fx;
f3 += 0.16666666*Fy;
f4 -= 0.16666666*Fy;
f5 += 0.16666666*Fz;
f6 -= 0.16666666*Fz;
f7 += 0.08333333333*(Fx+Fy);
f8 -= 0.08333333333*(Fx+Fy);
f9 += 0.08333333333*(Fx-Fy);
f10 -= 0.08333333333*(Fx-Fy);
f11 += 0.08333333333*(Fx+Fz);
f12 -= 0.08333333333*(Fx+Fz);
f13 += 0.08333333333*(Fx-Fz);
f14 -= 0.08333333333*(Fx-Fz);
f15 += 0.08333333333*(Fy+Fz);
f16 -= 0.08333333333*(Fy+Fz);
f17 += 0.08333333333*(Fy-Fz);
f18 -= 0.08333333333*(Fy-Fz);
//.......................................................................................................
// Write data based on un-swapped convention
disteven[n] = f0;
disteven[N+n] = f2;
disteven[2*N+n] = f4;
disteven[3*N+n] = f6;
disteven[4*N+n] = f8;
disteven[5*N+n] = f10;
disteven[6*N+n] = f12;
disteven[7*N+n] = f14;
disteven[8*N+n] = f16;
disteven[9*N+n] = f18;
distodd[n] = f1;
distodd[N+n] = f3;
distodd[2*N+n] = f5;
distodd[3*N+n] = f7;
distodd[4*N+n] = f9;
distodd[5*N+n] = f11;
distodd[6*N+n] = f13;
distodd[7*N+n] = f15;
distodd[8*N+n] = f17;
//.......................................................................................................
}
}
}
}
//*************************************************************************
// Host-callable (C linkage) launcher for the MRT collision kernel.
//
//   nblocks, nthreads : launch configuration; used as the x-extent of a
//                       one-dimensional grid and block respectively
//   S                 : forwarded to the kernel unchanged (the drivers in this
//                       code base print it as "sweeps per thread")
//   ID                : phase indicator array handed to the kernel
//   f_even, f_odd     : distribution arrays handed to the kernel
//   rlxA, rlxB        : relaxation rates handed to the kernel
//   Fx, Fy, Fz        : external force components handed to the kernel
//   Nx, Ny, Nz        : domain dimensions handed to the kernel
//
// The launch goes to the default stream. No synchronization and no launch
// error check is performed here; callers that need either must do it
// themselves.
//*************************************************************************
extern "C" void dvc_MRT(int nblocks, int nthreads, int S, char *ID,
        double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
        int Nx, int Ny, int Nz)
{
    const dim3 gridDims(nblocks, 1, 1);
    const dim3 blockDims(nthreads, 1, 1);
    // Note the kernel's own parameter order: domain size and S precede the
    // relaxation rates and the force.
    MRT<<<gridDims, blockDims>>>(ID, f_even, f_odd, Nx, Ny, Nz, S,
                                 rlxA, rlxB, Fx, Fy, Fz);
}
//*************************************************************************
// Host-callable (C linkage) launcher for the INITIALIZE kernel, which is
// given the phase indicator array and both distribution arrays.
//
//   nblocks, nthreads : launch configuration; used as the x-extent of a
//                       one-dimensional grid and block respectively
//   S                 : forwarded to the kernel unchanged
//   ID                : phase indicator array handed to the kernel
//   f_even, f_odd     : distribution arrays handed to the kernel
//   Nx, Ny, Nz        : domain dimensions handed to the kernel
//
// The launch goes to the default stream; nothing is synchronized or
// error-checked here.
//*************************************************************************
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S,
        char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
    const dim3 gridDims(nblocks, 1, 1);
    const dim3 blockDims(nthreads, 1, 1);
    INITIALIZE<<<gridDims, blockDims>>>(ID, f_even, f_odd, Nx, Ny, Nz, S);
}

BIN
gpu/bin/ColorLBM Executable file

Binary file not shown.

248
gpu/lb1_MRT-swap.cu Normal file
View File

@ -0,0 +1,248 @@
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cuda.h>
//#include <cutil.h>
using namespace std;
//*************************************************************************
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
// Dump an Nx x Ny x Nz array to the text file "dist.list" (overwritten on
// every call).
//
// Each value is truncated to int and written as "| <value>". Every row
// (fixed j,k; i running fastest) is terminated with " | \n", and every
// k-slice is followed by a line of asterisks. The array is indexed as
// array[k*Nx*Ny + j*Nx + i].
//
// If the file cannot be opened, a diagnostic is printed and nothing is
// written.
void Write_Out(double *array, int Nx, int Ny, int Nz){
    FILE *output = fopen("dist.list","w");
    if (output == NULL){
        // Without this guard every fprintf/fclose below would be handed a
        // null stream.
        perror("Write_Out: cannot open dist.list");
        return;
    }
    for (int k=0; k<Nz; k++){
        for (int j=0; j<Ny; j++){
            // Offset of element (0,j,k); computed in size_t so that large
            // domains do not overflow int arithmetic.
            const size_t rowStart = (size_t(k)*size_t(Ny) + size_t(j))*size_t(Nx);
            for (int i=0; i<Nx; i++){
                const int value = int(array[rowStart + size_t(i)]);
                fprintf(output, "| %i",value);
            }
            fprintf(output, " | \n");
        }
        fprintf(output,"************************************** \n");
    }
    fclose(output);
}
//**************************************************************************
// MRT implementation of the LBM using CUDA
//**************************************************************************
// Report a failed CUDA runtime call on stderr; returns true when the call
// succeeded so it can be used directly in an if-condition.
static bool cudaCallSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) return true;
    fprintf(stderr, "CUDA error in %s: %s \n", what, cudaGetErrorString(status));
    return false;
}

// Single-GPU MRT lattice-Boltzmann driver (swap variant).
//
// Reads the run parameters from "MRT.in", loads a cubic Nz^3 phase-indicator
// file, initializes the D3Q19 distributions on the device, runs timestepMax
// swap + collision steps, reports the elapsed device time / MLUPS, and writes
// the z-velocity of the first row of nodes to "velocity.out".
//
// Returns 0 on success and 1 when the input cannot be read or a CUDA call
// fails.
int main(void)
{
    //.......select the GPU...................................................
    int deviceCount = 0;
    cudaGetDeviceCount(&deviceCount);
    // Device 1 is the historical default of this driver; fall back to
    // device 0 on nodes that do not expose a second GPU.
    int device = 1;
    if (device >= deviceCount) device = 0;
    printf("Number of devices = %i \n", deviceCount);
    printf("Current device is = %i \n", device);
    if (!cudaCallSucceeded(cudaSetDevice(device), "cudaSetDevice")) return 1;

    // BGK Model parameters
    string FILENAME;
    unsigned int nBlocks = 0, nthreads = 0;
    int timestepMax = 0, interval = 0;
    double tau = 0.0, Fx = 0.0, Fy = 0.0, Fz = 0.0, tol = 0.0;
    // Domain variables
    int Nx, Ny, Nz = 0;

    ifstream input("MRT.in");
    input >> FILENAME;      // name of the input file
    input >> Nz;            // number of nodes (x,y,z)
    input >> nBlocks;
    input >> nthreads;
    input >> tau;           // relaxation time
    input >> Fx;            // External force components (x,y,z)
    input >> Fy;
    input >> Fz;
    input >> timestepMax;   // max no. of timesteps
    // Everything this program actually uses has been read at this point.
    const bool parametersRead = !input.fail();
    input >> interval;      // error interval (read for file compatibility, unused)
    input >> tol;           // error tolerance (read for file compatibility, unused)
    if (!parametersRead){
        fprintf(stderr, "Error: could not read the simulation parameters from MRT.in \n");
        return 1;
    }
    if (Nz <= 0 || nBlocks == 0 || nthreads == 0){
        // A zero launch dimension would make the sweep count below divide by zero.
        fprintf(stderr, "Error: Nz, nBlocks and nthreads in MRT.in must all be positive \n");
        return 1;
    }

    double rlx_setA = 1.f/tau;
    double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
    printf("tau = %f \n", tau);
    printf("Set A = %f \n", rlx_setA);
    printf("Set B = %f \n", rlx_setB);
    printf("Force(x) = %f \n", Fx);
    printf("Force(y) = %f \n", Fy);
    printf("Force(z) = %f \n", Fz);

    Nx = Ny = Nz;   // Cubic domain
    int N = Nx*Ny*Nz;
    // Byte counts are kept in size_t: with int, 10*dist_mem_size overflows
    // already for grids of roughly 300^3 nodes.
    size_t dist_mem_size = size_t(N)*sizeof(double);
    // NOTE(review): S is truncated, so when N is not a multiple of
    // nBlocks*nthreads the trailing nodes are presumably never visited -
    // confirm against the kernels before rounding up here.
    int S = N/nthreads/nBlocks;
    printf("Number of blocks = %i \n", nBlocks);
    printf("Threads per block = %i \n", nthreads);
    printf("Sweeps per thread = %i \n", S);
    printf("Number of nodes per side = %i \n", Nx);
    printf("Total Number of nodes = %i \n", N);
    //.......................................................................
    printf("Read input media... \n");
    // .......... READ THE INPUT FILE .......................................
    char *id = new char[N];
    int sum = 0;    // number of nodes with a positive phase id
    ifstream PM(FILENAME.c_str(),ios::binary);
    if (!PM){
        fprintf(stderr, "Error: could not open the media file %s \n", FILENAME.c_str());
        delete [] id;
        return 1;
    }
    // The file stores one byte per node with i running fastest, then j, then
    // k, i.e. in exactly the order of the flat index n = k*Nx*Ny+j*Nx+i.
    char value = 0;
    for (int n=0; n<N; n++){
        PM.read((char *) (&value), sizeof(value));
        id[n] = value;
        if (value > 0) sum++;
    }
    PM.close();
    printf("File porosity = %f\n", double(sum)/N);
    //.......................................................................
    //...........device phase ID.................................................
    char *ID = NULL;
    double *f_even = NULL, *f_odd = NULL;
    if (!cudaCallSucceeded(cudaMalloc((void **) &ID, N), "cudaMalloc(ID)")
        || !cudaCallSucceeded(cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice), "cudaMemcpy(ID)")
        //......................device distributions.............................
        || !cudaCallSucceeded(cudaMalloc((void **) &f_even, 10*dist_mem_size), "cudaMalloc(f_even)")
        || !cudaCallSucceeded(cudaMalloc((void **) &f_odd, 9*dist_mem_size), "cudaMalloc(f_odd)")){
        cudaFree(f_odd); cudaFree(f_even); cudaFree(ID);
        delete [] id;
        return 1;
    }
    //...........................................................................
    printf("Setting the distributions, size = : %i\n", N);
    //...........................................................................
    // NOTE(review): the extern "C" prototypes used by this driver pass ID
    // first and the launch configuration last. C linkage cannot detect a
    // mismatch, so confirm that the device library linked with this driver
    // defines the dvc_* entry points with the same parameter order.
    dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
    //*************************************************************************
    int timestep = 0;
    printf("No. of timesteps: %i \n", timestepMax);
    //.......create a stream for the LB calculation.......
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    //.......create and start timer............
    cudaEvent_t start, stop;
    float time;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord( start, 0 );
    //.........................................
    //************ MAIN ITERATION LOOP ***************************************/
    while (timestep < timestepMax){
        //........ Execute the swap kernel (device) .........................
        dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
        //........ Execute the collision kernel (device) ....................
        dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);
        // Iteration completed!
        timestep++;
    }
    //************************************************************************/
    // cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize; it
    // is also the point where errors raised inside the kernels surface.
    const bool stepsOk = cudaCallSucceeded(cudaDeviceSynchronize(), "time-step loop");
    //.......... stop and destroy timer.............................
    cudaEventRecord( stop, stream);
    cudaEventSynchronize( stop );
    cudaEventElapsedTime( &time, start, stop );    // milliseconds
    printf("CPU time = %f \n", time);
    float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
    printf("MLUPS = %f \n", MLUPS);
    cudaStreamDestroy(stream);
    cudaEventDestroy( start );
    cudaEventDestroy( stop );
    if (!stepsOk){
        cudaFree(f_even); cudaFree(f_odd); cudaFree(ID);
        delete [] id;
        return 1;
    }
    //..............................................................
    //.........Compute the velocity and copy result to host ........
    double *velocity = new double[size_t(3)*size_t(N)];
    double *vel = NULL;
    if (!cudaCallSucceeded(cudaMalloc((void **) &vel, 3*dist_mem_size), "cudaMalloc(vel)")){
        cudaFree(f_even); cudaFree(f_odd); cudaFree(ID);
        delete [] velocity; delete [] id;
        return 1;
    }
    // NOTE(review): the Compute_VELOCITY launch is disabled in this driver,
    // so nothing ever writes vel. It is zero-filled here so that the values
    // copied back and printed below are at least defined.
    // Compute_VELOCITY <<< grid, nthreads >>> (ID, f_even, f_odd, vel, Nx, Ny, Nz, S);
    cudaMemset(vel, 0, 3*dist_mem_size);
    cudaCallSucceeded(cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost),
                      "cudaMemcpy(velocity)");
    //............................................................
    //....Write the z-velocity to test poiseuille flow............
    double vz,vz_avg;
    vz_avg = 0.0;
    FILE *output = fopen("velocity.out","w");
    if (output == NULL) perror("cannot open velocity.out");
    // Only the first row of nodes (j = 0, k = 0) is sampled, so the flat node
    // index equals i. The z-component occupies the third block of N entries.
    for (int i=0; i<Nx; i++){
        vz = velocity[size_t(2)*size_t(N)+size_t(i)];
        vz_avg += vz;
        if (output != NULL) fprintf(output, " %e",vz);
    }
    if (output != NULL) fclose(output);
    vz = vz_avg/double(sum);
    printf("Average Velocity = %e\n", vz);
    // cleanup
    cudaFree(f_even); cudaFree(f_odd); cudaFree(vel); cudaFree(ID);
    // Both host buffers come from new[], so they must be released with
    // delete[] rather than free().
    delete [] velocity;
    delete [] id;
    return 0;
}

246
gpu/lb1_MRT.cu Normal file
View File

@ -0,0 +1,246 @@
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cuda.h>
using namespace std;
//*************************************************************************
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
//*************************************************************************
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
//*************************************************************************
extern "C" void dvc_Barrier();
//*************************************************************************
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
//*************************************************************************
// Dump an Nx x Ny x Nz array to the text file "dist.list" (overwritten on
// every call).
//
// Each value is truncated to int and written as "| <value>". Every row
// (fixed j,k; i running fastest) is terminated with " | \n", and every
// k-slice is followed by a line of asterisks. The array is indexed as
// array[k*Nx*Ny + j*Nx + i].
//
// If the file cannot be opened, a diagnostic is printed and nothing is
// written.
void Write_Out(double *array, int Nx, int Ny, int Nz){
    FILE *output = fopen("dist.list","w");
    if (output == NULL){
        // Without this guard every fprintf/fclose below would be handed a
        // null stream.
        perror("Write_Out: cannot open dist.list");
        return;
    }
    for (int k=0; k<Nz; k++){
        for (int j=0; j<Ny; j++){
            // Offset of element (0,j,k); computed in size_t so that large
            // domains do not overflow int arithmetic.
            const size_t rowStart = (size_t(k)*size_t(Ny) + size_t(j))*size_t(Nx);
            for (int i=0; i<Nx; i++){
                const int value = int(array[rowStart + size_t(i)]);
                fprintf(output, "| %i",value);
            }
            fprintf(output, " | \n");
        }
        fprintf(output,"************************************** \n");
    }
    fclose(output);
}
//**************************************************************************
// MRT implementation of the LBM using CUDA
//**************************************************************************
// Single-GPU MRT lattice-Boltzmann driver.
//
// Reads the run parameters from "MRT.in", loads a cubic Nz^3 phase-indicator
// file, hands it to the device through the dvc_* wrappers, runs timestepMax
// swap + collision steps and reports the elapsed device time / MLUPS.
//
// Returns 0 on success and 1 when the input cannot be read.
int main(void)
{
    // BGK Model parameters
    string FILENAME;
    unsigned int nBlocks = 0, nthreads = 0;
    int timestepMax = 0, interval = 0;
    double tau = 0.0, Fx = 0.0, Fy = 0.0, Fz = 0.0, tol = 0.0;
    // Domain variables
    int Nx, Ny, Nz = 0;

    ifstream input("MRT.in");
    input >> FILENAME;      // name of the input file
    input >> Nz;            // number of nodes (x,y,z)
    input >> nBlocks;
    input >> nthreads;
    input >> tau;           // relaxation time
    input >> Fx;            // External force components (x,y,z)
    input >> Fy;
    input >> Fz;
    input >> timestepMax;   // max no. of timesteps
    // Everything this program actually uses has been read at this point.
    const bool parametersRead = !input.fail();
    input >> interval;      // error interval (read for file compatibility, unused)
    input >> tol;           // error tolerance (read for file compatibility, unused)
    if (!parametersRead){
        fprintf(stderr, "Error: could not read the simulation parameters from MRT.in \n");
        return 1;
    }
    if (Nz <= 0 || nBlocks == 0 || nthreads == 0){
        // A zero launch dimension would make the sweep count below divide by zero.
        fprintf(stderr, "Error: Nz, nBlocks and nthreads in MRT.in must all be positive \n");
        return 1;
    }

    double rlx_setA = 1.f/tau;
    double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
    printf("tau = %f \n", tau);
    printf("Set A = %f \n", rlx_setA);
    printf("Set B = %f \n", rlx_setB);
    printf("Force(x) = %f \n", Fx);
    printf("Force(y) = %f \n", Fy);
    printf("Force(z) = %f \n", Fz);

    Nx = Ny = Nz;   // Cubic domain
    int N = Nx*Ny*Nz;
    // Byte counts are kept in size_t: with int, 10*dist_mem_size overflows
    // already for grids of roughly 300^3 nodes.
    size_t dist_mem_size = size_t(N)*sizeof(double);
    // NOTE(review): S is truncated, so when N is not a multiple of
    // nBlocks*nthreads the trailing nodes are presumably never visited -
    // confirm against the kernels before rounding up here.
    int S = N/nthreads/nBlocks;
    printf("Number of blocks = %i \n", nBlocks);
    printf("Threads per block = %i \n", nthreads);
    printf("Sweeps per thread = %i \n", S);
    printf("Number of nodes per side = %i \n", Nx);
    printf("Total Number of nodes = %i \n", N);
    //.......................................................................
    printf("Read input media... \n");
    // .......... READ THE INPUT FILE .......................................
    char *id = new char[N];
    int sum = 0;    // number of nodes with a positive phase id
    ifstream PM(FILENAME.c_str(),ios::binary);
    if (!PM){
        fprintf(stderr, "Error: could not open the media file %s \n", FILENAME.c_str());
        delete [] id;
        return 1;
    }
    // The file stores one byte per node with i running fastest, then j, then
    // k, i.e. in exactly the order of the flat index n = k*Nx*Ny+j*Nx+i.
    char value = 0;
    for (int n=0; n<N; n++){
        PM.read((char *) (&value), sizeof(value));
        id[n] = value;
        if (value > 0) sum++;
    }
    PM.close();
    printf("File porosity = %f\n", double(sum)/N);
    //.......................................................................
    //...........device phase ID.................................................
    char *ID;
    dvc_AllocateDeviceMemory((void **) &ID, N);     // Allocate device memory
    // Copy to the device
    dvc_CopyToDevice(ID, id, N);
    //...........................................................................
    //......................device distributions.................................
    double *f_even,*f_odd;
    //...........................................................................
    dvc_AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size);  // Allocate device memory
    dvc_AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size);    // Allocate device memory
    //...........................................................................
    printf("Setting the distributions, size = : %i\n", N);
    //...........................................................................
    // NOTE(review): the extern "C" prototypes used by this driver pass ID
    // first and the launch configuration last. C linkage cannot detect a
    // mismatch, so confirm that the device library linked with this driver
    // defines the dvc_* entry points with the same parameter order.
    dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
    //*************************************************************************
    int timestep = 0;
    printf("No. of timesteps: %i \n", timestepMax);
    //.......create a stream for the LB calculation.......
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    //.......create and start timer............
    cudaEvent_t start, stop;
    float time;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord( start, 0 );
    //.........................................
    //************ MAIN ITERATION LOOP ***************************************/
    while (timestep < timestepMax){
        //........ Execute the swap kernel (device) .........................
        dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
        //........ Execute the collision kernel (device) ....................
        dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);
        // Iteration completed!
        timestep++;
    }
    //************************************************************************/
    dvc_Barrier();
    //.......... stop and destroy timer.............................
    cudaEventRecord( stop, stream);
    cudaEventSynchronize( stop );
    cudaEventElapsedTime( &time, start, stop );    // milliseconds
    printf("CPU time = %f \n", time);
    float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
    printf("MLUPS = %f \n", MLUPS);
    cudaStreamDestroy(stream);
    cudaEventDestroy( start );
    cudaEventDestroy( stop );
    //..............................................................
    // The velocity post-processing below is disabled in this driver. It is
    // kept for reference as a single block comment (the stray nested comment
    // opener of the earlier version has been removed).
    /*
    //.........Compute the velocity and copy result to host ........
    double *velocity;
    velocity = new double[3*N];
    //......................device distributions....................................
    double *vel;
    dvc_AllocateDeviceMemory((void **) &vel, 3*dist_mem_size); // Allocate device memory
    // Compute_VELOCITY <<< grid, nthreads >>> (ID, f_even, f_odd, vel, Nx, Ny, Nz, S);
    // cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);
    //....Write the z-velocity to test poiseuille flow............
    double vz,vz_avg;
    vz_avg = 0.0;
    FILE *output;
    output = fopen("velocity.out","w");
    for (int k=0; k<1; k++){
        for (int j=0; j<1; j++){
            for (int i=0; i<Nx; i++){
                int n = k*Nx*Ny+j*Nx+i;
                //.....print value........
                vz = velocity[2*N+n];
                vz_avg += vz;
                fprintf(output, " %e",vz);
            }
        }
    }
    fclose(output);
    vz = vz_avg/double(sum);
    printf("Average Velocity = %e\n", vz);
    */
    // cleanup
    // NOTE(review): ID, f_even and f_odd are still not released here; how
    // dvc_AllocateDeviceMemory allocates them is not visible from this file,
    // so the matching release call must be confirmed before adding it.
    // cudaFree(f_even); cudaFree(f_odd); cudaFree(ID);
    delete [] id;   // host copy of the phase ids, allocated with new[] above
    return 0;
}

1334
gpu/lb1_MRT_mpi.cpp Normal file

File diff suppressed because it is too large Load Diff

1836
gpu/lb1_MRT_mpi.cu Normal file

File diff suppressed because it is too large Load Diff

423
gpu/lb2_Color.cu Normal file
View File

@ -0,0 +1,423 @@
#ifdef useMPI
#include <mpi.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <math.h>
#include <cuda.h>
using namespace std;
//*************************************************************************
// HokieSpeed
//nvcc -Xcompiler -fopenmp -lgomp -O3 -arch sm_20 -o hybridATLKR lb2_ATLKR_hybrid.cu
// -I$VT_MPI_INC -L$VT_MPI_LIB -lmpi
//*************************************************************************
//*************************************************************************
// Implementation of Two-Phase Immiscible LBM using CUDA
//*************************************************************************
//*************************************************************************
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S,
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
//*************************************************************************
extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S,
char *ID, double *Den, double *Phi, double das, double dbs, int N);
//*************************************************************************
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz);
//*************************************************************************
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S,
char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity,
double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz,
int Nx, int Ny, int Nz, bool pBC);
//*************************************************************************
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
double beta, int Nx, int Ny, int Nz, bool pBC);
//*************************************************************************
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S,
char *ID, double *Phi, double *Copy, double *Den, int N);
//*************************************************************************
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
//*************************************************************************
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
//*************************************************************************
extern "C" void dvc_Barrier();
//*************************************************************************
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S,
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
//*************************************************************************
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
int sendCount, double *sendbuf, double *Dist, int N);
//*************************************************************************
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz);
//*************************************************************************
int main(int argc, char *argv[])
{
//********** Initialize MPI ****************
int numprocs,rank;
#ifdef useMPI
MPI_Status stat;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
#else
numprocs = 1;
rank = 0;
#endif
//******************************************
if (rank == 0){
printf("********************************************************\n");
printf("Running Hybrid Implementation of Color LBM \n");
printf("********************************************************\n");
}
// Color Model parameters
string FILENAME;
unsigned int nBlocks, nthreads;
int Nx,Ny,Nz;
int timestepMax, interval;
double tau,Fx,Fy,Fz,tol;
double alpha, beta;
double das, dbs;
double din,dout;
bool pBC;
if (rank==0){
//.............................................................
// READ SIMULATION PARAMETERS FROM INPUT FILE
//.............................................................
ifstream input("Color.in");
// Line 1: Name of the phase indicator file (s=0,w=1,n=2)
input >> FILENAME;
// Line 2: domain size (Nx, Ny, Nz)
input >> Nz; // number of nodes (x,y,z)
input >> nBlocks;
input >> nthreads;
// Line 3: model parameters (tau, alpha, beta, das, dbs)
input >> tau;
input >> alpha;
input >> beta;
input >> das;
input >> dbs;
// Line 4: External force components (Fx,Fy, Fz)
input >> Fx;
input >> Fy;
input >> Fz;
// Line 5: Pressure Boundary conditions
input >> pBC;
input >> din;
input >> dout;
// Line 6: time-stepping criteria
input >> timestepMax; // max no. of timesteps
input >> interval; // error interval
input >> tol; // error tolerance
//.............................................................
}
#ifdef useMPI
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
MPI_Barrier(MPI_COMM_WORLD);
//.................................................
MPI_Bcast(&Nz,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&nBlocks,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&nthreads,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&tau,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&beta,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&das,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&dbs,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&pBC,1,MPI_LOGICAL,0,MPI_COMM_WORLD);
MPI_Bcast(&din,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&dout,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&timestepMax,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&interval,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
//.................................................
MPI_Barrier(MPI_COMM_WORLD);
// **************************************************************
#endif
double rlxA = 1.f/tau;
double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA);
if (pBC && rank == 0){
printf("Assigning presusre boundary conditions \n");
printf("Inlet density = %f \n", din);
printf("Outlet density = %f \n", dout);
}
if (rank==0){
printf("....Parameters................\n");
printf("tau = %f \n", tau);
printf("alpha = %f \n", alpha);
printf("beta = %f \n", beta);
printf("das = %f \n", das);
printf("dbs = %f \n", dbs);
printf("Force(x) = %f \n", Fx);
printf("Force(y) = %f \n", Fy);
printf("Force(z) = %f \n", Fz);
printf("Nz = %i \n", Nz);
printf("timestepMax = %i \n", timestepMax);
printf("...............................\n");
}
// Identical cubic sub-domains
Nx = Ny = Nz;// = 16*s; // Cubic domain
int N = Nx*Ny*Nz;
int dist_mem_size = N*sizeof(double);
// unsigned int nBlocks = 32;
// int nthreads = 128;
int S = N/nthreads/nBlocks;
if (nBlocks*nthreads*S < N) S++;
// int S = 1;
// unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1);
// dim3 grid(nBlocks,1,1);
if (rank==1){
printf("Number of blocks = %i \n", nBlocks);
printf("Threads per block = %i \n", nthreads);
printf("Sweeps per thread = %i \n", S);
printf("Number of nodes per side = %i \n", Nx);
printf("Total Number of nodes = %i \n", N);
printf("...............................\n");
}
//.......................................................................
// .......... READ THE INPUT FILE .......................................
int n;
char value;
char *id;
id = new char[N];
int sum = 0;
// RANK 0 READS THE INPUT FILE
if (rank==0){
printf("Read input media... \n");
ifstream PM(FILENAME.c_str(),ios::binary);
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
PM.read((char *) (&value), sizeof(value));
n = k*Nx*Ny+j*Nx+i;
if (value>0){
if (pBC) value=2; // Saturate with NWP
if (k<8){
value=1;
}
}
id[n] = value;
if (value > 0) sum++;
}
}
}
PM.close();
printf("File porosity = %f\n", double(sum)/N);
}
//......... for pressure BC only............................
// Void the first / last rows if pressure BC are to be used
if (pBC){
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
n = k*Nx*Ny+j*Nx+i;
if (k<4) id[n] = 1;
if (k>Nz-5) id[n] = 2;
}
}
// Skip the non-boundary values
if (k==4) k=Nz-5;
}
}
#ifdef useMPI //............................................................
MPI_Barrier(MPI_COMM_WORLD);
MPI_Bcast(&id[0],N,MPI_CHAR,0,MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
#endif
if (rank == 0) printf("Domain set.\n");
//...........................................................................
int SBC;
int outlet = N-Nx*Ny;
if (pBC){
SBC = Nx*Ny/nthreads/nBlocks+1;
printf("Number of sweeps for inlet / outlet: %i \n", SBC);
}
//...........................................................................
//...........................................................................
//...........device phase ID.................................................
char *ID;
cudaMalloc((void **) &ID, N); // Allocate device memory
// Copy to the device
cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice);
//...........................................................................
//......................device distributions.................................
double *f_even,*f_odd;
//...........................................................................
cudaMalloc((void **) &f_even, 10*dist_mem_size); // Allocate device memory
cudaMalloc((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
// f_even = new double[10*N];
// f_odd = new double[9*N];
//...........................................................................
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
double *Phi,*Den,*Copy;
double *ColorGrad, *Velocity;
//...........................................................................
cudaMalloc((void **) &Phi, dist_mem_size);
cudaMalloc((void **) &Den, 2*dist_mem_size);
cudaMalloc((void **) &Copy, 2*dist_mem_size);
cudaMalloc((void **) &Velocity, 3*dist_mem_size);
cudaMalloc((void **) &ColorGrad, 3*dist_mem_size);
//...........................................................................
//...........................................................................
if (rank==0) printf("Setting the distributions, size = : %i\n", N);
//...........................................................................
dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, N);
//...........................................................................
dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);
//...........................................................................
int timestep;
// double starttime,stoptime;
if (rank==0) printf("No. of timesteps: %i \n", timestepMax);
timestep = 0;
//.......create and start timer............
cudaEvent_t start, stop;
float time;
//.......create a stream for the LB calculation.......
cudaStream_t stream;
cudaStreamCreate(&stream);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord( start, 0 );
//.........................................
//************ MAIN TIMESTEP LOOP ***************************************/
while (timestep < timestepMax){
//*************************************************************************
// Compute the color gradient
//*************************************************************************
dvc_ComputeColorGradient(nBlocks, nthreads, S,
ID, Phi, ColorGrad, Nx, Ny, Nz);
//*************************************************************************
//*************************************************************************
// Perform collision step for the momentum transport
//*************************************************************************
dvc_ColorCollide(nBlocks, nthreads, S,
ID, f_even, f_odd, ColorGrad, Velocity,
rlxA, rlxB,alpha, beta, Fx, Fy, Fz, Nx, Ny, Nz, pBC);
//*************************************************************************
//*************************************************************************
// Carry out the density streaming step for mass transport
//*************************************************************************
dvc_DensityStreamD3Q7(nBlocks, nthreads, S,
ID, Den, Copy, Phi, ColorGrad, Velocity,beta, Nx, Ny, Nz, pBC);
//*************************************************************************
//*************************************************************************
// Swap the distributions for momentum transport
//*************************************************************************
dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
//*************************************************************************
//*************************************************************************
// Compute the phase indicator field and reset Copy, Den
//*************************************************************************
dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);
//*************************************************************************
dvc_Barrier();
timestep++;
//.............................................................................
}
//************************************************************************/
dvc_Barrier();
//.......... stop and destroy timer.............................
cudaEventRecord( stop, stream);
cudaEventSynchronize( stop );
cudaEventElapsedTime( &time, start, stop );
printf("CPU time = %f \n", time);
float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
printf("MLUPS = %f \n", MLUPS);
cudaEventDestroy( start );
cudaEventDestroy( stop );
double *Data;
Data = new double[3*N];
cudaMemcpy(Data, Phi, dist_mem_size, cudaMemcpyDeviceToHost);
// Write out the Phase Indicator Field
FILE *phase;
phase = fopen("Phase.out","wb");
fwrite(Data,8,N,phase);
fclose(phase);
//....................................................
// Write out the pressure - (reuse Phi arrays since we're done with those)
// ComputeDensity<<< grid, nthreads>>> (ID, f_even, f_odd, Phi, Nx, Ny, Nz, S);
// cudaMemcpy(Data, Phi, dist_mem_size, cudaMemcpyDeviceToHost);
// FILE *PRESSURE;
// PRESSURE = fopen("Pressure.out","wb");
// fwrite(Phi,8,N,PRESSURE);
// fclose(PRESSURE);
//....................................................
// Write out the Color Gradient
cudaMemcpy(Data, ColorGrad, 3*dist_mem_size, cudaMemcpyDeviceToHost);
FILE *CG;
CG = fopen("ColorGrad.out","wb");
fwrite(Data,8,3*N,CG);
fclose(CG);
// Write out the Velocity
// FILE *VEL;
// VEL = fopen("Velocity.out","wb");
// fwrite(Velocity,8,3*N,VEL);
// fclose(VEL);
// cleanup
cudaFree(ID);
cudaFree(f_even); cudaFree(f_odd);
cudaFree(Velocity);
cudaFree(Phi);
cudaFree (ColorGrad);
cudaFree (Den); cudaFree(Copy);
cudaFree (Phi);
free(id);
//***********Finish up!*********************************
#ifdef useMPI
MPI_Finalize();
#endif
return 0;
}

1621
gpu/lb2_Color_mpi.cpp Normal file

File diff suppressed because it is too large Load Diff

BIN
gpu/lib/libcuColor.a Normal file

Binary file not shown.

BIN
gpu/lib/libcuD3Q19.a Normal file

Binary file not shown.

BIN
gpu/lib/libcuD3Q7.a Normal file

Binary file not shown.

BIN
gpu/lib/libcuExtra.a Normal file

Binary file not shown.