Add CPU and GPU versions (without averaging tools), with examples for each case using 8 MPI processes; currently set up for the HokieSpeed supercomputing cluster.
This commit is contained in:
parent
29ccc29a61
commit
438f075331
37
Makefile
Normal file
37
Makefile
Normal file
@ -0,0 +1,37 @@
|
||||
# Build rules for the GPU colour-model lattice Boltzmann code.
#
# Each CUDA translation unit under gpu/ is compiled by nvcc into a static
# library under lib/; the MPI driver is then compiled with mpicxx and linked
# against those libraries.  Recipe lines must start with a TAB character.

# Flags handed to every nvcc invocation (target GPU architecture).
CUDA_FLAGS=-arch sm_20

# `clean` is a command, not a file: always run it when requested.
.PHONY: clean

# Default target: the MPI colour-model executable.
bin/ColorLBM:gpu/lb2_Color_mpi.cpp lib/libcuColor.a lib/libcuD3Q19.a lib/libcuD3Q7.a lib/libcuExtras.a
	mkdir -p bin
	mpicxx -O3 -o bin/ColorLBM gpu/lb2_Color_mpi.cpp -lcuColor -lcuD3Q19 -lcuD3Q7 -lcuExtras -Llib

# Disabled single-GPU targets, kept for reference.
#bin/gpuMRT:gpu/lb1_MRT.cu lib/libcuMRT.a lib/libcuD3Q19.a
#	mkdir -p bin
#	nvcc -O3 -o bin/gpuMRT $(CUDA_FLAGS) gpu/lb1_MRT.cu -lcuMRT -lcuD3Q19 -Llib

#bin/gpuColor:gpu/lb2_Color.cu lib/libcuColor.a lib/libcuD3Q19.a
#	mkdir -p bin
#	nvcc -o bin/gpuColor $(CUDA_FLAGS) gpu/lb2_Color.cu -lcuColor -lcuD3Q19 -Llib

lib/libcuExtras.a: gpu/CudaExtras.cu
	mkdir -p lib
	nvcc -lib $(CUDA_FLAGS) gpu/CudaExtras.cu -o lib/libcuExtras.a

#lib/libcuMRT.a: gpu/MRT.cu
#	mkdir -p lib
#	nvcc -lib $(CUDA_FLAGS) gpu/MRT.cu -o lib/libcuMRT.a

lib/libcuD3Q7.a: gpu/D3Q7.cu
	mkdir -p lib
	nvcc -lib $(CUDA_FLAGS) gpu/D3Q7.cu -o lib/libcuD3Q7.a

lib/libcuD3Q19.a: gpu/D3Q19.cu
	mkdir -p lib
	nvcc -lib $(CUDA_FLAGS) gpu/D3Q19.cu -o lib/libcuD3Q19.a

lib/libcuColor.a: gpu/Color.cu
	mkdir -p lib
	nvcc -lib $(CUDA_FLAGS) gpu/Color.cu -o lib/libcuColor.a

# Remove build products.  -f keeps this from failing (and skipping lib/) when
# a directory is empty or has not been created yet.
clean:
	rm -f bin/* lib/*
|
756
cpu/Color.cpp
Normal file
756
cpu/Color.cpp
Normal file
@ -0,0 +1,756 @@
|
||||
#include <math.h>
|
||||
|
||||
// Initialise the two component densities and the phase indicator per node.
//   ID  : node label; 1 and 2 select a pure phase, any other value gets the
//         supplied mixed state (das, dbs)
//   Den : interleaved densities, Den[2*n] and Den[2*n+1] for node n
//   Phi : phase indicator, one value per node
//   N   : number of nodes
extern void InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int N)
{
	for (int node = 0; node < N; ++node) {
		double *den = Den + 2*node;
		switch (ID[node]) {
		case 1:
			// pure first component
			den[0] = 1.0;
			den[1] = 0.0;
			Phi[node] = 1.0;
			break;
		case 2:
			// pure second component
			den[0] = 0.0;
			den[1] = 1.0;
			Phi[node] = -1.0;
			break;
		default:
			// every other label: caller-specified densities
			den[0] = das;
			den[1] = dbs;
			Phi[node] = (das-dbs)/(das+dbs);
			break;
		}
	}
}
|
||||
// Fill the D3Q19 distribution arrays with their initial values.
// Storage is component-major: component q of node n lives at [q*N+n], with
// ten components in f_even and nine in f_odd.  Nodes with ID > 0 receive the
// three weight values below; all other nodes are filled with -1.0.
extern void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
	const int N = Nx*Ny*Nz;
	const double wRest = 0.3333333333333333;
	const double wAxis = 0.055555555555555555;
	const double wDiag = 0.0277777777777778;

	for (int n = 0; n < N; ++n) {
		if (ID[n] > 0) {
			f_even[n] = wRest;
			// f_odd components 0..2 and f_even components 1..3
			for (int q = 0; q < 3; ++q) {
				f_odd[q*N+n] = wAxis;
				f_even[(q+1)*N+n] = wAxis;
			}
			// f_odd components 3..8 and f_even components 4..9
			for (int q = 3; q < 9; ++q) {
				f_odd[q*N+n] = wDiag;
				f_even[(q+1)*N+n] = wDiag;
			}
		}
		else {
			// flag value for every component of a node with ID <= 0
			f_even[n] = -1.0;
			for (int q = 0; q < 9; ++q) {
				f_odd[q*N+n] = -1.0;
				f_even[(q+1)*N+n] = -1.0;
			}
		}
	}
}
|
||||
|
||||
// Compute the first moments of the D3Q19 distributions at every node with
// ID > 0 and store them component-major in vel: vel[n], vel[N+n], vel[2*N+n].
// Nodes with ID <= 0 are left untouched.  The sums are not divided by density.
extern void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz)
{
	const int N = Nx*Ny*Nz;

	for (int n = 0; n < N; ++n) {
		if (!(ID[n] > 0)) continue;

		// Per-node views: component q of this node is even[q*N] / odd[q*N].
		const double *even = disteven + n;
		const double *odd  = distodd + n;

		const double f2  = even[N];
		const double f4  = even[2*N];
		const double f6  = even[3*N];
		const double f8  = even[4*N];
		const double f10 = even[5*N];
		const double f12 = even[6*N];
		const double f14 = even[7*N];
		const double f16 = even[8*N];
		const double f18 = even[9*N];

		const double f1  = odd[0];
		const double f3  = odd[N];
		const double f5  = odd[2*N];
		const double f7  = odd[3*N];
		const double f9  = odd[4*N];
		const double f11 = odd[5*N];
		const double f13 = odd[6*N];
		const double f15 = odd[7*N];
		const double f17 = odd[8*N];

		// Same summation order as before so rounding is unchanged.
		vel[n]     = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
		vel[N+n]   = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
		vel[2*N+n] = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
	}
}
|
||||
|
||||
//*************************************************************************
|
||||
//*************************************************************************
|
||||
// Apply the prescribed-density boundary update to the first Nx*Ny nodes
// (indices 0 .. Nx*Ny-1) of the distribution arrays.
//
// The distributions are read using the "opposite" memory convention, i.e. the
// state left behind by a preceding swap step.  From the components that are
// already known, the normal velocity uz consistent with density `din` is
// computed, and five components (f6, f12, f13, f16, f17) are rebuilt and
// written back to their slots.  All other components are left unchanged.
extern void PressureBC_inlet(double *disteven, double *distodd, double din,
		int Nx, int Ny, int Nz)
{
	const int N = Nx*Ny*Nz;
	const int layer = Nx*Ny;	// number of nodes the update is applied to

	for (int n = 0; n < layer; ++n) {
		double *even = disteven + n;
		double *odd  = distodd + n;

		// Components needed by the update.  The five components that are
		// rebuilt below are not loaded: their old values are never used.
		const double f1  = odd[0];
		const double f3  = odd[N];
		const double f5  = odd[2*N];
		const double f7  = odd[3*N];
		const double f9  = odd[4*N];
		const double f11 = odd[5*N];
		const double f15 = odd[7*N];

		const double f0  = even[0];
		const double f2  = even[N];
		const double f4  = even[2*N];
		const double f8  = even[4*N];
		const double f10 = even[5*N];
		const double f14 = even[7*N];
		const double f18 = even[9*N];

		// Normal velocity implied by the known components and din
		const double uz = 1.0 - (f0+f4+f3+f2+f1+f8+f7+f9+ f10 +
				2*(f5+ f15+f18+f11+f14))/din;

		// Rebuild the five unknown components (each may depend on the ones
		// computed before it, so the order matters)
		const double f6  = f5 + 0.3333333333333333*din*uz;
		const double f16 = f15 + 0.1666666666666667*din*uz;
		const double f17 = f16 + f4 - f3-f15+f18+f8-f7 +f9-f10;
		const double f12 = (din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f2+f1-f14+f11-f8+f7+f9-f10)*0.5;
		const double f13 = din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f12;

		// Store in the "opposite" memory locations
		even[3*N] = f6;
		even[6*N] = f12;
		odd[6*N]  = f13;
		even[8*N] = f16;
		odd[8*N]  = f17;
	}
}
|
||||
|
||||
// Apply the prescribed-density boundary update to the outlet nodes, i.e. the
// nodes with index n in [outlet, Nx*Ny*Nz).
//
//   disteven, distodd : D3Q19 distributions, component-major ([q*N+n]), read
//                       using the "opposite" memory convention
//   dout              : density imposed at the outlet
//   S                 : unused here (the signature is kept for callers)
//   outlet            : index of the first outlet node; negative values are
//                       treated as 0
//
// Fix: the loop previously started at n = 0 and ignored `outlet`, so the
// update overwrote five distribution components at every node of the domain
// instead of only the outlet nodes.  Passing outlet = 0 reproduces the old
// whole-domain behaviour.
extern void PressureBC_outlet(double *disteven, double *distodd, double dout,
		int Nx, int Ny, int Nz, int S, int outlet)
{
	(void)S;	// not needed by the serial loop

	const int N = Nx*Ny*Nz;
	const int first = (outlet < 0) ? 0 : outlet;

	for (int n = first; n < N; ++n) {
		// Components needed by the update.  f5, f11, f14, f15 and f18 are
		// rebuilt below, so their stored values are not read.
		const double f1  = distodd[n];
		const double f3  = distodd[N+n];
		const double f7  = distodd[3*N+n];
		const double f9  = distodd[4*N+n];
		const double f13 = distodd[6*N+n];
		const double f17 = distodd[8*N+n];

		const double f0  = disteven[n];
		const double f2  = disteven[N+n];
		const double f4  = disteven[2*N+n];
		const double f6  = disteven[3*N+n];
		const double f8  = disteven[4*N+n];
		const double f10 = disteven[5*N+n];
		const double f12 = disteven[6*N+n];
		const double f16 = disteven[8*N+n];

		// Normal velocity implied by the known components and dout
		const double uz = -1.0 + (f0+f4+f3+f2+f1+f8+f7+f9+f10 + 2*(f6+f16+f17+f12+f13))/dout;

		// Rebuild the five unknown components (order matters: later ones use
		// the values computed before them)
		const double f5  = f6 - 0.33333333333333338*dout* uz;
		const double f15 = f16 - 0.16666666666666678*dout* uz;
		const double f18 = f15 - f4 + f3-f16+f17-f8+f7-f9+f10;
		const double f11 = (-dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18+f2-f1-f13+f12+f8-f7-f9+f10)*0.5;
		const double f14 = -dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18-f11;

		// Store in the "opposite" memory locations
		distodd[2*N+n]  = f5;
		distodd[5*N+n]  = f11;
		disteven[7*N+n] = f14;
		distodd[7*N+n]  = f15;
		disteven[9*N+n] = f18;
	}
}
|
||||
//*************************************************************************
|
||||
// Compute the colour gradient of the phase indicator `phi` at every node from
// its 18 neighbours, with periodic wrap-around in x, y and z.
// The (un-normalised) result is stored interleaved per node:
// ColorGrad[3*n], ColorGrad[3*n+1], ColorGrad[3*n+2].
// `ID` is accepted for interface compatibility but is not consulted.
extern void ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz)
{
	const int plane = Nx*Ny;
	const int N = plane*Nz;

	for (int n = 0; n < N; ++n) {
		// Back out the 3-D indices of node n
		const int k = n/plane;
		const int j = (n-plane*k)/Nx;
		const int i = n-plane*k-Nx*j;

		// Periodic neighbour coordinates along each axis
		const int im = (i-1 < 0)   ? i-1+Nx : i-1;
		const int ip = (i+1 < Nx)  ? i+1    : i+1-Nx;
		const int jm = (j-1 < 0)   ? j-1+Ny : j-1;
		const int jp = (j+1 < Ny)  ? j+1    : j+1-Ny;
		const int km = (k-1 < 0)   ? k-1+Nz : k-1;
		const int kp = (k+1 < Nz)  ? k+1    : k+1-Nz;

		// Row / slab offsets for the centre and shifted coordinates
		const int row0 = Nx*j,    rowM = Nx*jm,    rowP = Nx*jp;
		const int slab0 = plane*k, slabM = plane*km, slabP = plane*kp;

		// Axis neighbours
		const double f1  = phi[slab0+row0+im];	// (i-1, j,   k  )
		const double f2  = phi[slab0+row0+ip];	// (i+1, j,   k  )
		const double f3  = phi[slab0+rowM+i];	// (i,   j-1, k  )
		const double f4  = phi[slab0+rowP+i];	// (i,   j+1, k  )
		const double f5  = phi[slabM+row0+i];	// (i,   j,   k-1)
		const double f6  = phi[slabP+row0+i];	// (i,   j,   k+1)
		// xy-plane diagonals
		const double f7  = phi[slab0+rowM+im];	// (i-1, j-1, k  )
		const double f8  = phi[slab0+rowP+ip];	// (i+1, j+1, k  )
		const double f9  = phi[slab0+rowP+im];	// (i-1, j+1, k  )
		const double f10 = phi[slab0+rowM+ip];	// (i+1, j-1, k  )
		// xz-plane diagonals
		const double f11 = phi[slabM+row0+im];	// (i-1, j,   k-1)
		const double f12 = phi[slabP+row0+ip];	// (i+1, j,   k+1)
		const double f13 = phi[slabP+row0+im];	// (i-1, j,   k+1)
		const double f14 = phi[slabM+row0+ip];	// (i+1, j,   k-1)
		// yz-plane diagonals
		const double f15 = phi[slabM+rowM+i];	// (i,   j-1, k-1)
		const double f16 = phi[slabP+rowP+i];	// (i,   j+1, k+1)
		const double f17 = phi[slabP+rowM+i];	// (i,   j-1, k+1)
		const double f18 = phi[slabM+rowP+i];	// (i,   j+1, k-1)

		// Weighted differences: axis neighbours count fully, diagonals half
		double *grad = ColorGrad + 3*n;
		grad[0] = -(f1-f2+0.5*(f7-f8+f9-f10+f11-f12+f13-f14));
		grad[1] = -(f3-f4+0.5*(f7-f8-f9+f10+f15-f16+f17-f18));
		grad[2] = -(f5-f6+0.5*(f11-f12-f13+f14+f15-f16-f17+f18));
	}
}
|
||||
//*************************************************************************
|
||||
// Collision step of the colour model on the D3Q19 distributions.
//
// For every node with ID > 0:
//   1. load the colour gradient and normalise it (C is its magnitude);
//   2. read the 19 distributions from the "opposite" slots left by the
//      preceding swap step;
//   3. transform to moments, relax them with rates rlx_setA / rlx_setB towards
//      equilibria that include the alpha*C colour-gradient terms
//      (Toelke, Fruediger et al. 2006);
//   4. transform back, add the body force (Fx, Fy, Fz), and write the
//      distributions to their regular slots;
//   5. store the pre-collision momentum (jx, jy, jz) in Velocity[3*n..3*n+2].
// Nodes with ID <= 0 are left untouched.
//
// `beta` and `pBC` are part of the interface but are not used by this routine.
//
// Fix: the gradient used to be divided by C before the C == 0 test, which
// produced 0/0 NaNs that were only overwritten later.  The zero case is now
// handled before the division; results are unchanged.
//
// NOTE(review): the force weights 0.16666666 and 0.08333333333 are truncated
// relative to the other constants; left as-is so results stay unchanged.
extern void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
		double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB,
		double alpha, double beta, double Fx, double Fy, double Fz, bool pBC)
{
	(void)beta;	// unused, kept for the interface
	(void)pBC;	// unused, kept for the interface

	// distributions
	double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
	double f10,f11,f12,f13,f14,f15,f16,f17,f18;
	// non-conserved moments
	double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
	// density, momentum, colour-gradient magnitude and direction
	double rho,jx,jy,jz,C,nx,ny,nz;

	const int N = Nx*Ny*Nz;

	for (int n=0; n<N; n++){
		if (!(ID[n] > 0)) continue;	// skip nodes that are not fluid

		// Retrieve the colour gradient
		nx = ColorGrad[3*n];
		ny = ColorGrad[3*n+1];
		nz = ColorGrad[3*n+2];
		C = sqrt(nx*nx+ny*ny+nz*nz);
		if (C == 0.0){
			// No interface here: the direction is arbitrary because every
			// term that uses it is multiplied by C == 0.
			nx = ny = nz = 1.0;
		}
		else{
			nx = nx/C;
			ny = ny/C;
			nz = nz/C;
		}
		//........................................................................
		//					READ THE DISTRIBUTIONS
		//		(read from opposite array due to previous swap operation)
		//........................................................................
		f2 = distodd[n];
		f4 = distodd[N+n];
		f6 = distodd[2*N+n];
		f8 = distodd[3*N+n];
		f10 = distodd[4*N+n];
		f12 = distodd[5*N+n];
		f14 = distodd[6*N+n];
		f16 = distodd[7*N+n];
		f18 = distodd[8*N+n];
		//........................................................................
		f0 = disteven[n];
		f1 = disteven[N+n];
		f3 = disteven[2*N+n];
		f5 = disteven[3*N+n];
		f7 = disteven[4*N+n];
		f9 = disteven[5*N+n];
		f11 = disteven[6*N+n];
		f13 = disteven[7*N+n];
		f15 = disteven[8*N+n];
		f17 = disteven[9*N+n];
		//........................................................................
		//					PERFORM RELAXATION PROCESS
		//........................................................................
		//....................compute the moments.................................
		rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
		m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
		m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
		jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
		m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
		jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
		m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
		jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
		m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
		m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
		m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
		m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
		m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
		m13 = f8+f7-f10-f9;
		m14 = f16+f15-f18-f17;
		m15 = f12+f11-f14-f13;
		m16 = f7-f8+f9-f10-f11+f12-f13+f14;
		m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
		m18 = f11-f12-f13+f14-f15+f16+f17-f18;
		//..........Toelke, Fruediger et. al. 2006...............
		m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1);
		m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2);
		m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
		m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
		m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
		m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
		m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
		m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
		m12 = m12 + rlx_setA*( -0.5*((jy*jy-jz*jz)/rho) - m12);
		m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13);
		m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14);
		m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15);
		m16 = m16 + rlx_setB*( - m16);
		m17 = m17 + rlx_setB*( - m17);
		m18 = m18 + rlx_setB*( - m18);
		//.................inverse transformation.................................
		f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
		f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10);
		f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10);
		f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
		f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
		f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
		f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
		f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
				+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
				+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
		f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
				+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
				+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
		f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
				+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
				+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
		f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
				+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
				+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
		f11 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
				+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
				-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
		f12 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
				+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
				-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
		f13 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
				+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
				-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
		f14 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
				+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
				-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
		f15 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
				-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
		f16 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
				-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
		f17 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
				-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
		f18 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
				-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
		//........................................................................
		// incorporate external force
		f1 += 0.16666666*Fx;
		f2 -= 0.16666666*Fx;
		f3 += 0.16666666*Fy;
		f4 -= 0.16666666*Fy;
		f5 += 0.16666666*Fz;
		f6 -= 0.16666666*Fz;
		f7 += 0.08333333333*(Fx+Fy);
		f8 -= 0.08333333333*(Fx+Fy);
		f9 += 0.08333333333*(Fx-Fy);
		f10 -= 0.08333333333*(Fx-Fy);
		f11 += 0.08333333333*(Fx+Fz);
		f12 -= 0.08333333333*(Fx+Fz);
		f13 += 0.08333333333*(Fx-Fz);
		f14 -= 0.08333333333*(Fx-Fz);
		f15 += 0.08333333333*(Fy+Fz);
		f16 -= 0.08333333333*(Fy+Fz);
		f17 += 0.08333333333*(Fy-Fz);
		f18 -= 0.08333333333*(Fy-Fz);
		//*********** WRITE UPDATED VALUES TO MEMORY ******************
		//....EVEN.....................................
		disteven[n] = f0;
		disteven[N+n] = f2;
		disteven[2*N+n] = f4;
		disteven[3*N+n] = f6;
		disteven[4*N+n] = f8;
		disteven[5*N+n] = f10;
		disteven[6*N+n] = f12;
		disteven[7*N+n] = f14;
		disteven[8*N+n] = f16;
		disteven[9*N+n] = f18;
		//....ODD......................................
		distodd[n] = f1;
		distodd[N+n] = f3;
		distodd[2*N+n] = f5;
		distodd[3*N+n] = f7;
		distodd[4*N+n] = f9;
		distodd[5*N+n] = f11;
		distodd[6*N+n] = f13;
		distodd[7*N+n] = f15;
		distodd[8*N+n] = f17;
		//...Store the Velocity (pre-collision momentum)...
		Velocity[3*n] = jx;
		Velocity[3*n+1] = jy;
		Velocity[3*n+2] = jz;
	}
}
|
||||
//*************************************************************************
|
||||
// Build the D3Q7 equilibrium distributions of both components from the
// densities in `Copy`, recolour them along the colour gradient, and stream
// (accumulate) them into `Den`.
//
// Only nodes with ID > 0 and na+nb > 0 act as sources.  For each such node the
// rest part (1/3 of each density) stays on the node and the six moving parts
// are pushed to the axis neighbours, with periodic wrap-around.  If the
// neighbour has ID == 0 the part is added back to the source node instead
// (bounce-back).  `Den` is only ever incremented, never reset, here.
// `Phi` and `pBC` are accepted for interface compatibility but are not used.
extern void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
		double beta, int Nx, int Ny, int Nz, bool pBC)
{
	const int plane = Nx*Ny;
	const int N = plane*Nz;
	double feq[6];	// equilibrium weights of the six moving directions

	for (int n = 0; n < N; ++n) {
		// Local density values
		const double na = Copy[2*n];
		const double nb = Copy[2*n+1];
		if (!(ID[n] > 0 && na+nb > 0.0)) continue;

		// Back out the 3-D indices of node n
		const int k = n/plane;
		const int j = (n-plane*k)/Nx;
		const int i = n-plane*k-Nx*j;

		// Colour gradient: magnitude C and direction (only used when C > 0)
		double nx = ColorGrad[3*n];
		double ny = ColorGrad[3*n+1];
		double nz = ColorGrad[3*n+2];
		const double C = sqrt(nx*nx+ny*ny+nz*nz);
		nx = nx/C;
		ny = ny/C;
		nz = nz/C;

		// Flow velocity
		const double ux = Velocity[3*n];
		const double uy = Velocity[3*n+1];
		const double uz = Velocity[3*n+2];

		// Stationary part stays on the node
		Den[2*n]   += 0.3333333333333333*na;
		Den[2*n+1] += 0.3333333333333333*nb;

		// Moving parts, ordered +x, -x, +y, -y, +z, -z
		feq[0] = 0.1111111111111111*(1+3*ux);
		feq[1] = 0.1111111111111111*(1-3*ux);
		feq[2] = 0.1111111111111111*(1+3*uy);
		feq[3] = 0.1111111111111111*(1-3*uy);
		feq[4] = 0.1111111111111111*(1+3*uz);
		feq[5] = 0.1111111111111111*(1-3*uz);

		for (int axis = 0; axis < 3; ++axis) {
			// Unit vector of this axis
			const int Cqx = (axis == 0) ? 1 : 0;
			const int Cqy = (axis == 1) ? 1 : 0;
			const int Cqz = (axis == 2) ? 1 : 0;

			// Equilibrium parts travelling along +axis (1) and -axis (2)
			double a1 = na*feq[2*axis];
			double b1 = nb*feq[2*axis];
			double a2 = na*feq[2*axis+1];
			double b2 = nb*feq[2*axis+1];

			// Recolour: shift mass between the two directions so component A
			// moves along the gradient and component B against it
			if (C > 0.0) {
				const double sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz);
				const double delta = na*nb/(na+nb)*0.1111111111111111*sp;
				a1 += beta*delta;
				a2 -= beta*delta;
				b1 -= beta*delta;
				b2 += beta*delta;
			}

			// Pass 0 streams along +axis, pass 1 along -axis
			for (int pass = 0; pass < 2; ++pass) {
				const int sgn = (pass == 0) ? 1 : -1;
				const double da = (pass == 0) ? a1 : a2;
				const double db = (pass == 0) ? b1 : b2;

				int in = i + sgn*Cqx;
				int jn = j + sgn*Cqy;
				int kn = k + sgn*Cqz;
				// Adjust for periodic BC, if necessary
				if (in<0) in += Nx;
				if (jn<0) jn += Ny;
				if (kn<0) kn += Nz;
				if (!(in<Nx)) in -= Nx;
				if (!(jn<Ny)) jn -= Ny;
				if (!(kn<Nz)) kn -= Nz;

				// Neighbour with ID == 0: bounce back onto the source node;
				// otherwise push to the neighbour
				const int nn = kn*plane + jn*Nx + in;
				const int dest = (ID[nn] == 0) ? n : nn;
				Den[2*dest]   += da;
				Den[2*dest+1] += db;
			}
		}
	}
}
|
||||
|
||||
// Recompute the phase indicator from the densities accumulated by streaming.
//
// For every node n in [0,N) with ID[n] > 0:
//   - Na = Den[2*n] and Nb = Den[2*n+1] are read,
//   - Phi[n] is set to (Na-Nb)/(Na+Nb),
//   - Na and Nb are saved to Copy[2*n] and Copy[2*n+1],
//   - Den[2*n] and Den[2*n+1] are reset to zero so the next streaming
//     step can accumulate into them.
// Nodes with ID[n] <= 0 are not touched.
//
// A node that carries no mass (Na+Nb == 0) has no defined phase ratio; the
// plain quotient would be 0/0 = NaN.  Phi is set to 0.0 for such nodes so
// that a single empty node cannot inject NaN into the phase field.
extern void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N)
{
	int n;
	double Na,Nb,total;
	//...................................................................
	// Update Phi
	for (n=0; n<N; n++){
		if (ID[n] > 0){
			// Get the density value (Streaming already performed)
			Na = Den[2*n];
			Nb = Den[2*n+1];
			total = Na+Nb;
			// Guard the 0/0 case: an empty node gets a neutral indicator
			if (total == 0.0)	Phi[n] = 0.0;
			else				Phi[n] = (Na-Nb)/total;
			// Store the copy of the current density
			Copy[2*n] = Na;
			Copy[2*n+1] = Nb;
			// Zero the Density value to get ready for the next streaming
			Den[2*n] = 0.0;
			Den[2*n+1] = 0.0;
		}
	}
	//...................................................................
}
|
756
cpu/Color.cpp~
Normal file
756
cpu/Color.cpp~
Normal file
@ -0,0 +1,756 @@
|
||||
#include <math.h>
|
||||
|
||||
// Set the initial two-component density and phase indicator on every node.
//
// Den stores the pair (A,B) for node n at Den[2*n], Den[2*n+1].
//   ID[n] == 1 : pure component A  -> Den = (1,0), Phi = +1
//   ID[n] == 2 : pure component B  -> Den = (0,1), Phi = -1
//   otherwise  : Den = (das,dbs),    Phi = (das-dbs)/(das+dbs)
// All N nodes are written.
inline void InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int N)
{
	for (int node = 0; node < N; ++node){
		double *rho = Den + 2*node;		// (A,B) density pair of this node
		switch (ID[node]){
		case 1:
			rho[0] = 1.0;
			rho[1] = 0.0;
			Phi[node] = 1.0;
			break;
		case 2:
			rho[0] = 0.0;
			rho[1] = 1.0;
			Phi[node] = -1.0;
			break;
		default:
			// every other label receives the caller-supplied densities
			rho[0] = das;
			rho[1] = dbs;
			Phi[node] = (das-dbs)/(das+dbs);
			break;
		}
	}
}
|
||||
// Initialise the 19 distributions of every node.
//
// Storage: component q of node n lives at array[q*N+n] with N = Nx*Ny*Nz;
// f_even holds 10 components (q = 0..9), f_odd holds 9 (q = 0..8).
//
// Nodes with ID[n] > 0 receive the constant values
//   f_even[0]               = 0.3333333333333333
//   f_odd[0..2], f_even[1..3] = 0.055555555555555555
//   f_odd[3..8], f_even[4..9] = 0.0277777777777778
// All other nodes have every component set to -1.0.
inline void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
	const int N = Nx*Ny*Nz;
	const double wRest = 0.3333333333333333;
	const double wAxis = 0.055555555555555555;
	const double wDiag = 0.0277777777777778;

	for (int node = 0; node < N; ++node){
		if (ID[node] > 0){
			f_even[node] = wRest;
			// first three odd/even pairs
			for (int q = 0; q < 3; ++q){
				f_odd[q*N+node] = wAxis;
				f_even[(q+1)*N+node] = wAxis;
			}
			// remaining six odd/even pairs
			for (int q = 3; q < 9; ++q){
				f_odd[q*N+node] = wDiag;
				f_even[(q+1)*N+node] = wDiag;
			}
		}
		else{
			// flag every component of an inactive node
			f_even[node] = -1.0;
			for (int q = 0; q < 9; ++q){
				f_odd[q*N+node] = -1.0;
				f_even[(q+1)*N+node] = -1.0;
			}
		}
	}
}
|
||||
|
||||
// Compute the first moment (momentum sums) of the distributions on every
// node with ID[n] > 0 and store the three components in vel.
//
// Layout (N = Nx*Ny*Nz):
//   disteven[q*N+n], q = 1..9  -> f2, f4, ..., f18
//   distodd [q*N+n], q = 0..8  -> f1, f3, ..., f17
//   vel[n], vel[N+n], vel[2*N+n] <- x, y, z component
// Nodes with ID[n] <= 0 are skipped and their vel entries are left as is.
inline void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz)
{
	const int N = Nx*Ny*Nz;

	for (int node = 0; node < N; ++node){
		if (!(ID[node] > 0)) continue;

		const double *even = disteven + node;
		const double *odd  = distodd + node;

		// odd-numbered distributions
		const double f1  = odd[0];
		const double f3  = odd[1*N];
		const double f5  = odd[2*N];
		const double f7  = odd[3*N];
		const double f9  = odd[4*N];
		const double f11 = odd[5*N];
		const double f13 = odd[6*N];
		const double f15 = odd[7*N];
		const double f17 = odd[8*N];
		// even-numbered distributions (f0 does not contribute)
		const double f2  = even[N];
		const double f4  = even[2*N];
		const double f6  = even[3*N];
		const double f8  = even[4*N];
		const double f10 = even[5*N];
		const double f12 = even[6*N];
		const double f14 = even[7*N];
		const double f16 = even[8*N];
		const double f18 = even[9*N];

		// signed sums along each axis
		vel[node]     = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
		vel[N+node]   = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
		vel[2*N+node] = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
	}
}
|
||||
|
||||
//*************************************************************************
|
||||
//*************************************************************************
|
||||
// Impose the density din on the first Nx*Ny nodes (n = 0 .. Nx*Ny-1).
//
// The distributions are read using the "opposite" memory convention
// (odd-numbered f from distodd, even-numbered f from disteven, component q
// of node n at array[q*N+n], N = Nx*Ny*Nz).  From the known distributions a
// normal velocity uz is computed, and the five distributions f6, f12, f13,
// f16 and f17 are then rebuilt and written back.  No other entries are
// modified.  The routine does not consult a node-type array, and din is used
// as a divisor.
inline void PressureBC_inlet(double *disteven, double *distodd, double din,
		int Nx, int Ny, int Nz)
{
	const int N = Nx*Ny*Nz;
	const int layer = Nx*Ny;

	for (int node = 0; node < layer; ++node){
		double *even = disteven + node;
		double *odd  = distodd + node;

		//........Known distributions ("opposite" memory convention)........
		const double f1  = odd[0];
		const double f3  = odd[N];
		const double f5  = odd[2*N];
		const double f7  = odd[3*N];
		const double f9  = odd[4*N];
		const double f11 = odd[5*N];
		const double f15 = odd[7*N];
		const double f0  = even[0];
		const double f2  = even[N];
		const double f4  = even[2*N];
		const double f8  = even[4*N];
		const double f10 = even[5*N];
		const double f14 = even[7*N];
		const double f18 = even[9*N];

		//........Normal velocity implied by the imposed density........
		const double uz = 1.0 - (f0+f4+f3+f2+f1+f8+f7+f9+ f10 +
				2*(f5+ f15+f18+f11+f14))/din;

		//........Rebuild the unknown distributions........
		const double f6  = f5 + 0.3333333333333333*din*uz;
		const double f16 = f15 + 0.1666666666666667*din*uz;
		const double f17 = f16 + f4 - f3-f15+f18+f8-f7 +f9-f10;
		// leading part shared by the f12 and f13 expressions
		const double balance = din*uz+f5+ f15+f18+f11+f14-f6-f16-f17;
		const double f12 = (balance-f2+f1-f14+f11-f8+f7+f9-f10)*0.5;
		const double f13 = balance-f12;

		//........Store in "opposite" memory location..........
		even[3*N] = f6;
		even[6*N] = f12;
		odd[6*N]  = f13;
		even[8*N] = f16;
		odd[8*N]  = f17;
	}
}
|
||||
|
||||
// Impose the density dout on the nodes n = outlet .. N-1 (N = Nx*Ny*Nz).
//
// The distributions are read using the "opposite" memory convention
// (odd-numbered f from distodd, even-numbered f from disteven, component q
// of node n at array[q*N+n]).  From the known distributions a normal
// velocity uz is computed, and the five distributions f5, f11, f14, f15 and
// f18 are then rebuilt and written back.  No other entries are modified.
//
// Parameters:
//   dout   - density to impose (used as a divisor)
//   outlet - linear index of the first node of the outlet region; values
//            below zero are treated as zero, values >= N select no node
//   S      - accepted for interface compatibility; not used by this routine
//
// The loop previously started at n = 0 and therefore rewrote the five
// distributions on EVERY node of the domain while ignoring `outlet`.
inline void PressureBC_outlet(double *disteven, double *distodd, double dout,
		int Nx, int Ny, int Nz, int S, int outlet)
{
	int n,N,first;
	// distributions
	double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
	double f10,f11,f12,f13,f14,f15,f16,f17,f18;
	double uz,balance;

	(void) S;	// unused, see header comment

	N = Nx*Ny*Nz;
	first = (outlet < 0) ? 0 : outlet;

	// Loop over the outlet region only
	for (n=first; n<N; n++){
		//........................................................................
		// Read the known distributions from "opposite" memory convention
		//........................................................................
		f1 = distodd[n];
		f3 = distodd[N+n];
		f7 = distodd[3*N+n];
		f9 = distodd[4*N+n];
		f13 = distodd[6*N+n];
		f17 = distodd[8*N+n];
		//........................................................................
		f0 = disteven[n];
		f2 = disteven[N+n];
		f4 = disteven[2*N+n];
		f6 = disteven[3*N+n];
		f8 = disteven[4*N+n];
		f10 = disteven[5*N+n];
		f12 = disteven[6*N+n];
		f16 = disteven[8*N+n];
		//........Determine the outlet flow velocity.........
		uz = -1.0 + (f0+f4+f3+f2+f1+f8+f7+f9+f10 + 2*(f6+f16+f17+f12+f13))/dout;
		//........Set the unknown distributions..............
		f5 = f6 - 0.33333333333333338*dout* uz;
		f15 = f16 - 0.16666666666666678*dout* uz;
		f18 = f15 - f4 + f3-f16+f17-f8+f7-f9+f10;
		// leading part shared by the f11 and f14 expressions
		balance = -dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18;
		f11 = (balance+f2-f1-f13+f12+f8-f7-f9+f10)*0.5;
		f14 = balance-f11;
		//........Store in "opposite" memory location..........
		distodd[2*N+n] = f5;
		distodd[5*N+n] = f11;
		disteven[7*N+n] = f14;
		distodd[7*N+n] = f15;
		disteven[9*N+n] = f18;
		//...................................................
	}
}
|
||||
//*************************************************************************
|
||||
// Compute an 18-neighbour finite-difference gradient of phi on every node of
// an Nx x Ny x Nz grid (linear index n = k*Nx*Ny + j*Nx + i) and store the
// un-normalised result in ColorGrad[3*n .. 3*n+2].
//
// Neighbours that fall off the grid are wrapped periodically in all three
// directions.  Face neighbours enter with weight 1, edge neighbours with
// weight 0.5.  ID is accepted but not consulted: every node is processed.
inline void ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz)
{
	const int plane = Nx*Ny;
	const int N = Nx*Ny*Nz;

	for (int node = 0; node < N; ++node){
		//.......Back out the 3-D indices for this node..............
		const int k = node/plane;
		const int j = (node-plane*k)/Nx;
		const int i = node-plane*k-Nx*j;

		//.......Linear-index offsets to the six face neighbours,
		//       already adjusted for the periodic wrap............
		const int xm = (i-1<0)     ? Nx-1      : -1;
		const int xp = (!(i+1<Nx)) ? 1-Nx      : 1;
		const int ym = (j-1<0)     ? plane-Nx  : -Nx;
		const int yp = (!(j+1<Ny)) ? Nx-plane  : Nx;
		const int zm = (k-1<0)     ? N-plane   : -plane;
		const int zp = (!(k+1<Nz)) ? plane-N   : plane;

		//.................Read Phase Indicator Values............................
		// face neighbours
		const double f1  = phi[node+xm];
		const double f2  = phi[node+xp];
		const double f3  = phi[node+ym];
		const double f4  = phi[node+yp];
		const double f5  = phi[node+zm];
		const double f6  = phi[node+zp];
		// edge neighbours in the x-y plane
		const double f7  = phi[node+ym+xm];
		const double f8  = phi[node+yp+xp];
		const double f9  = phi[node+yp+xm];
		const double f10 = phi[node+ym+xp];
		// edge neighbours in the x-z plane
		const double f11 = phi[node+zm+xm];
		const double f12 = phi[node+zp+xp];
		const double f13 = phi[node+zp+xm];
		const double f14 = phi[node+zm+xp];
		// edge neighbours in the y-z plane
		const double f15 = phi[node+zm+ym];
		const double f16 = phi[node+zp+yp];
		const double f17 = phi[node+zp+ym];
		const double f18 = phi[node+zm+yp];

		//............Compute and store the Color Gradient (not normalised)........
		double *grad = ColorGrad + 3*node;
		grad[0] = -(f1-f2+0.5*(f7-f8+f9-f10+f11-f12+f13-f14));
		grad[1] = -(f3-f4+0.5*(f7-f8-f9+f10+f15-f16+f17-f18));
		grad[2] = -(f5-f6+0.5*(f11-f12-f13+f14+f15-f16-f17+f18));
	}
}
|
||||
//*************************************************************************
|
||||
// Moment-space collision for the two-phase colour model, applied to every
// node with ID[n] > 0 (N = Nx*Ny*Nz nodes, component q of node n stored at
// array[q*N+n]).
//
// Per node the routine
//   1. loads the colour gradient from ColorGrad[3*n..3*n+2], takes its
//      magnitude C and normalises it (a zero gradient is replaced by the
//      direction (1,1,1); it is always multiplied by C = 0 afterwards),
//   2. reads the 19 distributions from the arrays opposite to where they
//      will be written (a previous swap is assumed by the original comments),
//   3. forms density, momentum (jx,jy,jz) and the remaining moments,
//   4. relaxes the moments with rates rlx_setA / rlx_setB; alpha scales the
//      colour-gradient contributions,
//   5. transforms back, adds the body force (Fx,Fy,Fz),
//   6. writes the distributions and stores (jx,jy,jz) in Velocity[3*n..].
//
// `beta` and `pBC` are accepted but not used in this routine.  ColorGrad is
// only read.
inline void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
		double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB,
		double alpha, double beta, double Fx, double Fy, double Fz, bool pBC)
{
	const int N = Nx*Ny*Nz;

	for (int n = 0; n < N; ++n){
		// skip nodes that are not part of the fluid
		if (!(ID[n] > 0)) continue;

		double *even = disteven + n;
		double *odd  = distodd + n;

		//...........Colour gradient: magnitude and unit direction...............
		double nx = ColorGrad[3*n];
		double ny = ColorGrad[3*n+1];
		double nz = ColorGrad[3*n+2];
		const double C = sqrt(nx*nx+ny*ny+nz*nz);
		if (C == 0.0){
			nx = ny = nz = 1.0;
		}
		else{
			nx = nx/C;
			ny = ny/C;
			nz = nz/C;
		}

		//........................................................................
		//					READ THE DISTRIBUTIONS
		//		(read from opposite array due to previous swap operation)
		//........................................................................
		double f2  = odd[0];
		double f4  = odd[N];
		double f6  = odd[2*N];
		double f8  = odd[3*N];
		double f10 = odd[4*N];
		double f12 = odd[5*N];
		double f14 = odd[6*N];
		double f16 = odd[7*N];
		double f18 = odd[8*N];
		double f0  = even[0];
		double f1  = even[N];
		double f3  = even[2*N];
		double f5  = even[3*N];
		double f7  = even[4*N];
		double f9  = even[5*N];
		double f11 = even[6*N];
		double f13 = even[7*N];
		double f15 = even[8*N];
		double f17 = even[9*N];

		//....................compute the moments...............................................
		const double rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
		double m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
		double m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
		const double jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
		double m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
		const double jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
		double m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
		const double jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
		double m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
		double m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
		double m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
		double m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
		double m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
		double m13 = f8+f7-f10-f9;
		double m14 = f16+f15-f18-f17;
		double m15 = f12+f11-f14-f13;
		double m16 = f7-f8+f9-f10-f11+f12-f13+f14;
		double m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
		double m18 = f11-f12-f13+f14-f15+f16+f17-f18;

		//..........Relaxation (Toelke, Fruediger et. al. 2006)...............
		// shared sub-expressions, grouped exactly as in the full formulas
		const double jj = jx*jx+jy*jy+jz*jz;
		const double halfTension = 0.5*alpha*C;
		m1  += rlx_setA*((19*jj/rho - 11*rho) -alpha*C - m1);
		m2  += rlx_setA*((3*rho - 5.5*jj/rho)- m2);
		m4  += rlx_setB*((-0.6666666666666666*jx)- m4);
		m6  += rlx_setB*((-0.6666666666666666*jy)- m6);
		m8  += rlx_setB*((-0.6666666666666666*jz)- m8);
		m9  += rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + halfTension*(2*nx*nx-ny*ny-nz*nz) - m9);
		m10 += rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
		m11 += rlx_setA*(((jy*jy-jz*jz)/rho) + halfTension*(ny*ny-nz*nz)- m11);
		m12 += rlx_setA*( -0.5*((jy*jy-jz*jz)/rho) - m12);
		m13 += rlx_setA*( (jx*jy/rho) + halfTension*nx*ny - m13);
		m14 += rlx_setA*( (jy*jz/rho) + halfTension*ny*nz - m14);
		m15 += rlx_setA*( (jx*jz/rho) + halfTension*nx*nz - m15);
		m16 += rlx_setB*( - m16);
		m17 += rlx_setB*( - m17);
		m18 += rlx_setB*( - m18);

		//.................inverse transformation......................................................
		f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
		f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10);
		f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10);
		f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
		f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
		f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
		f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
				+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
		f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
				+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
				+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
		f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
				+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
				+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
		f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
				+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
				+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
		f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
				+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
				+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
		f11 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
				+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
				-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
		f12 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
				+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
				-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
		f13 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
				+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
				-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
		f14 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
				+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
				-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
		f15 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
				-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
		f16 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
				-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
		f17 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
				-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
		f18 = 0.05263157894736842*rho+0.003341687552213868*m1
				+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
				-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);

		//.................incorporate external force..................................
		f1 += 0.16666666*Fx;
		f2 -= 0.16666666*Fx;
		f3 += 0.16666666*Fy;
		f4 -= 0.16666666*Fy;
		f5 += 0.16666666*Fz;
		f6 -= 0.16666666*Fz;
		f7 += 0.08333333333*(Fx+Fy);
		f8 -= 0.08333333333*(Fx+Fy);
		f9 += 0.08333333333*(Fx-Fy);
		f10 -= 0.08333333333*(Fx-Fy);
		f11 += 0.08333333333*(Fx+Fz);
		f12 -= 0.08333333333*(Fx+Fz);
		f13 += 0.08333333333*(Fx-Fz);
		f14 -= 0.08333333333*(Fx-Fz);
		f15 += 0.08333333333*(Fy+Fz);
		f16 -= 0.08333333333*(Fy+Fz);
		f17 += 0.08333333333*(Fy-Fz);
		f18 -= 0.08333333333*(Fy-Fz);

		//*********** WRITE UPDATED VALUES TO MEMORY ******************
		//....EVEN.....................................
		even[0]   = f0;
		even[N]   = f2;
		even[2*N] = f4;
		even[3*N] = f6;
		even[4*N] = f8;
		even[5*N] = f10;
		even[6*N] = f12;
		even[7*N] = f14;
		even[8*N] = f16;
		even[9*N] = f18;
		//....ODD......................................
		odd[0]   = f1;
		odd[N]   = f3;
		odd[2*N] = f5;
		odd[3*N] = f7;
		odd[4*N] = f9;
		odd[5*N] = f11;
		odd[6*N] = f13;
		odd[7*N] = f15;
		odd[8*N] = f17;
		//...Store the Velocity (momentum sums before forcing)............
		Velocity[3*n]   = jx;
		Velocity[3*n+1] = jy;
		Velocity[3*n+2] = jz;
	}
}
|
||||
//*************************************************************************
|
||||
inline void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
|
||||
double beta, int Nx, int Ny, int Nz, bool pBC)
|
||||
{
|
||||
char id;
|
||||
|
||||
int idx;
|
||||
int in,jn,kn,n,nn,N;
|
||||
int q,Cqx,Cqy,Cqz;
|
||||
// int sendLoc;
|
||||
|
||||
double na,nb; // density values
|
||||
double ux,uy,uz; // flow velocity
|
||||
double nx,ny,nz,C; // color gradient components
|
||||
double a1,a2,b1,b2;
|
||||
double sp,delta;
|
||||
double feq[6]; // equilibrium distributions
|
||||
// Set of Discrete velocities for the D3Q19 Model
|
||||
int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}};
|
||||
N = Nx*Ny*Nz;
|
||||
|
||||
for ( n=0; n<N; n++){
|
||||
id = ID[n];
|
||||
// Local Density Values
|
||||
na = Copy[2*n];
|
||||
nb = Copy[2*n+1];
|
||||
if (id > 0 && na+nb > 0.0){
|
||||
//.......Back out the 3-D indices for node n..............
|
||||
int k = n/(Nx*Ny);
|
||||
int j = (n-Nx*Ny*k)/Nx;
|
||||
int i = n-Nx*Ny*k-Nx*j;
|
||||
//.....Load the Color gradient.........
|
||||
nx = ColorGrad[3*n];
|
||||
ny = ColorGrad[3*n+1];
|
||||
nz = ColorGrad[3*n+2];
|
||||
C = sqrt(nx*nx+ny*ny+nz*nz);
|
||||
nx = nx/C;
|
||||
ny = ny/C;
|
||||
nz = nz/C;
|
||||
//....Load the flow velocity...........
|
||||
ux = Velocity[3*n];
|
||||
uy = Velocity[3*n+1];
|
||||
uz = Velocity[3*n+2];
|
||||
//....Instantiate the density distributions
|
||||
// Generate Equilibrium Distributions and stream
|
||||
// Stationary value - distribution 0
|
||||
Den[2*n] += 0.3333333333333333*na;
|
||||
Den[2*n+1] += 0.3333333333333333*nb;
|
||||
// Non-Stationary equilibrium distributions
|
||||
feq[0] = 0.1111111111111111*(1+3*ux);
|
||||
feq[1] = 0.1111111111111111*(1-3*ux);
|
||||
feq[2] = 0.1111111111111111*(1+3*uy);
|
||||
feq[3] = 0.1111111111111111*(1-3*uy);
|
||||
feq[4] = 0.1111111111111111*(1+3*uz);
|
||||
feq[5] = 0.1111111111111111*(1-3*uz);
|
||||
// Construction and streaming for the components
|
||||
for (idx=0; idx<3; idx++){
|
||||
// Distribution index
|
||||
q = 2*idx;
|
||||
// Associated discrete velocity
|
||||
Cqx = D3Q7[idx][0];
|
||||
Cqy = D3Q7[idx][1];
|
||||
Cqz = D3Q7[idx][2];
|
||||
// Generate the Equilibrium Distribution
|
||||
a1 = na*feq[q];
|
||||
b1 = nb*feq[q];
|
||||
a2 = na*feq[q+1];
|
||||
b2 = nb*feq[q+1];
|
||||
// Recolor the distributions
|
||||
if (C > 0.0){
|
||||
sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz);
|
||||
//if (idx > 2) sp = 0.7071067811865475*sp;
|
||||
//delta = sp*min( min(a1,a2), min(b1,b2) );
|
||||
delta = na*nb/(na+nb)*0.1111111111111111*sp;
|
||||
//if (a1>0 && b1>0){
|
||||
a1 += beta*delta;
|
||||
a2 -= beta*delta;
|
||||
b1 -= beta*delta;
|
||||
b2 += beta*delta;
|
||||
}
|
||||
|
||||
// .......Get the neighbor node..............
|
||||
//nn = n + Stride[idx];
|
||||
in = i+Cqx;
|
||||
jn = j+Cqy;
|
||||
kn = k+Cqz;
|
||||
|
||||
// Adjust for periodic BC, if necessary
|
||||
if (in<0) in+= Nx;
|
||||
if (jn<0) jn+= Ny;
|
||||
if (kn<0) kn+= Nz;
|
||||
if (!(in<Nx)) in-= Nx;
|
||||
if (!(jn<Ny)) jn-= Ny;
|
||||
if (!(kn<Nz)) kn-= Nz;
|
||||
// Perform streaming or bounce-back as needed
|
||||
id = ID[kn*Nx*Ny+jn*Nx+in];
|
||||
if (id == 0){ //.....Bounce-back Rule...........
|
||||
Den[2*n] += a1;
|
||||
Den[2*n+1] += b1;
|
||||
// atomicAdd(&Den[2*n], a1);
|
||||
// atomicAdd(&Den[2*n+1], b1);
|
||||
}
|
||||
else{
|
||||
//......Push the "distribution" to neighboring node...........
|
||||
// Index of the neighbor in the local process
|
||||
//nn = (kn-zmin[rank]+1)*Nxp*Nyp + (jn-ymin[rank]+1)*Nxp + (in-xmin[rank]+1);
|
||||
nn = kn*Nx*Ny+jn*Nx+in;
|
||||
// Push to neighboring node
|
||||
Den[2*nn] += a1;
|
||||
Den[2*nn+1] += b1;
|
||||
// atomicAdd(&Den[2*nn], a1);
|
||||
// atomicAdd(&Den[2*nn+1], b1);
|
||||
}
|
||||
|
||||
// .......Get the neighbor node..............
|
||||
q = 2*idx+1;
|
||||
in = i-Cqx;
|
||||
jn = j-Cqy;
|
||||
kn = k-Cqz;
|
||||
// Adjust for periodic BC, if necessary
|
||||
if (in<0) in+= Nx;
|
||||
if (jn<0) jn+= Ny;
|
||||
if (kn<0) kn+= Nz;
|
||||
if (!(in<Nx)) in-= Nx;
|
||||
if (!(jn<Ny)) jn-= Ny;
|
||||
if (!(kn<Nz)) kn-= Nz;
|
||||
// Perform streaming or bounce-back as needed
|
||||
id = ID[kn*Nx*Ny+jn*Nx+in];
|
||||
if (id == 0){
|
||||
//.....Bounce-back Rule...........
|
||||
Den[2*n] += a2;
|
||||
Den[2*n+1] += b2;
|
||||
// atomicAdd(&Den[2*n], a2);
|
||||
// atomicAdd(&Den[2*n+1], b2);
|
||||
}
|
||||
else{
|
||||
//......Push the "distribution" to neighboring node...........
|
||||
// Index of the neighbor in the local process
|
||||
//nn = (kn-zmin[rank]+1)*Nxp*Nyp + (jn-ymin[rank]+1)*Nxp + (in-xmin[rank]+1);
|
||||
nn = kn*Nx*Ny+jn*Nx+in;
|
||||
// Push to neighboring node
|
||||
Den[2*nn] += a2;
|
||||
Den[2*nn+1] += b2;
|
||||
// atomicAdd(&Den[2*nn], a2);
|
||||
// atomicAdd(&Den[2*nn+1], b2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N)
{
	//...................................................................
	// Refresh the phase indicator from the two-component density.
	// For each of the N sites with ID > 0:
	//   Phi[n]                   <- (A - B)/(A + B), with A = Den[2n], B = Den[2n+1]
	//   Copy[2n], Copy[2n+1]     <- A, B   (snapshot of the densities just used)
	//   Den[2n],  Den[2n+1]      <- 0      (cleared for the next accumulation pass)
	// Sites with ID <= 0 are left untouched in all three arrays.
	// No guard is applied to A + B; a zero total gives a non-finite Phi.
	//...................................................................
	for (int site = 0; site < N; ++site){
		if (!(ID[site] > 0)) continue;

		const int slotA = 2*site;
		const int slotB = slotA + 1;

		// Read both components before anything is overwritten
		const double densA = Den[slotA];
		const double densB = Den[slotB];

		Phi[site] = (densA - densB)/(densA + densB);

		// Keep a snapshot of the densities that produced Phi
		Copy[slotA] = densA;
		Copy[slotB] = densB;

		// Reset the accumulators
		Den[slotA] = 0.0;
		Den[slotB] = 0.0;
	}
}
|
21
cpu/Color.h
Normal file
21
cpu/Color.h
Normal file
@ -0,0 +1,21 @@
|
||||
// Declarations for the two-component color-model routines.
// Only signatures appear here; parameter meanings beyond their names should be
// confirmed against the corresponding definitions.

//*************************************************************************
// Initialization
extern void InitDenColor(char *ID, double *Den, double *Phi,
		double das, double dbs, int N);
extern void InitD3Q19(char *ID, double *f_even, double *f_odd,
		int Nx, int Ny, int Nz);

//*************************************************************************
// Velocity
extern void Compute_VELOCITY(char *ID, double *disteven, double *distodd,
		double *vel, int Nx, int Ny, int Nz);

//*************************************************************************
// Pressure boundary conditions
extern void PressureBC_inlet(double *disteven, double *distodd, double din,
		int Nx, int Ny, int Nz);
extern void PressureBC_outlet(double *disteven, double *distodd, double dout,
		int Nx, int Ny, int Nz, int S, int outlet);

//*************************************************************************
// Color gradient
extern void ComputeColorGradient(char *ID, double *phi, double *ColorGrad,
		int Nx, int Ny, int Nz);

//*************************************************************************
// Collision
extern void ColorCollide(char *ID, double *disteven, double *distodd,
		double *ColorGrad, double *Velocity,
		int Nx, int Ny, int Nz,
		double rlx_setA, double rlx_setB,
		double alpha, double beta,
		double Fx, double Fy, double Fz, bool pBC);

//*************************************************************************
// Density streaming and phase-indicator update
extern void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi,
		double *ColorGrad, double *Velocity,
		double beta, int Nx, int Ny, int Nz, bool pBC);
extern void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N);
|
21
cpu/Color.h~
Normal file
21
cpu/Color.h~
Normal file
@ -0,0 +1,21 @@
|
||||
// Declarations for the two-component color-model routines (editor backup copy).
// Only signatures appear here; parameter meanings beyond their names should be
// confirmed against the corresponding definitions.

//*************************************************************************
// Initialization
extern void InitDenColor(char *ID, double *Den, double *Phi,
		double das, double dbs, int N);
extern void InitD3Q19(char *ID, double *f_even, double *f_odd,
		int Nx, int Ny, int Nz);

//*************************************************************************
// Velocity
extern void Compute_VELOCITY(char *ID, double *disteven, double *distodd,
		double *vel, int Nx, int Ny, int Nz);

//*************************************************************************
// Pressure boundary conditions
extern void PressureBC_inlet(double *disteven, double *distodd, double din,
		int Nx, int Ny, int Nz);
extern void PressureBC_outlet(double *disteven, double *distodd, double dout,
		int Nx, int Ny, int Nz, int S, int outlet);

//*************************************************************************
// Color gradient
extern void ComputeColorGradient(char *ID, double *phi, double *ColorGrad,
		int Nx, int Ny, int Nz);

//*************************************************************************
// Collision (this copy of the header takes an additional int S after Nz)
extern void ColorCollide(char *ID, double *disteven, double *distodd,
		double *ColorGrad, double *Velocity,
		int Nx, int Ny, int Nz, int S,
		double rlx_setA, double rlx_setB,
		double alpha, double beta,
		double Fx, double Fy, double Fz, bool pBC);

//*************************************************************************
// Density streaming and phase-indicator update
extern void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi,
		double *ColorGrad, double *Velocity,
		double beta, int Nx, int Ny, int Nz, bool pBC);
extern void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N);
|
BIN
cpu/Color.o
Normal file
BIN
cpu/Color.o
Normal file
Binary file not shown.
BIN
cpu/ColorLBM-cpu
Executable file
BIN
cpu/ColorLBM-cpu
Executable file
Binary file not shown.
190
cpu/D3Q19.cpp
Normal file
190
cpu/D3Q19.cpp
Normal file
@ -0,0 +1,190 @@
|
||||
extern void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
	//....................................................................................
	// Gather one distribution component into a send buffer.
	//   q       - component index; component q occupies dist[q*N .. q*N+N-1]
	//   list    - site indices to gather (count entries)
	//   start   - offset in sendbuf at which the gathered values begin
	//   sendbuf - destination; entries start .. start+count-1 are written
	//   dist    - source distributions (may be the even or the odd set)
	//   N       - number of sites per component
	//....................................................................................
	int slot = 0;
	while (slot < count){
		const int site = list[slot];
		sendbuf[start + slot] = dist[q*N + site];
		++slot;
	}
}
|
||||
|
||||
|
||||
|
||||
extern void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
		double *recvbuf, double *dist, int Nx, int Ny, int Nz){
	//....................................................................................
	// Unpack distribution q from the recv buffer.
	//   q            - component index; component q occupies dist[q*N .. q*N+N-1]
	//   Cqx,Cqy,Cqz  - lattice shift applied to each listed site before storing
	//   list         - site indices (count entries); per the original note these are
	//                  indices as seen by the sending process
	//   start        - offset of the first value to read in recvbuf
	//   dist         - destination distributions (may be the even or the odd set)
	// Original author's note: the swap rule means that the distributions in recvbuf
	// are OPPOSITE of q.
	//
	// Sites are indexed n = k*Nx*Ny + j*Nx + i, so shifting (i,j,k) by (Cqx,Cqy,Cqz)
	// moves the linear index by Cqz*Nx*Ny + Cqy*Nx + Cqx. The earlier code recovered
	// i with "n - Nx*Ny*k - Nz*j", which is only correct when Nx == Nz and otherwise
	// stored the value at the wrong (possibly out-of-range) site.
	//
	// No periodic wrap is applied to the shifted site: the caller must ensure that
	// every listed site plus the shift stays inside the Nx x Ny x Nz box.
	//....................................................................................
	const int N = Nx*Ny*Nz;
	const int shift = Cqz*Nx*Ny + Cqy*Nx + Cqx;
	for (int idx=0; idx<count; idx++){
		const int nn = list[idx] + shift;	// shifted destination site
		dist[q*N+nn] = recvbuf[start+idx];
	}
}
|
||||
|
||||
//*************************************************************************
|
||||
extern void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz)
{
	//....................................................................................
	// In-place streaming with the swap convention on a periodic Nx x Ny x Nz box.
	// Sites are indexed n = k*Nx*Ny + j*Nx + i and N = Nx*Ny*Nz.
	//
	// For every site n with ID[n] > 0 and every odd slot s = 0..8, the value
	// distodd[s*N+n] is exchanged with disteven[(s+1)*N+nn], where nn is the periodic
	// neighbor of n along shift[s]. The exchange is skipped unless the neighbor's
	// even value is > 0.
	// NOTE(review): presumably a non-positive value marks a site without a valid
	// distribution -- confirm against the initialization code.
	//
	// disteven[0..N-1] (the rest distribution) does not take part in streaming.
	//
	// The earlier code recovered i with "n - Nx*Ny*k - Nz*j", which is only correct
	// when Nx == Nz; for other box shapes the x-boundary wrap picked the wrong neighbor.
	//....................................................................................

	// Neighbor direction (dx,dy,dz) paired with odd slot s
	static const int shift[9][3] = {
		{ 1, 0, 0},	// s=0 : swaps with even slot 1
		{ 0, 1, 0},	// s=1 : swaps with even slot 2
		{ 0, 0, 1},	// s=2 : swaps with even slot 3
		{ 1, 1, 0},	// s=3 : swaps with even slot 4
		{ 1,-1, 0},	// s=4 : swaps with even slot 5
		{ 1, 0, 1},	// s=5 : swaps with even slot 6
		{ 1, 0,-1},	// s=6 : swaps with even slot 7
		{ 0, 1, 1},	// s=7 : swaps with even slot 8
		{ 0, 1,-1}	// s=8 : swaps with even slot 9
	};

	const int N = Nx*Ny*Nz;

	for (int n=0; n<N; n++){
		if (!(ID[n] > 0)) continue;

		//.......Back out the 3-D indices for node n..............
		const int k = n/(Nx*Ny);
		const int j = (n-Nx*Ny*k)/Nx;
		const int i = n-Nx*Ny*k-Nx*j;

		for (int s=0; s<9; s++){
			// Neighbor coordinates with periodic wrap in each direction
			int in = i + shift[s][0];
			int jn = j + shift[s][1];
			int kn = k + shift[s][2];
			if (in < 0) in += Nx; else if (!(in < Nx)) in -= Nx;
			if (jn < 0) jn += Ny; else if (!(jn < Ny)) jn -= Ny;
			if (kn < 0) kn += Nz; else if (!(kn < Nz)) kn -= Nz;
			const int nn = kn*Nx*Ny + jn*Nx + in;

			const double fodd  = distodd[s*N+n];		// local odd value
			const double feven = disteven[(s+1)*N+nn];	// neighbor's even value
			if (feven > 0){
				distodd[s*N+n] = feven;
				disteven[(s+1)*N+nn] = fodd;
			}
		}
	}
}
|
189
cpu/D3Q19.cpp~
Normal file
189
cpu/D3Q19.cpp~
Normal file
@ -0,0 +1,189 @@
|
||||
|
||||
inline void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
	//....................................................................................
	// Gather one distribution component into a send buffer.
	//   q       - component index; component q occupies dist[q*N .. q*N+N-1]
	//   list    - site indices to gather (count entries)
	//   start   - offset in sendbuf at which the gathered values begin
	//   sendbuf - destination; entries start .. start+count-1 are written
	//   dist    - source distributions (may be the even or the odd set)
	//   N       - number of sites per component
	//....................................................................................
	int slot = 0;
	while (slot < count){
		const int site = list[slot];
		sendbuf[start + slot] = dist[q*N + site];
		++slot;
	}
}
|
||||
|
||||
inline void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
		double *recvbuf, double *dist, int Nx, int Ny, int Nz){
	//....................................................................................
	// Unpack distribution q from the recv buffer.
	//   q            - component index; component q occupies dist[q*N .. q*N+N-1]
	//   Cqx,Cqy,Cqz  - lattice shift applied to each listed site before storing
	//   list         - site indices (count entries); per the original note these are
	//                  indices as seen by the sending process
	//   start        - offset of the first value to read in recvbuf
	//   dist         - destination distributions (may be the even or the odd set)
	// Original author's note: the swap rule means that the distributions in recvbuf
	// are OPPOSITE of q.
	//
	// Sites are indexed n = k*Nx*Ny + j*Nx + i, so shifting (i,j,k) by (Cqx,Cqy,Cqz)
	// moves the linear index by Cqz*Nx*Ny + Cqy*Nx + Cqx. The earlier code recovered
	// i with "n - Nx*Ny*k - Nz*j", which is only correct when Nx == Nz and otherwise
	// stored the value at the wrong (possibly out-of-range) site.
	//
	// No periodic wrap is applied to the shifted site: the caller must ensure that
	// every listed site plus the shift stays inside the Nx x Ny x Nz box.
	//....................................................................................
	const int N = Nx*Ny*Nz;
	const int shift = Cqz*Nx*Ny + Cqy*Nx + Cqx;
	for (int idx=0; idx<count; idx++){
		const int nn = list[idx] + shift;	// shifted destination site
		dist[q*N+nn] = recvbuf[start+idx];
	}
}
|
||||
|
||||
//*************************************************************************
|
||||
inline void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz)
{
	//....................................................................................
	// In-place streaming with the swap convention on a periodic Nx x Ny x Nz box.
	// Sites are indexed n = k*Nx*Ny + j*Nx + i and N = Nx*Ny*Nz.
	//
	// For every site n with ID[n] > 0 and every odd slot s = 0..8, the value
	// distodd[s*N+n] is exchanged with disteven[(s+1)*N+nn], where nn is the periodic
	// neighbor of n along shift[s]. The exchange is skipped unless the neighbor's
	// even value is > 0.
	// NOTE(review): presumably a non-positive value marks a site without a valid
	// distribution -- confirm against the initialization code.
	//
	// disteven[0..N-1] (the rest distribution) does not take part in streaming.
	//
	// The earlier code recovered i with "n - Nx*Ny*k - Nz*j", which is only correct
	// when Nx == Nz; for other box shapes the x-boundary wrap picked the wrong neighbor.
	//....................................................................................

	// Neighbor direction (dx,dy,dz) paired with odd slot s
	static const int shift[9][3] = {
		{ 1, 0, 0},	// s=0 : swaps with even slot 1
		{ 0, 1, 0},	// s=1 : swaps with even slot 2
		{ 0, 0, 1},	// s=2 : swaps with even slot 3
		{ 1, 1, 0},	// s=3 : swaps with even slot 4
		{ 1,-1, 0},	// s=4 : swaps with even slot 5
		{ 1, 0, 1},	// s=5 : swaps with even slot 6
		{ 1, 0,-1},	// s=6 : swaps with even slot 7
		{ 0, 1, 1},	// s=7 : swaps with even slot 8
		{ 0, 1,-1}	// s=8 : swaps with even slot 9
	};

	const int N = Nx*Ny*Nz;

	for (int n=0; n<N; n++){
		if (!(ID[n] > 0)) continue;

		//.......Back out the 3-D indices for node n..............
		const int k = n/(Nx*Ny);
		const int j = (n-Nx*Ny*k)/Nx;
		const int i = n-Nx*Ny*k-Nx*j;

		for (int s=0; s<9; s++){
			// Neighbor coordinates with periodic wrap in each direction
			int in = i + shift[s][0];
			int jn = j + shift[s][1];
			int kn = k + shift[s][2];
			if (in < 0) in += Nx; else if (!(in < Nx)) in -= Nx;
			if (jn < 0) jn += Ny; else if (!(jn < Ny)) jn -= Ny;
			if (kn < 0) kn += Nz; else if (!(kn < Nz)) kn -= Nz;
			const int nn = kn*Nx*Ny + jn*Nx + in;

			const double fodd  = distodd[s*N+n];		// local odd value
			const double feven = disteven[(s+1)*N+nn];	// neighbor's even value
			if (feven > 0){
				distodd[s*N+n] = feven;
				disteven[(s+1)*N+nn] = fodd;
			}
		}
	}
}
|
6
cpu/D3Q19.h
Normal file
6
cpu/D3Q19.h
Normal file
@ -0,0 +1,6 @@
|
||||
|
||||
// Communication helpers: copy one distribution component to / from a message buffer
extern void PackDist(int q, int *list, int start, int count,
		double *sendbuf, double *dist, int N);
extern void MapRecvDist(int q, int Cqx, int Cqy, int Cqz,
		int *list, int start, int count,
		double *recvbuf, double *dist,
		int Nx, int Ny, int Nz);

//*************************************************************************
// Streaming step (swap convention)
extern void SwapD3Q19(char *ID, double *disteven, double *distodd,
		int Nx, int Ny, int Nz);
|
7
cpu/D3Q19.h~
Normal file
7
cpu/D3Q19.h~
Normal file
@ -0,0 +1,7 @@
|
||||
|
||||
// Communication helpers: copy one distribution component to / from a message buffer
extern void PackDist(int q, int *list, int start, int count,
		double *sendbuf, double *dist, int N);

extern void MapRecvDist(int q, int Cqx, int Cqy, int Cqz,
		int *list, int start, int count,
		double *recvbuf, double *dist,
		int Nx, int Ny, int Nz);

//*************************************************************************
// Streaming step (swap convention)
extern void SwapD3Q19(char *ID, double *disteven, double *distodd,
		int Nx, int Ny, int Nz);
|
BIN
cpu/D3Q19.o
Normal file
BIN
cpu/D3Q19.o
Normal file
Binary file not shown.
54
cpu/D3Q7.cpp
Normal file
54
cpu/D3Q7.cpp
Normal file
@ -0,0 +1,54 @@
|
||||
// CPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||
|
||||
extern void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
	//....................................................................................
	// Gather scalar values into a send buffer:
	//   sendbuf[m] = Data[list[m]]   for m = 0 .. count-1
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	int slot = 0;
	while (slot < count){
		sendbuf[slot] = Data[list[slot]];
		++slot;
	}
}
|
||||
extern void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
	//....................................................................................
	// Scatter scalar values out of a recv buffer (overwrites the destination):
	//   Data[list[m]] = recvbuf[m]   for m = 0 .. count-1
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	int slot = 0;
	while (slot < count){
		Data[list[slot]] = recvbuf[slot];
		++slot;
	}
}
|
||||
|
||||
extern void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
	//....................................................................................
	// Move multi-component site data into a send buffer.
	// Data holds `number` consecutive values per site (Data[number*n + c]); for each
	// listed site the values are copied to sendbuf[m*number + c] and the source entry
	// is then set to zero.
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	for (int slot = 0; slot < count; ++slot){
		for (int c = 0; c < number; ++c){
			const int from = number*list[slot] + c;
			const int to   = slot*number + c;
			sendbuf[to] = Data[from];
			Data[from] = 0.0;	// value now lives only in the buffer
		}
	}
}
|
||||
|
||||
|
||||
extern void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
	//....................................................................................
	// Accumulate multi-component site data from a recv buffer.
	// Data holds `number` consecutive values per site (Data[number*n + c]); for each
	// listed site recvbuf[m*number + c] is ADDED to the existing value.
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	for (int slot = 0; slot < count; ++slot){
		for (int c = 0; c < number; ++c){
			const int to   = number*list[slot] + c;
			const int from = slot*number + c;
			Data[to] += recvbuf[from];
		}
	}
}
|
||||
|
53
cpu/D3Q7.cpp~
Normal file
53
cpu/D3Q7.cpp~
Normal file
@ -0,0 +1,53 @@
|
||||
// CPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||
|
||||
inline void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
	//....................................................................................
	// Gather scalar values into a send buffer:
	//   sendbuf[m] = Data[list[m]]   for m = 0 .. count-1
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	int slot = 0;
	while (slot < count){
		sendbuf[slot] = Data[list[slot]];
		++slot;
	}
}
|
||||
inline void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
	//....................................................................................
	// Scatter scalar values out of a recv buffer (overwrites the destination):
	//   Data[list[m]] = recvbuf[m]   for m = 0 .. count-1
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	int slot = 0;
	while (slot < count){
		Data[list[slot]] = recvbuf[slot];
		++slot;
	}
}
|
||||
|
||||
inline void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
	//....................................................................................
	// Copy multi-component site data into a send buffer.
	// Data holds `number` consecutive values per site (Data[number*n + c]); for each
	// listed site the values are copied to sendbuf[m*number + c]. The source values
	// are left unchanged.
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	for (int slot = 0; slot < count; ++slot){
		for (int c = 0; c < number; ++c){
			const int from = number*list[slot] + c;
			const int to   = slot*number + c;
			sendbuf[to] = Data[from];
		}
	}
}
|
||||
|
||||
|
||||
inline void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
	//....................................................................................
	// Accumulate multi-component site data from a recv buffer.
	// Data holds `number` consecutive values per site (Data[number*n + c]); for each
	// listed site recvbuf[m*number + c] is ADDED to the existing value.
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	for (int slot = 0; slot < count; ++slot){
		for (int c = 0; c < number; ++c){
			const int to   = number*list[slot] + c;
			const int from = slot*number + c;
			Data[to] += recvbuf[from];
		}
	}
}
|
||||
|
9
cpu/D3Q7.h
Normal file
9
cpu/D3Q7.h
Normal file
@ -0,0 +1,9 @@
|
||||
// CPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||
|
||||
// Scalar field exchange: one value per listed site
extern void PackValues(int *list, int count, double *sendbuf,
		double *Data, int N);
extern void UnpackValues(int *list, int count, double *recvbuf,
		double *Data, int N);

// Multi-component exchange: `number` values per listed site
extern void PackDenD3Q7(int *list, int count, double *sendbuf,
		int number, double *Data, int N);
extern void UnpackDenD3Q7(int *list, int count, double *recvbuf,
		int number, double *Data, int N);
|
53
cpu/D3Q7.h~
Normal file
53
cpu/D3Q7.h~
Normal file
@ -0,0 +1,53 @@
|
||||
// CPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||
|
||||
inline void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
	//....................................................................................
	// Gather scalar values into a send buffer:
	//   sendbuf[m] = Data[list[m]]   for m = 0 .. count-1
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	int slot = 0;
	while (slot < count){
		sendbuf[slot] = Data[list[slot]];
		++slot;
	}
}
|
||||
inline void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
	//....................................................................................
	// Scatter scalar values out of a recv buffer (overwrites the destination):
	//   Data[list[m]] = recvbuf[m]   for m = 0 .. count-1
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	int slot = 0;
	while (slot < count){
		Data[list[slot]] = recvbuf[slot];
		++slot;
	}
}
|
||||
|
||||
inline void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
	//....................................................................................
	// Copy multi-component site data into a send buffer.
	// Data holds `number` consecutive values per site (Data[number*n + c]); for each
	// listed site the values are copied to sendbuf[m*number + c]. The source values
	// are left unchanged.
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	for (int slot = 0; slot < count; ++slot){
		for (int c = 0; c < number; ++c){
			const int from = number*list[slot] + c;
			const int to   = slot*number + c;
			sendbuf[to] = Data[from];
		}
	}
}
|
||||
|
||||
|
||||
inline void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
	//....................................................................................
	// Accumulate multi-component site data from a recv buffer.
	// Data holds `number` consecutive values per site (Data[number*n + c]); for each
	// listed site recvbuf[m*number + c] is ADDED to the existing value.
	// N is part of the signature but is not read by this routine.
	//....................................................................................
	for (int slot = 0; slot < count; ++slot){
		for (int c = 0; c < number; ++c){
			const int to   = number*list[slot] + c;
			const int from = slot*number + c;
			Data[to] += recvbuf[from];
		}
	}
}
|
||||
|
BIN
cpu/D3Q7.o
Normal file
BIN
cpu/D3Q7.o
Normal file
Binary file not shown.
BIN
cpu/LBC-MPI
Executable file
BIN
cpu/LBC-MPI
Executable file
Binary file not shown.
BIN
cpu/LBC-MPI.o
Normal file
BIN
cpu/LBC-MPI.o
Normal file
Binary file not shown.
BIN
cpu/MRT-MPI
Executable file
BIN
cpu/MRT-MPI
Executable file
Binary file not shown.
BIN
cpu/MRT-MPI.o
Normal file
BIN
cpu/MRT-MPI.o
Normal file
Binary file not shown.
312
cpu/MRT.cu
Normal file
312
cpu/MRT.cu
Normal file
@ -0,0 +1,312 @@
|
||||
#include <cuda.h>
|
||||
|
||||
// CUDA kernels for single-phase MRT code
|
||||
// James McClure
|
||||
//*************************************************************************
|
||||
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz, int S)
{
	// Set the initial distributions at every site n < Nx*Ny*Nz.
	//   ID     - per-site label; sites with ID > 0 get the values below, all others get -1.0
	//   f_even - 10 components stored with stride N (component q at f_even[q*N+n])
	//   f_odd  -  9 components stored with stride N (component q at f_odd[q*N+n])
	//   S      - number of sites handled by each thread; a block covers S*blockDim.x
	//            consecutive sites and a thread steps through them blockDim.x apart
	// Values written where ID > 0 (numerically 1/3, 1/18 and 1/36):
	//   f_even[0]    = 0.3333333333333333
	//   f_even[1..3] = f_odd[0..2] = 0.055555555555555555
	//   f_even[4..9] = f_odd[3..8] = 0.0277777777777778
	const int N = Nx*Ny*Nz;

	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
		if (n >= N) continue;

		// Pick the three values once, then share a single write path for both cases
		double centre, low, high;
		if (ID[n] > 0){
			centre = 0.3333333333333333;
			low    = 0.055555555555555555;
			high   = 0.0277777777777778;
		}
		else{
			centre = -1.0;
			low    = -1.0;
			high   = -1.0;
		}

		f_even[n] = centre;
		for (int q=0; q<3; q++){
			f_odd[q*N+n] = low;
			f_even[(q+1)*N+n] = low;
		}
		for (int q=3; q<9; q++){
			f_odd[q*N+n] = high;
			f_even[(q+1)*N+n] = high;
		}
	}
}
|
||||
|
||||
__global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz, int S)
{
	// For every site with ID > 0, combine the 18 non-zero-index distributions into
	// three components and store them at vel[n], vel[N+n], vel[2*N+n] (N = Nx*Ny*Nz).
	// Sites with ID <= 0 are left untouched in vel.
	//   disteven - even-numbered distributions f2,f4,...,f18 at components 1..9 (stride N)
	//   distodd  - odd-numbered distributions f1,f3,...,f17 at components 0..8 (stride N)
	//   S        - number of sites handled by each thread
	const int N = Nx*Ny*Nz;

	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;

		if (n >= N) continue;
		if (!(ID[n] > 0)) continue;

		// Per-site views into the two strided arrays
		const double *even = disteven + n;
		const double *odd  = distodd + n;

		//........................................................................
		// Registers to store the distributions
		//........................................................................
		const double f1  = odd[0];
		const double f2  = even[N];
		const double f3  = odd[1*N];
		const double f4  = even[2*N];
		const double f5  = odd[2*N];
		const double f6  = even[3*N];
		const double f7  = odd[3*N];
		const double f8  = even[4*N];
		const double f9  = odd[4*N];
		const double f10 = even[5*N];
		const double f11 = odd[5*N];
		const double f12 = even[6*N];
		const double f13 = odd[6*N];
		const double f14 = even[7*N];
		const double f15 = odd[7*N];
		const double f16 = even[8*N];
		const double f17 = odd[8*N];
		const double f18 = even[9*N];

		//.................Compute and write the velocity.........................
		vel[n]     = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
		vel[N+n]   = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
		vel[2*N+n] = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
	}
}
|
||||
|
||||
//*************************************************************************
|
||||
__global__ void MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz, int S,
		double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz)
{
	// Collision step: for every site with ID > 0, read the 19 distributions,
	// transform them to moments, relax the non-conserved moments, transform back,
	// add the external force terms and write the result.
	//   disteven - 10 components stored with stride N = Nx*Ny*Nz
	//   distodd  -  9 components stored with stride N
	//   S        - number of sites handled by each thread; a block covers
	//              S*blockDim.x consecutive sites
	//   rlx_setA, rlx_setB - relaxation rates applied to the two groups of moments below
	//   Fx, Fy, Fz         - external force components
	// Note the layout asymmetry: values are READ with f2,f4,... taken from distodd and
	// f1,f3,... from disteven ("swap convention") and WRITTEN back with f2,f4,... in
	// disteven and f1,f3,... in distodd ("un-swapped convention").

	int n,N;
	// distributions
	double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
	double f10,f11,f12,f13,f14,f15,f16,f17,f18;

	// conserved moments
	double rho,jx,jy,jz;
	// non-conserved moments
	double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;

	N = Nx*Ny*Nz;

	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;

		if (n<N){
			// The site label is loaded only after the bounds check; reading ID[n]
			// ahead of it touched memory past the end of ID for tail threads.
			const char id = ID[n];

			if (id > 0){
				//........................................................................
				// Registers to store the distributions - read based on swap convention
				//........................................................................
				f2 = distodd[n];
				f4 = distodd[N+n];
				f6 = distodd[2*N+n];
				f8 = distodd[3*N+n];
				f10 = distodd[4*N+n];
				f12 = distodd[5*N+n];
				f14 = distodd[6*N+n];
				f16 = distodd[7*N+n];
				f18 = distodd[8*N+n];
				//........................................................................
				f0 = disteven[n];
				f1 = disteven[N+n];
				f3 = disteven[2*N+n];
				f5 = disteven[3*N+n];
				f7 = disteven[4*N+n];
				f9 = disteven[5*N+n];
				f11 = disteven[6*N+n];
				f13 = disteven[7*N+n];
				f15 = disteven[8*N+n];
				f17 = disteven[9*N+n];
				//........................................................................
				//....................compute the moments...............................................
				rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
				m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
				m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
				jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
				m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
				jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
				m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
				jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
				m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
				m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
				m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
				m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
				m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
				m13 = f8+f7-f10-f9;
				m14 = f16+f15-f18-f17;
				m15 = f12+f11-f14-f13;
				m16 = f7-f8+f9-f10-f11+f12-f13+f14;
				m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
				m18 = f11-f12-f13+f14-f15+f16+f17-f18;
				//..............incorporate external force................................................
				//jx += 0.5*Fx;
				//jy += 0.5*Fy;
				//jz += 0.5*Fz;
				//..............carry out relaxation process...............................................
				// Each moment moves toward its target value by the fraction rlx_setA or rlx_setB
				m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1);
				m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2);
				m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
				m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
				m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
				m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9);
				m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
				m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11);
				m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12);
				m13 = m13 + rlx_setA*((jx*jy/rho) - m13);
				m14 = m14 + rlx_setA*((jy*jz/rho) - m14);
				m15 = m15 + rlx_setA*((jx*jz/rho) - m15);
				m16 = m16 + rlx_setB*( - m16);
				m17 = m17 + rlx_setB*( - m17);
				m18 = m18 + rlx_setB*( - m18);
				//.................inverse transformation......................................................
				f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
				f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jx-m4)+0.05555555555555555*(m9-m10);
				f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m4-jx)+0.05555555555555555*(m9-m10);
				f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
				f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
				f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
				f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
				f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
				f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
				f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
				f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
				f11 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
				f12 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
				f13 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
				f14 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
				f15 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
						-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
				f16 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
						-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
				f17 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
						-0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
				f18 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
						-0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
				//.......................................................................................................
				// incorporate external force
				// NOTE(review): 0.16666666 is written with fewer digits than 0.08333333333;
				// presumably 1/6 and 1/12 - confirm before tightening the constants.
				f1 += 0.16666666*Fx;
				f2 -= 0.16666666*Fx;
				f3 += 0.16666666*Fy;
				f4 -= 0.16666666*Fy;
				f5 += 0.16666666*Fz;
				f6 -= 0.16666666*Fz;
				f7 += 0.08333333333*(Fx+Fy);
				f8 -= 0.08333333333*(Fx+Fy);
				f9 += 0.08333333333*(Fx-Fy);
				f10 -= 0.08333333333*(Fx-Fy);
				f11 += 0.08333333333*(Fx+Fz);
				f12 -= 0.08333333333*(Fx+Fz);
				f13 += 0.08333333333*(Fx-Fz);
				f14 -= 0.08333333333*(Fx-Fz);
				f15 += 0.08333333333*(Fy+Fz);
				f16 -= 0.08333333333*(Fy+Fz);
				f17 += 0.08333333333*(Fy-Fz);
				f18 -= 0.08333333333*(Fy-Fz);
				//.......................................................................................................
				// Write data based on un-swapped convention
				disteven[n] = f0;
				disteven[N+n] = f2;
				disteven[2*N+n] = f4;
				disteven[3*N+n] = f6;
				disteven[4*N+n] = f8;
				disteven[5*N+n] = f10;
				disteven[6*N+n] = f12;
				disteven[7*N+n] = f14;
				disteven[8*N+n] = f16;
				disteven[9*N+n] = f18;

				distodd[n] = f1;
				distodd[N+n] = f3;
				distodd[2*N+n] = f5;
				distodd[3*N+n] = f7;
				distodd[4*N+n] = f9;
				distodd[5*N+n] = f11;
				distodd[6*N+n] = f13;
				distodd[7*N+n] = f15;
				distodd[8*N+n] = f17;
				//.......................................................................................................
			}
		}
	}
}
|
||||
|
||||
extern "C" void dvc_MRT(int nblocks, int nthreads, int S, char *ID,
		double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
		int Nx, int Ny, int Nz)
{
	// C-linkage host entry point that launches the MRT kernel on a 1-D grid of
	// nblocks blocks with nthreads threads each; all remaining arguments are
	// forwarded to the kernel unchanged.
	// NOTE(review): the prototypes in lb1_MRT.cu and lb1_MRT-swap.cu declare this
	// symbol as (ID, f_even, f_odd, rlxA, rlxB, Fx, Fy, Fz, Nx, Ny, Nz, nblocks,
	// nthreads, S) - a different argument order from this definition. Confirm which
	// callers link against this file.
	const dim3 gridShape(nblocks);
	const dim3 blockShape(nthreads);
	MRT <<< gridShape, blockShape >>> (ID, f_even, f_odd, Nx, Ny, Nz, S, rlxA, rlxB, Fx, Fy, Fz);
}
|
||||
|
||||
extern "C" void dvc_InitD3Q19( int nblocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
	// C-linkage host entry point that launches the INITIALIZE kernel on a 1-D grid
	// of nblocks blocks with nthreads threads each.
	// NOTE(review): the prototypes in lb1_MRT.cu and lb1_MRT-swap.cu declare this
	// symbol as (ID, f_even, f_odd, Nx, Ny, Nz, nblocks, nthreads, S) - a different
	// argument order from this definition. Confirm which callers link against this file.
	const dim3 gridShape(nblocks);
	const dim3 blockShape(nthreads);
	INITIALIZE <<< gridShape, blockShape >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
}
|
||||
|
||||
|
28
cpu/Makefile
Normal file
28
cpu/Makefile
Normal file
@ -0,0 +1,28 @@
|
||||
# Builds ColorLBM-cpu from D3Q19.cpp, D3Q7.cpp, Color.cpp and lb2_Color_mpi.cpp
# using the MPI C++ compiler wrapper.
CXX=mpicxx
FLAGS=-O3

# clean produces no file of that name, so it must always run
.PHONY: clean

ColorLBM-cpu:D3Q19.o D3Q7.o Color.o lb2_Color_mpi.o
	$(CXX) $(FLAGS) -o ColorLBM-cpu lb2_Color_mpi.o D3Q19.o D3Q7.o Color.o

D3Q19.o:D3Q19.cpp
	$(CXX) $(FLAGS) -c -o D3Q19.o D3Q19.cpp

D3Q7.o:D3Q7.cpp
	$(CXX) $(FLAGS) -c -o D3Q7.o D3Q7.cpp

Color.o:Color.cpp
	$(CXX) $(FLAGS) -c -o Color.o Color.cpp

lb2_Color_mpi.o:lb2_Color_mpi.cpp
	$(CXX) $(FLAGS) -c -o lb2_Color_mpi.o lb2_Color_mpi.cpp

#MRT-MPI.o:lb1_MRT_mpi.cpp
#	$(CXX) -c -o MRT-MPI.o lb1_MRT_mpi.cpp

#MRT-MPI:D3Q19.o MRT-MPI.o
#	$(CXX) -o MRT-MPI D3Q19.o MRT-MPI.o

# -f keeps clean from failing when there are no object files to remove
clean:
	rm -f *.o
#	rm bin/*
#	rm lib/*
|
28
cpu/Makefile~
Normal file
28
cpu/Makefile~
Normal file
@ -0,0 +1,28 @@
|
||||
# Builds ColorLBM-cpu from D3Q19.cpp, D3Q7.cpp, Color.cpp and lb2_Color_mpi.cpp
# using the mpicxx found under $(VT_MPI_BIN).
CXX=$(VT_MPI_BIN)/mpicxx
FLAGS=-O3

# clean produces no file of that name, so it must always run
.PHONY: clean

ColorLBM-cpu:D3Q19.o D3Q7.o Color.o lb2_Color_mpi.o
	$(CXX) $(FLAGS) -o ColorLBM-cpu lb2_Color_mpi.o D3Q19.o D3Q7.o Color.o

D3Q19.o:D3Q19.cpp
	$(CXX) $(FLAGS) -c -o D3Q19.o D3Q19.cpp

D3Q7.o:D3Q7.cpp
	$(CXX) $(FLAGS) -c -o D3Q7.o D3Q7.cpp

Color.o:Color.cpp
	$(CXX) $(FLAGS) -c -o Color.o Color.cpp

lb2_Color_mpi.o:lb2_Color_mpi.cpp
	$(CXX) $(FLAGS) -c -o lb2_Color_mpi.o lb2_Color_mpi.cpp

#MRT-MPI.o:lb1_MRT_mpi.cpp
#	$(CXX) -c -o MRT-MPI.o lb1_MRT_mpi.cpp

#MRT-MPI:D3Q19.o MRT-MPI.o
#	$(CXX) -o MRT-MPI D3Q19.o MRT-MPI.o

# -f keeps clean from failing when there are no object files to remove
clean:
	rm -f *.o
#	rm bin/*
#	rm lib/*
|
248
cpu/lb1_MRT-swap.cu
Normal file
248
cpu/lb1_MRT-swap.cu
Normal file
@ -0,0 +1,248 @@
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cuda.h>
|
||||
//#include <cutil.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
|
||||
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
|
||||
void Write_Out(double *array, int Nx, int Ny, int Nz){
	// Dump a scalar field to the text file "dist.list" in the working directory.
	//   array      - Nx*Ny*Nz values, x fastest: index = k*Nx*Ny + j*Nx + i
	//   Nx, Ny, Nz - extent of the field in each direction
	// Every value is truncated to int before printing. One text line is written per
	// (j,k) row, and a line of asterisks follows each k-slice.
	// If the file cannot be opened a message goes to stderr and nothing is written.
	FILE *output = fopen("dist.list","w");
	if (output == NULL){
		// Without this check the NULL handle was passed straight to fprintf
		fprintf(stderr, "Write_Out: unable to open dist.list for writing\n");
		return;
	}
	for (int k=0; k<Nz; k++){
		for (int j=0; j<Ny; j++){
			for (int i=0; i<Nx; i++){
				int index = k*Nx*Ny+j*Nx+i;
				int value = int(array[index]);
				fprintf(output, "| %i",value);
			}
			fprintf(output, " | \n");
		}
		fprintf(output,"************************************** \n");
	}
	fclose(output);
}
|
||||
|
||||
//**************************************************************************
|
||||
// MRT implementation of the LBM using CUDA
|
||||
//**************************************************************************
|
||||
|
||||
int main(void)
{
	// Single-GPU MRT driver:
	//   1. read run parameters from "MRT.in" and the cubic media file it names
	//   2. copy the site labels to the device and initialise the distributions
	//   3. run timestepMax iterations of swap + MRT collision, timed with CUDA events
	//   4. copy back the velocity buffer, write its first row of z-components to
	//      "velocity.out" and print an average
	// Returns 0 on success and 1 when an input file is missing or the parameters
	// are unusable.

	int deviceCount = 0;
	cudaGetDeviceCount(&deviceCount);
	int device = 1;
	// Fall back to device 0 when the preferred device index does not exist
	if (device >= deviceCount) device = 0;
	printf("Number of devices = %i \n", deviceCount);
	printf("Current device is = %i \n", device);
	cudaSetDevice(device);

	// BGK Model parameters
	string FILENAME;
	unsigned int nBlocks = 0, nthreads = 0;
	int timestepMax = 0, interval = 0;
	double tau = 0.0, Fx = 0.0, Fy = 0.0, Fz = 0.0, tol = 0.0;
	// Domain variables
	int Nx,Ny,Nz = 0;

	ifstream input("MRT.in");
	if (!input.is_open()){
		fprintf(stderr, "Unable to open MRT.in\n");
		return 1;
	}
	input >> FILENAME;		// name of the input file
	input >> Nz;			// number of nodes (x,y,z)
	input >> nBlocks;
	input >> nthreads;
	input >> tau;			// relaxation time
	input >> Fx;			// External force components (x,y,z)
	input >> Fy;
	input >> Fz;
	input >> timestepMax;	// max no. of timesteps
	input >> interval;		// error interval
	input >> tol;			// error tolerance

	// These three values are used as divisors / array extents below
	if (Nz <= 0 || nBlocks == 0 || nthreads == 0){
		fprintf(stderr, "MRT.in: domain size, block count and thread count must be positive\n");
		return 1;
	}

	double rlx_setA = 1.f/tau;
	double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);

	printf("tau = %f \n", tau);
	printf("Set A = %f \n", rlx_setA);
	printf("Set B = %f \n", rlx_setB);
	printf("Force(x) = %f \n", Fx);
	printf("Force(y) = %f \n", Fy);
	printf("Force(z) = %f \n", Fz);

	Nx = Ny = Nz;	// Cubic domain

	int N = Nx*Ny*Nz;
	// Byte count of one distribution component; kept in size_t because the
	// 10x multiple used for f_even exceeds INT_MAX for domains near 300^3
	size_t dist_mem_size = N*sizeof(double);

	//	unsigned int nBlocks = 32;
	//	int nthreads = 128;
	// NOTE(review): integer division - when N is not a multiple of
	// nthreads*nBlocks the remaining sites are not covered by the launches.
	int S = N/nthreads/nBlocks;

	//	unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1);
	dim3 grid(nBlocks,1,1);

	printf("Number of blocks = %i \n", nBlocks);
	printf("Threads per block = %i \n", nthreads);
	printf("Sweeps per thread = %i \n", S);
	printf("Number of nodes per side = %i \n", Nx);
	printf("Total Number of nodes = %i \n", N);

	//.......................................................................
	printf("Read input media... \n");
	// .......... READ THE INPUT FILE .......................................
	int n;
	char value;
	char *id;
	id = new char[N];
	int sum = 0;
	ifstream PM(FILENAME.c_str(),ios::binary);
	if (!PM.is_open()){
		fprintf(stderr, "Unable to open media file %s\n", FILENAME.c_str());
		delete [] id;
		return 1;
	}
	for (int k=0;k<Nz;k++){
		for (int j=0;j<Ny;j++){
			for (int i=0;i<Nx;i++){
				PM.read((char *) (&value), sizeof(value));
				n = k*Nx*Ny+j*Nx+i;
				id[n] = value;
				if (value > 0) sum++;
			}
		}
	}
	PM.close();
	printf("File porosity = %f\n", double(sum)/N);
	//.......................................................................
	//...........device phase ID.................................................
	char *ID;
	cudaMalloc((void **) &ID, N);						// Allocate device memory
	// Copy to the device
	cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice);
	//...........................................................................

	//......................device distributions.................................
	double *f_even,*f_odd;
	//...........................................................................
	cudaMalloc((void **) &f_even, 10*dist_mem_size);	// Allocate device memory
	cudaMalloc((void **) &f_odd, 9*dist_mem_size);	// Allocate device memory
	//...........................................................................

	//...........................................................................
	//	cudaHostAlloc(&fa,dist_mem_size,cudaHostAllocPortable);
	//	cudaHostAlloc(&fb,dist_mem_size,cudaHostAllocPortable);
	//	cudaHostRegister(fa,dist_mem_size,cudaHostRegisterPortable);
	//	cudaHostRegister(fb,dist_mem_size,cudaHostRegisterPortable);
	//	cudaHostRegister(id,N*sizeof(char),cudaHostAllocPortable);

	printf("Setting the distributions, size = : %i\n", N);
	//...........................................................................
	//	INITIALIZE <<< grid, nthreads >>>  (ID, f_even, f_odd, Nx, Ny, Nz, S);
	//...........................................................................
	dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
	//*************************************************************************

	int timestep = 0;
	printf("No. of timesteps: %i \n", timestepMax);

	//.......create a stream for the LB calculation.......
	cudaStream_t stream;
	cudaStreamCreate(&stream);

	//.......create and start timer............
	cudaEvent_t start, stop;
	float time;

	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord( start, 0 );
	//.........................................

	//************ MAIN ITERATION LOOP ***************************************/
	while (timestep < timestepMax){

		//...................................................................
		//........ Execute the swap kernel (device) .........................
		//		SWAP <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
		//...................................................................
		dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);

		//........ Execute the collision kernel (device) ....................
		//		MRT <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S,
		//				rlx_setA, rlx_setB, Fx, Fy, Fz);
		//............................................................
		dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);
		// Iteration completed!

		timestep++;
		//...................................................................

	}
	//************************************************************************/

	cudaThreadSynchronize();
	//.......... stop and destroy timer.............................
	cudaEventRecord( stop, stream);
	cudaEventSynchronize( stop );

	cudaEventElapsedTime( &time, start, stop );
	printf("CPU time = %f \n", time);

	float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
	printf("MLUPS = %f \n", MLUPS);

	cudaStreamDestroy(stream);
	cudaEventDestroy( start );
	cudaEventDestroy( stop );
	//..............................................................

	//..............................................................
	//.........Compute the velocity and copy result to host ........
	double *velocity;
	velocity = new double[3*N];
	//......................device distributions....................................
	double *vel;
	//..............................................................................
	cudaMalloc((void **) &vel, 3*dist_mem_size);	// Allocate device memory
	// NOTE(review): the Compute_VELOCITY launch below is commented out, so nothing
	// writes vel before it is copied back. The buffer is zeroed so the report is
	// deterministic instead of whatever the allocation happened to contain;
	// re-enable the launch to get real velocities.
	cudaMemset(vel, 0, 3*dist_mem_size);
	//..............................................................................
	//	Compute_VELOCITY <<< grid, nthreads >>>  (ID, f_even, f_odd, vel, Nx, Ny, Nz, S);
	//..............................................................................
	cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);
	//..............................................................................

	//............................................................
	//....Write the z-velocity to test poiseuille flow............
	double vz,vz_avg;
	vz_avg = 0.0;

	FILE *output;
	output = fopen("velocity.out","w");
	if (output == NULL){
		fprintf(stderr, "Unable to open velocity.out for writing\n");
	}
	for (int k=0; k<1; k++){
		for (int j=0; j<1; j++){
			for (int i=0; i<Nx; i++){
				int n = k*Nx*Ny+j*Nx+i;
				//.....print value........
				vz = velocity[2*N+n];
				vz_avg += vz;
				if (output != NULL) fprintf(output, " %e",vz);
			}
		}
	}
	if (output != NULL) fclose(output);

	vz = vz_avg/double(sum);
	printf("Average Velocity = %e\n", vz);


	// cleanup
	cudaFree(f_even); cudaFree(f_odd); cudaFree(vel); cudaFree(ID);
	// Both host buffers come from new[], so they are released with delete[]
	delete [] velocity; delete [] id;

	return 0;
}
|
246
cpu/lb1_MRT.cu
Normal file
246
cpu/lb1_MRT.cu
Normal file
@ -0,0 +1,246 @@
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cuda.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_Barrier();
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
|
||||
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
|
||||
void Write_Out(double *array, int Nx, int Ny, int Nz){
	// Dump a scalar field to the text file "dist.list" in the working directory.
	//   array      - Nx*Ny*Nz values, x fastest: index = k*Nx*Ny + j*Nx + i
	//   Nx, Ny, Nz - extent of the field in each direction
	// Every value is truncated to int before printing. One text line is written per
	// (j,k) row, and a line of asterisks follows each k-slice.
	// If the file cannot be opened a message goes to stderr and nothing is written.
	FILE *output = fopen("dist.list","w");
	if (output == NULL){
		// Without this check the NULL handle was passed straight to fprintf
		fprintf(stderr, "Write_Out: unable to open dist.list for writing\n");
		return;
	}
	for (int k=0; k<Nz; k++){
		for (int j=0; j<Ny; j++){
			for (int i=0; i<Nx; i++){
				int index = k*Nx*Ny+j*Nx+i;
				int value = int(array[index]);
				fprintf(output, "| %i",value);
			}
			fprintf(output, " | \n");
		}
		fprintf(output,"************************************** \n");
	}
	fclose(output);
}
|
||||
|
||||
//**************************************************************************
|
||||
// MRT implementation of the LBM using CUDA
|
||||
//**************************************************************************
|
||||
|
||||
int main(void)
|
||||
{
|
||||
|
||||
// BGK Model parameters
|
||||
string FILENAME;
|
||||
unsigned int nBlocks, nthreads;
|
||||
int timestepMax, interval;
|
||||
double tau,Fx,Fy,Fz,tol;
|
||||
// Domain variables
|
||||
int Nx,Ny,Nz;
|
||||
|
||||
ifstream input("MRT.in");
|
||||
input >> FILENAME; // name of the input file
|
||||
input >> Nz; // number of nodes (x,y,z)
|
||||
input >> nBlocks;
|
||||
input >> nthreads;
|
||||
input >> tau; // relaxation time
|
||||
input >> Fx; // External force components (x,y,z)
|
||||
input >> Fy;
|
||||
input >> Fz;
|
||||
input >> timestepMax; // max no. of timesteps
|
||||
input >> interval; // error interval
|
||||
input >> tol; // error tolerance
|
||||
|
||||
double rlx_setA = 1.f/tau;
|
||||
double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
|
||||
|
||||
printf("tau = %f \n", tau);
|
||||
printf("Set A = %f \n", rlx_setA);
|
||||
printf("Set B = %f \n", rlx_setB);
|
||||
printf("Force(x) = %f \n", Fx);
|
||||
printf("Force(y) = %f \n", Fy);
|
||||
printf("Force(z) = %f \n", Fz);
|
||||
|
||||
Nx = Ny = Nz; // Cubic domain
|
||||
|
||||
int N = Nx*Ny*Nz;
|
||||
int dist_mem_size = N*sizeof(double);
|
||||
|
||||
// unsigned int nBlocks = 32;
|
||||
// int nthreads = 128;
|
||||
int S = N/nthreads/nBlocks;
|
||||
|
||||
// unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1);
|
||||
dim3 grid(nBlocks,1,1);
|
||||
|
||||
printf("Number of blocks = %i \n", nBlocks);
|
||||
printf("Threads per block = %i \n", nthreads);
|
||||
printf("Sweeps per thread = %i \n", S);
|
||||
printf("Number of nodes per side = %i \n", Nx);
|
||||
printf("Total Number of nodes = %i \n", N);
|
||||
|
||||
//.......................................................................
|
||||
printf("Read input media... \n");
|
||||
// .......... READ THE INPUT FILE .......................................
|
||||
int n;
|
||||
char value;
|
||||
char *id;
|
||||
id = new char[N];
|
||||
int sum = 0;
|
||||
double porosity;
|
||||
ifstream PM(FILENAME.c_str(),ios::binary);
|
||||
for (int k=0;k<Nz;k++){
|
||||
for (int j=0;j<Ny;j++){
|
||||
for (int i=0;i<Nx;i++){
|
||||
PM.read((char *) (&value), sizeof(value));
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
id[n] = value;
|
||||
if (value > 0) sum++;
|
||||
}
|
||||
}
|
||||
}
|
||||
PM.close();
|
||||
printf("File porosity = %f\n", double(sum)/N);
|
||||
//.......................................................................
|
||||
//...........device phase ID.................................................
|
||||
char *ID;
|
||||
dvc_AllocateDeviceMemory((void **) &ID, N); // Allocate device memory
|
||||
// Copy to the device
|
||||
dvc_CopyToDevice(ID, id, N);
|
||||
//...........................................................................
|
||||
|
||||
//......................device distributions.................................
|
||||
double *f_even,*f_odd;
|
||||
//...........................................................................
|
||||
dvc_AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size); // Allocate device memory
|
||||
dvc_AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
|
||||
//...........................................................................
|
||||
|
||||
//...........................................................................
|
||||
// cudaHostAlloc(&fa,dist_mem_size,cudaHostAllocPortable);
|
||||
// cudaHostAlloc(&fb,dist_mem_size,cudaHostAllocPortable);
|
||||
// cudaHostRegister(fa,dist_mem_size,cudaHostRegisterPortable);
|
||||
// cudaHostRegister(fb,dist_mem_size,cudaHostRegisterPortable);
|
||||
// cudaHostRegister(id,N*sizeof(char),cudaHostAllocPortable);
|
||||
|
||||
printf("Setting the distributions, size = : %i\n", N);
|
||||
//...........................................................................
|
||||
// INITIALIZE <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
|
||||
//...........................................................................
|
||||
dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
|
||||
//*************************************************************************
|
||||
|
||||
int timestep = 0;
|
||||
printf("No. of timesteps: %i \n", timestepMax);
|
||||
|
||||
//.......create a stream for the LB calculation.......
|
||||
cudaStream_t stream;
|
||||
cudaStreamCreate(&stream);
|
||||
|
||||
//.......create and start timer............
|
||||
cudaEvent_t start, stop;
|
||||
float time;
|
||||
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord( start, 0 );
|
||||
//.........................................
|
||||
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
while (timestep < timestepMax){
|
||||
|
||||
//...................................................................
|
||||
//........ Execute the swap kernel (device) .........................
|
||||
// SWAP <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
|
||||
//...................................................................
|
||||
dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
|
||||
|
||||
//........ Execute the collision kernel (device) ....................
|
||||
// MRT <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S,
|
||||
// rlx_setA, rlx_setB, Fx, Fy, Fz);
|
||||
//............................................................
|
||||
dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);
|
||||
// Iteration completed!
|
||||
|
||||
timestep++;
|
||||
//...................................................................
|
||||
|
||||
}
|
||||
//************************************************************************/
|
||||
|
||||
// cudaThreadSynchronize();
|
||||
dvc_Barrier();
|
||||
//.......... stop and destroy timer.............................
|
||||
cudaEventRecord( stop, stream);
|
||||
cudaEventSynchronize( stop );
|
||||
|
||||
cudaEventElapsedTime( &time, start, stop );
|
||||
printf("CPU time = %f \n", time);
|
||||
|
||||
float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
|
||||
printf("MLUPS = %f \n", MLUPS);
|
||||
|
||||
cudaStreamDestroy(stream);
|
||||
cudaEventDestroy( start );
|
||||
cudaEventDestroy( stop );
|
||||
//..............................................................
|
||||
|
||||
//..............................................................
|
||||
/*//.........Compute the velocity and copy result to host ........
|
||||
double *velocity;
|
||||
velocity = new double[3*N];
|
||||
//......................device distributions....................................
|
||||
double *vel;
|
||||
//..............................................................................
|
||||
dvc_AllocateDeviceMemory((void **) &vel, 3*dist_mem_size); // Allocate device memory
|
||||
//..............................................................................
|
||||
// Compute_VELOCITY <<< grid, nthreads >>> (ID, f_even, f_odd, vel, Nx, Ny, Nz, S);
|
||||
//..............................................................................
|
||||
// cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);
|
||||
//..............................................................................
|
||||
|
||||
//............................................................
|
||||
//....Write the z-velocity to test poiseuille flow............
|
||||
double vz,vz_avg;
|
||||
vz_avg = 0.0;
|
||||
|
||||
/* FILE *output;
|
||||
output = fopen("velocity.out","w");
|
||||
for (int k=0; k<1; k++){
|
||||
for (int j=0; j<1; j++){
|
||||
for (int i=0; i<Nx; i++){
|
||||
int n = k*Nx*Ny+j*Nx+i;
|
||||
//.....print value........
|
||||
vz = velocity[2*N+n];
|
||||
vz_avg += vz;
|
||||
fprintf(output, " %e",vz);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(output);
|
||||
|
||||
vz = vz_avg/double(sum);
|
||||
printf("Average Velocity = %e\n", vz);
|
||||
*/
|
||||
// cleanup
|
||||
// cudaFree(f_even); cudaFree(f_odd); cudaFree(vel); cudaFree(ID);
|
||||
// free (velocity); free(id);
|
||||
|
||||
}
|
1450
cpu/lb1_MRT_mpi.cpp
Normal file
1450
cpu/lb1_MRT_mpi.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1836
cpu/lb1_MRT_mpi.cu
Normal file
1836
cpu/lb1_MRT_mpi.cu
Normal file
File diff suppressed because it is too large
Load Diff
423
cpu/lb2_Color.cu
Normal file
423
cpu/lb2_Color.cu
Normal file
@ -0,0 +1,423 @@
|
||||
#ifdef useMPI
|
||||
#include <mpi.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <math.h>
|
||||
#include <cuda.h>
|
||||
|
||||
using namespace std;
|
||||
//*************************************************************************
|
||||
// HokieSpeed
|
||||
//nvcc -Xcompiler -fopenmp -lgomp -O3 -arch sm_20 -o hybridATLKR lb2_ATLKR_hybrid.cu
|
||||
// -I$VT_MPI_INC -L$VT_MPI_LIB -lmpi
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
// Implementation of Two-Phase Immiscible LBM using CUDA
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
// Device-side entry points used by this driver. They are only declared here
// (with C linkage); their definitions are not part of this file. The leading
// (blocks, threads, S) triple is the launch configuration that main() computes
// from the input file and forwards unchanged.
extern "C" {

// Called once by main() before the time loop.
void dvc_InitD3Q19(int nblocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd,
		int Nx, int Ny, int Nz);

// Called once by main() before the time loop.
void dvc_InitDenColor(int nblocks, int nthreads, int S,
		char *ID, double *Den, double *Phi,
		double das, double dbs, int N);

// Called by main() as the first step of every timestep.
void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
		char *ID, double *Phi, double *ColorGrad,
		int Nx, int Ny, int Nz);

// Called by main() once per timestep, after the color gradient step.
void dvc_ColorCollide(int nBlocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd,
		double *ColorGrad, double *Velocity,
		double rlxA, double rlxB, double alpha, double beta,
		double Fx, double Fy, double Fz,
		int Nx, int Ny, int Nz, bool pBC);

// Called by main() once per timestep, after the collision step.
void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
		char *ID, double *Den, double *Copy, double *Phi,
		double *ColorGrad, double *Velocity,
		double beta, int Nx, int Ny, int Nz, bool pBC);

// Called by main() once after initialization and once at the end of every timestep.
void dvc_ComputePhi(int nBlocks, int nthreads, int S,
		char *ID, double *Phi, double *Copy, double *Den, int N);

// Declared for completeness; main() in this file calls cudaMalloc / cudaMemcpy
// directly instead of these two wrappers.
void dvc_AllocateDeviceMemory(void** address, size_t size);
void dvc_CopyToDevice(void* dest, void* source, size_t size);

// Called by main() at the end of every timestep and once more after the loop.
void dvc_Barrier();

// Called by main() once per timestep, after the density streaming step.
void dvc_SwapD3Q19(int nblocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd,
		int Nx, int Ny, int Nz);

// Declared but not called anywhere in this file.
void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
		int sendCount, double *sendbuf, double *Dist, int N);

// Declared but not called anywhere in this file.
void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz,
		int *RecvList, int start, int recvCount, double *recvbuf,
		double *Dist, int Nx, int Ny, int Nz);

} // extern "C"
|
||||
//*************************************************************************
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
//********** Initialize MPI ****************
|
||||
int numprocs,rank;
|
||||
#ifdef useMPI
|
||||
MPI_Status stat;
|
||||
MPI_Init(&argc,&argv);
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
|
||||
#else
|
||||
numprocs = 1;
|
||||
rank = 0;
|
||||
#endif
|
||||
//******************************************
|
||||
|
||||
if (rank == 0){
|
||||
printf("********************************************************\n");
|
||||
printf("Running Hybrid Implementation of Color LBM \n");
|
||||
printf("********************************************************\n");
|
||||
}
|
||||
// Color Model parameters
|
||||
string FILENAME;
|
||||
unsigned int nBlocks, nthreads;
|
||||
int Nx,Ny,Nz;
|
||||
int timestepMax, interval;
|
||||
double tau,Fx,Fy,Fz,tol;
|
||||
double alpha, beta;
|
||||
double das, dbs;
|
||||
double din,dout;
|
||||
bool pBC;
|
||||
|
||||
if (rank==0){
|
||||
//.............................................................
|
||||
// READ SIMULATION PARMAETERS FROM INPUT FILE
|
||||
//.............................................................
|
||||
ifstream input("Color.in");
|
||||
// Line 1: Name of the phase indicator file (s=0,w=1,n=2)
|
||||
input >> FILENAME;
|
||||
// Line 2: domain size (Nx, Ny, Nz)
|
||||
input >> Nz; // number of nodes (x,y,z)
|
||||
input >> nBlocks;
|
||||
input >> nthreads;
|
||||
// Line 3: model parameters (tau, alpha, beta, das, dbs)
|
||||
input >> tau;
|
||||
input >> alpha;
|
||||
input >> beta;
|
||||
input >> das;
|
||||
input >> dbs;
|
||||
// Line 4: External force components (Fx,Fy, Fz)
|
||||
input >> Fx;
|
||||
input >> Fy;
|
||||
input >> Fz;
|
||||
// Line 5: Pressure Boundary conditions
|
||||
input >> pBC;
|
||||
input >> din;
|
||||
input >> dout;
|
||||
// Line 6: time-stepping criteria
|
||||
input >> timestepMax; // max no. of timesteps
|
||||
input >> interval; // error interval
|
||||
input >> tol; // error tolerance
|
||||
//.............................................................
|
||||
}
|
||||
#ifdef useMPI
|
||||
// **************************************************************
|
||||
// Broadcast simulation parameters from rank 0 to all other procs
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
//.................................................
|
||||
MPI_Bcast(&Nz,1,MPI_INT,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&nBlocks,1,MPI_INT,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&nthreads,1,MPI_INT,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&tau,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&beta,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&das,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&dbs,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&pBC,1,MPI_LOGICAL,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&din,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&dout,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(×tepMax,1,MPI_INT,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&interval,1,MPI_INT,0,MPI_COMM_WORLD);
|
||||
MPI_Bcast(&tol,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
|
||||
//.................................................
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
// **************************************************************
|
||||
#endif
|
||||
|
||||
double rlxA = 1.f/tau;
|
||||
double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA);
|
||||
|
||||
if (pBC && rank == 0){
|
||||
printf("Assigning presusre boundary conditions \n");
|
||||
printf("Inlet density = %f \n", din);
|
||||
printf("Outlet density = %f \n", dout);
|
||||
}
|
||||
|
||||
if (rank==0){
|
||||
printf("....Parameters................\n");
|
||||
printf("tau = %f \n", tau);
|
||||
printf("alpha = %f \n", alpha);
|
||||
printf("beta = %f \n", beta);
|
||||
printf("das = %f \n", das);
|
||||
printf("dbs = %f \n", dbs);
|
||||
printf("Force(x) = %f \n", Fx);
|
||||
printf("Force(y) = %f \n", Fy);
|
||||
printf("Force(z) = %f \n", Fz);
|
||||
printf("Nz = %i \n", Nz);
|
||||
printf("timestepMax = %i \n", timestepMax);
|
||||
printf("...............................\n");
|
||||
}
|
||||
|
||||
// Identical cubic sub-domains
|
||||
Nx = Ny = Nz;// = 16*s; // Cubic domain
|
||||
int N = Nx*Ny*Nz;
|
||||
int dist_mem_size = N*sizeof(double);
|
||||
|
||||
// unsigned int nBlocks = 32;
|
||||
// int nthreads = 128;
|
||||
int S = N/nthreads/nBlocks;
|
||||
if (nBlocks*nthreads*S < N) S++;
|
||||
// int S = 1;
|
||||
|
||||
// unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1);
|
||||
// dim3 grid(nBlocks,1,1);
|
||||
if (rank==1){
|
||||
printf("Number of blocks = %i \n", nBlocks);
|
||||
printf("Threads per block = %i \n", nthreads);
|
||||
printf("Sweeps per thread = %i \n", S);
|
||||
printf("Number of nodes per side = %i \n", Nx);
|
||||
printf("Total Number of nodes = %i \n", N);
|
||||
printf("...............................\n");
|
||||
}
|
||||
|
||||
//.......................................................................
|
||||
// .......... READ THE INPUT FILE .......................................
|
||||
int n;
|
||||
char value;
|
||||
char *id;
|
||||
id = new char[N];
|
||||
int sum = 0;
|
||||
// RANK 0 READS THE INPUT FILE
|
||||
if (rank==0){
|
||||
printf("Read input media... \n");
|
||||
ifstream PM(FILENAME.c_str(),ios::binary);
|
||||
for (int k=0;k<Nz;k++){
|
||||
for (int j=0;j<Ny;j++){
|
||||
for (int i=0;i<Nx;i++){
|
||||
PM.read((char *) (&value), sizeof(value));
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
|
||||
if (value>0){
|
||||
if (pBC) value=2; // Saturate with NWP
|
||||
if (k<8){
|
||||
value=1;
|
||||
}
|
||||
}
|
||||
|
||||
id[n] = value;
|
||||
if (value > 0) sum++;
|
||||
}
|
||||
}
|
||||
}
|
||||
PM.close();
|
||||
printf("File porosity = %f\n", double(sum)/N);
|
||||
}
|
||||
//......... for pressure BC only............................
|
||||
// Void the first / last rows if pressure BC are to be used
|
||||
if (pBC){
|
||||
for (int k=0;k<Nz;k++){
|
||||
for (int j=0;j<Ny;j++){
|
||||
for (int i=0;i<Nx;i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
if (k<4) id[n] = 1;
|
||||
if (k>Nz-5) id[n] = 2;
|
||||
}
|
||||
}
|
||||
// Skip the non-boundary values
|
||||
if (k==4) k=Nz-5;
|
||||
}
|
||||
}
|
||||
#ifdef useMPI //............................................................
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_Bcast(&id[0],N,MPI_CHAR,0,MPI_COMM_WORLD);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
#endif
|
||||
if (rank == 0) printf("Domain set.\n");
|
||||
//...........................................................................
|
||||
|
||||
int SBC;
|
||||
int outlet = N-Nx*Ny;
|
||||
if (pBC){
|
||||
SBC = Nx*Ny/nthreads/nBlocks+1;
|
||||
printf("Number of sweeps for inlet / outlet: %i \n", SBC);
|
||||
}
|
||||
//...........................................................................
|
||||
|
||||
//...........................................................................
|
||||
//...........device phase ID.................................................
|
||||
char *ID;
|
||||
cudaMalloc((void **) &ID, N); // Allocate device memory
|
||||
// Copy to the device
|
||||
cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice);
|
||||
//...........................................................................
|
||||
|
||||
//......................device distributions.................................
|
||||
double *f_even,*f_odd;
|
||||
//...........................................................................
|
||||
cudaMalloc((void **) &f_even, 10*dist_mem_size); // Allocate device memory
|
||||
cudaMalloc((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
|
||||
// f_even = new double[10*N];
|
||||
// f_odd = new double[9*N];
|
||||
//...........................................................................
|
||||
|
||||
//...........................................................................
|
||||
// MAIN VARIABLES ALLOCATED HERE
|
||||
//...........................................................................
|
||||
double *Phi,*Den,*Copy;
|
||||
double *ColorGrad, *Velocity;
|
||||
//...........................................................................
|
||||
cudaMalloc((void **) &Phi, dist_mem_size);
|
||||
cudaMalloc((void **) &Den, 2*dist_mem_size);
|
||||
cudaMalloc((void **) &Copy, 2*dist_mem_size);
|
||||
cudaMalloc((void **) &Velocity, 3*dist_mem_size);
|
||||
cudaMalloc((void **) &ColorGrad, 3*dist_mem_size);
|
||||
//...........................................................................
|
||||
|
||||
//...........................................................................
|
||||
if (rank==0) printf("Setting the distributions, size = : %i\n", N);
|
||||
//...........................................................................
|
||||
dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
|
||||
dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, N);
|
||||
//...........................................................................
|
||||
dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);
|
||||
//...........................................................................
|
||||
|
||||
int timestep;
|
||||
// double starttime,stoptime;
|
||||
if (rank==0) printf("No. of timesteps: %i \n", timestepMax);
|
||||
timestep = 0;
|
||||
//.......create and start timer............
|
||||
cudaEvent_t start, stop;
|
||||
float time;
|
||||
//.......create a stream for the LB calculation.......
|
||||
cudaStream_t stream;
|
||||
cudaStreamCreate(&stream);
|
||||
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord( start, 0 );
|
||||
//.........................................
|
||||
//************ MAIN TIMESTEP LOOP ***************************************/
|
||||
while (timestep < timestepMax){
|
||||
|
||||
//*************************************************************************
|
||||
// Compute the color gradient
|
||||
//*************************************************************************
|
||||
dvc_ComputeColorGradient(nBlocks, nthreads, S,
|
||||
ID, Phi, ColorGrad, Nx, Ny, Nz);
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
// Perform collision step for the momentum transport
|
||||
//*************************************************************************
|
||||
dvc_ColorCollide(nBlocks, nthreads, S,
|
||||
ID, f_even, f_odd, ColorGrad, Velocity,
|
||||
rlxA, rlxB,alpha, beta, Fx, Fy, Fz, Nx, Ny, Nz, pBC);
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
// Carry out the density streaming step for mass transport
|
||||
//*************************************************************************
|
||||
dvc_DensityStreamD3Q7(nBlocks, nthreads, S,
|
||||
ID, Den, Copy, Phi, ColorGrad, Velocity,beta, Nx, Ny, Nz, pBC);
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
// Swap the distributions for momentum transport
|
||||
//*************************************************************************
|
||||
dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
// Compute the phase indicator field and reset Copy, Den
|
||||
//*************************************************************************
|
||||
dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);
|
||||
//*************************************************************************
|
||||
|
||||
dvc_Barrier();
|
||||
timestep++;
|
||||
//.............................................................................
|
||||
}
|
||||
//************************************************************************/
|
||||
dvc_Barrier();
|
||||
//.......... stop and destroy timer.............................
|
||||
cudaEventRecord( stop, stream);
|
||||
cudaEventSynchronize( stop );
|
||||
|
||||
cudaEventElapsedTime( &time, start, stop );
|
||||
printf("CPU time = %f \n", time);
|
||||
|
||||
float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
|
||||
printf("MLUPS = %f \n", MLUPS);
|
||||
|
||||
cudaEventDestroy( start );
|
||||
cudaEventDestroy( stop );
|
||||
|
||||
double *Data;
|
||||
Data = new double[3*N];
|
||||
|
||||
cudaMemcpy(Data, Phi, dist_mem_size, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Write out the Phase Indicator Field
|
||||
FILE *phase;
|
||||
phase = fopen("Phase.out","wb");
|
||||
fwrite(Data,8,N,phase);
|
||||
fclose(phase);
|
||||
|
||||
//....................................................
|
||||
// Write out the pressure - (reuse Phi arrays since we're done with those)
|
||||
// ComputeDensity<<< grid, nthreads>>> (ID, f_even, f_odd, Phi, Nx, Ny, Nz, S);
|
||||
// cudaMemcpy(Data, Phi, dist_mem_size, cudaMemcpyDeviceToHost);
|
||||
// FILE *PRESSURE;
|
||||
// PRESSURE = fopen("Pressure.out","wb");
|
||||
// fwrite(Phi,8,N,PRESSURE);
|
||||
// fclose(PRESSURE);
|
||||
//....................................................
|
||||
|
||||
// Write out the Color Gradient
|
||||
|
||||
cudaMemcpy(Data, ColorGrad, 3*dist_mem_size, cudaMemcpyDeviceToHost);
|
||||
|
||||
FILE *CG;
|
||||
CG = fopen("ColorGrad.out","wb");
|
||||
fwrite(Data,8,3*N,CG);
|
||||
fclose(CG);
|
||||
|
||||
// Write out the Velocity
|
||||
// FILE *VEL;
|
||||
// VEL = fopen("Velocity.out","wb");
|
||||
// fwrite(Velocity,8,3*N,VEL);
|
||||
// fclose(VEL);
|
||||
|
||||
// cleanup
|
||||
cudaFree(ID);
|
||||
cudaFree(f_even); cudaFree(f_odd);
|
||||
cudaFree(Velocity);
|
||||
cudaFree(Phi);
|
||||
|
||||
cudaFree (ColorGrad);
|
||||
cudaFree (Den); cudaFree(Copy);
|
||||
cudaFree (Phi);
|
||||
free(id);
|
||||
|
||||
//***********Finish up!*********************************
|
||||
#ifdef useMPI
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
return 0;
|
||||
|
||||
}
|
1464
cpu/lb2_Color_mpi.cpp
Normal file
1464
cpu/lb2_Color_mpi.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1487
cpu/lb2_Color_mpi.cpp~
Normal file
1487
cpu/lb2_Color_mpi.cpp~
Normal file
File diff suppressed because it is too large
Load Diff
8
example-cpu/Color.in
Normal file
8
example-cpu/Color.in
Normal file
@ -0,0 +1,8 @@
|
||||
ID
|
||||
80 32 128
|
||||
1.0
|
||||
1.0e-2 0.9 0.1 0.9
|
||||
0.0 0.0 0.0
|
||||
0 1.0 1.0
|
||||
500 500 1e-5
|
||||
|
1
example-cpu/Domain.in
Normal file
1
example-cpu/Domain.in
Normal file
@ -0,0 +1 @@
|
||||
2 2 2
|
BIN
example-cpu/ID.00000
Normal file
BIN
example-cpu/ID.00000
Normal file
Binary file not shown.
BIN
example-cpu/ID.00001
Normal file
BIN
example-cpu/ID.00001
Normal file
Binary file not shown.
BIN
example-cpu/ID.00002
Normal file
BIN
example-cpu/ID.00002
Normal file
Binary file not shown.
BIN
example-cpu/ID.00003
Normal file
BIN
example-cpu/ID.00003
Normal file
Binary file not shown.
BIN
example-cpu/ID.00004
Normal file
BIN
example-cpu/ID.00004
Normal file
Binary file not shown.
BIN
example-cpu/ID.00005
Normal file
BIN
example-cpu/ID.00005
Normal file
Binary file not shown.
BIN
example-cpu/ID.00006
Normal file
BIN
example-cpu/ID.00006
Normal file
Binary file not shown.
BIN
example-cpu/ID.00007
Normal file
BIN
example-cpu/ID.00007
Normal file
Binary file not shown.
27
example-cpu/RunColor-cpu.hokiespeed
Executable file
27
example-cpu/RunColor-cpu.hokiespeed
Executable file
@ -0,0 +1,27 @@
|
||||
#!/bin/bash
# PBS batch script: runs the CPU build of the Color LBM under mpirun on one
# node with 8 processes per node (see the "nodes" request below).
#PBS -l walltime=00:10:00
#PBS -l nodes=1:ppn=8
# Access group, queue, and accounting project
#PBS -W group_list=hokiespeed
# Queue name. Replace normal_q with long_q to submit a job to the long queue.
#PBS -q normal_q
#PBS -A hokiespeed

echo "------------------------------------------"
echo "Running Color LBM using MPI!"
echo "Number of processors = " $PBS_NP
echo "------------------------------------------"

module purge
#module load intel mvapich2
module load gcc cuda mvapich2/1.9rc1

# Run from the submission directory. Stop here if it cannot be entered, so the
# job does not silently run somewhere else.
cd "$PBS_O_WORKDIR" || exit 1

export MV2_CPU_BINDING_POLICY=scatter
#export VIADEV_CPU_MAPPING=0:2:3:4:1:5:6:7
export MV2_SHOW_CPU_BINDING=1

mpirun -np "$PBS_NP" ~/LBPM-WIA/cpu/ColorLBM-cpu

# Propagate mpirun's exit status to the batch system.
exit;
|
404
example-cpu/pack.out
Normal file
404
example-cpu/pack.out
Normal file
@ -0,0 +1,404 @@
|
||||
Number of Spheres: 399
|
||||
Domain Length (x,y,z): 1, 1, 1
|
||||
Media porosity: 0.369
|
||||
log(r) Normal with mean -2.47144, variance 0
|
||||
Mean coordination No. 11.84
|
||||
0.0358075 0.0887041 0.112468 0.0844631
|
||||
0.118482 0.318651 0.473828 0.0844631
|
||||
0.582752 0.0454491 0.529264 0.0844631
|
||||
0.852622 0.828954 0.547441 0.0844631
|
||||
0.807778 0.54718 0.595125 0.0844631
|
||||
0.557502 0.0185902 0.694119 0.0844631
|
||||
0.832159 0.72426 0.207901 0.0844631
|
||||
0.84196 0.996673 0.530338 0.0844631
|
||||
0.0811649 0.0792821 0.70289 0.0844631
|
||||
0.994893 0.869664 0.237064 0.0844631
|
||||
0.488065 0.221434 0.32131 0.0844631
|
||||
0.170562 0.759156 0.532848 0.0844631
|
||||
0.844254 0.0475049 0.812703 0.0844631
|
||||
0.711717 0.835039 0.165973 0.0844631
|
||||
0.545693 0.415679 0.814859 0.0844631
|
||||
0.291362 0.221277 0.740625 0.0844631
|
||||
0.625387 0.177012 0.665196 0.0844631
|
||||
0.226285 0.692765 0.689361 0.0844631
|
||||
0.891466 0.0957847 0.657861 0.0844631
|
||||
0.517532 0.273905 0.902284 0.0844631
|
||||
0.410399 0.682132 0.16299 0.0844631
|
||||
0.0570839 0.629902 0.0756787 0.0844631
|
||||
0.00559676 0.182636 0.975355 0.0844631
|
||||
0.132737 0.531505 0.667699 0.0844631
|
||||
0.662373 0.662491 0.762575 0.0844631
|
||||
0.572523 0.557371 0.0770451 0.0844631
|
||||
0.241216 0.554304 0.795169 0.0844631
|
||||
0.404127 0.9559 0.727017 0.0844631
|
||||
0.84986 0.285616 0.807583 0.0844631
|
||||
0.622755 0.508751 0.695781 0.0844631
|
||||
0.0647357 0.683784 0.64081 0.0844631
|
||||
0.017351 0.0335981 0.271083 0.0844631
|
||||
0.634026 0.657202 0.955442 0.0844631
|
||||
0.665255 0.637861 0.595479 0.0844631
|
||||
0.936277 0.0301754 0.9533 0.0844631
|
||||
0.465166 0.519937 0.229393 0.0844631
|
||||
0.27117 0.85172 0.724784 0.0844631
|
||||
0.404611 0.539355 0.0728933 0.0844631
|
||||
0.738345 0.774204 0.892481 0.0844631
|
||||
0.784738 0.235691 0.659926 0.0844631
|
||||
0.694499 0.92715 0.574585 0.0844631
|
||||
0.203094 0.29767 0.618523 0.0844631
|
||||
0.997892 0.98119 0.593439 0.0844631
|
||||
0.51469 0.833673 0.950843 0.0844631
|
||||
0.131958 0.596077 0.511604 0.0844631
|
||||
0.429379 0.264579 0.0654981 0.0844631
|
||||
0.98469 0.958944 0.771791 0.0844631
|
||||
0.952019 0.25238 0.676499 0.0844631
|
||||
0.0395681 0.440658 0.559985 0.0844631
|
||||
0.663478 0.959076 0.270011 0.0844631
|
||||
0.118073 0.821444 0.0698155 0.0844631
|
||||
0.597588 0.270182 0.0509888 0.0844631
|
||||
0.823071 0.555628 0.203843 0.0844631
|
||||
0.710083 0.00668089 0.425246 0.0844631
|
||||
0.708692 0.904726 0.788856 0.0844631
|
||||
0.293722 0.292769 0.47605 0.0844631
|
||||
0.33054 0.402152 0.352697 0.0844631
|
||||
0.103216 0.475309 0.852193 0.0844631
|
||||
0.54144 0.881093 0.790931 0.0844631
|
||||
0.588392 0.889518 0.424822 0.0844631
|
||||
0.54267 0.681877 0.268062 0.0844631
|
||||
0.671421 0.784318 0.321985 0.0844631
|
||||
0.893617 0.749839 0.954392 0.0844631
|
||||
0.612948 0.108126 0.331386 0.0844631
|
||||
0.40227 0.503457 0.798711 0.0844631
|
||||
0.854573 0.0324028 0.101137 0.0844631
|
||||
0.426354 0.122944 0.715253 0.0844631
|
||||
0.973041 0.632152 0.222197 0.0844631
|
||||
0.232692 0.0935994 0.176676 0.0844631
|
||||
0.303504 0.575786 0.59508 0.0844631
|
||||
0.365845 0.667468 0.781117 0.0844631
|
||||
0.764071 0.282961 0.49907 0.0844631
|
||||
0.31194 0.763128 0.995006 0.0844631
|
||||
0.16544 0.598159 0.950031 0.0844631
|
||||
0.988502 0.421201 0.400142 0.0844631
|
||||
0.0904952 0.804367 0.790716 0.0844631
|
||||
0.247648 0.450753 0.942993 0.0844631
|
||||
0.269592 0.0566081 0.709864 0.0844631
|
||||
0.251452 0.261402 0.171533 0.0844631
|
||||
0.701449 0.298528 0.342954 0.0844631
|
||||
0.613014 0.423649 0.982098 0.0844631
|
||||
0.408804 0.0300075 0.575286 0.0844631
|
||||
0.299217 0.816215 0.42207 0.0844631
|
||||
0.576412 0.321367 0.588916 0.0844631
|
||||
0.519125 0.705171 0.841281 0.0844631
|
||||
0.200777 0.684171 0.384522 0.0844631
|
||||
0.553896 0.522574 0.542147 0.0844631
|
||||
0.128416 0.958809 0.168028 0.0844631
|
||||
0.724615 0.0432676 0.69352 0.0844631
|
||||
0.770004 0.00074753 0.958129 0.0844631
|
||||
0.207296 0.447809 0.541212 0.0844631
|
||||
0.903614 0.193834 0.109557 0.0844631
|
||||
0.215384 0.415782 0.229858 0.0844631
|
||||
0.666485 0.318737 0.747423 0.0844631
|
||||
0.745934 0.349421 0.0668263 0.0844631
|
||||
0.766981 0.0877808 0.234542 0.0844631
|
||||
0.315324 0.397626 0.0947259 0.0844631
|
||||
0.610816 0.534216 0.313762 0.0844631
|
||||
0.687703 0.775593 0.500285 0.0844631
|
||||
0.43313 0.83717 0.0998827 0.0844631
|
||||
0.561228 0.135274 0.816213 0.0844631
|
||||
0.213419 0.355151 0.807995 0.0844631
|
||||
0.975543 0.153798 0.811644 0.0844631
|
||||
0.161034 0.342383 0.0537506 0.0844631
|
||||
0.725965 0.170735 0.804364 0.0844631
|
||||
0.91098 0.16578 0.275974 0.0844631
|
||||
0.275188 0.74229 0.244446 0.0844631
|
||||
0.389144 0.64072 0.325376 0.0844631
|
||||
0.554735 0.372235 0.282359 0.0844631
|
||||
0.104658 0.191284 0.227677 0.0844631
|
||||
0.499806 0.985717 0.302229 0.0844631
|
||||
0.470468 0.486829 0.394958 0.0844631
|
||||
0.587684 0.750647 0.08832 0.0844631
|
||||
0.299257 0.432828 0.684974 0.0844631
|
||||
0.424695 0.92756 0.441914 0.0844631
|
||||
0.504459 0.272912 0.733867 0.0844631
|
||||
0.807186 0.287712 0.211658 0.0844631
|
||||
0.460013 0.682085 0.00151398 0.0844631
|
||||
0.860599 0.886291 0.860442 0.0844631
|
||||
0.53648 0.087021 0.172119 0.0844631
|
||||
0.82388 0.678662 0.475869 0.0844631
|
||||
0.196931 0.186369 0.876273 0.0844631
|
||||
0.463229 0.765994 0.411131 0.0844631
|
||||
0.987373 0.930252 0.0795568 0.0844631
|
||||
0.513417 0.544658 0.919064 0.0844631
|
||||
0.185582 0.164945 0.453624 0.0844631
|
||||
0.0924416 0.635728 0.800369 0.0844631
|
||||
0.889638 0.614201 0.0598228 0.0844631
|
||||
0.941545 0.724731 0.793543 0.0844631
|
||||
0.731609 0.123715 0.545145 0.0844631
|
||||
0.679436 0.640664 0.177882 0.0844631
|
||||
0.218729 0.671364 0.101905 0.0844631
|
||||
0.334539 0.739691 0.568468 0.0844631
|
||||
0.215164 0.00844407 0.0318398 0.0844631
|
||||
0.669247 0.522968 0.85756 0.0844631
|
||||
0.299222 0.158529 0.578448 0.0844631
|
||||
0.919157 0.140163 0.485759 0.0844631
|
||||
0.591358 0.657196 0.427926 0.0844631
|
||||
0.268799 0.869125 0.133054 0.0844631
|
||||
0.612547 0.990973 0.897731 0.0844631
|
||||
0.15907 0.958743 0.791981 0.0844631
|
||||
0.548442 0.844609 0.223106 0.0844631
|
||||
0.888657 0.308625 0.986192 0.0844631
|
||||
0.795381 0.439411 0.932689 0.0844631
|
||||
0.635763 0.80385 0.674 0.0844631
|
||||
0.0371748 0.202381 0.38214 0.0844631
|
||||
0.386407 0.16184 0.192518 0.0844631
|
||||
0.758207 0.719974 0.0512279 0.0844631
|
||||
0.457535 0.0925071 0.426104 0.0844631
|
||||
0.972372 0.903228 0.445768 0.0844631
|
||||
0.0607607 0.730332 0.939615 0.0844631
|
||||
0.981198 0.828728 0.664238 0.0844631
|
||||
0.739213 0.53735 0.0583449 0.0844631
|
||||
0.82734 0.158482 0.938934 0.0844631
|
||||
0.879107 0.0201421 0.367227 0.0844631
|
||||
0.687105 0.0110493 0.106996 0.0844631
|
||||
0.0818178 0.316873 0.906747 0.0844631
|
||||
0.200611 0.873239 0.931621 0.0844631
|
||||
0.106137 0.24103 0.744726 0.0844631
|
||||
0.435493 0.793483 0.692769 0.0844631
|
||||
0.528055 0.731786 0.56564 0.0844631
|
||||
0.0571424 0.0492633 0.45064 0.0844631
|
||||
0.135322 0.891012 0.632386 0.0844631
|
||||
0.328101 0.134057 0.0302443 0.0844631
|
||||
0.869313 0.301052 0.368178 0.0844631
|
||||
0.961967 0.765697 0.108057 0.0844631
|
||||
0.668496 0.446111 0.181676 0.0844631
|
||||
0.919568 0.756006 0.348926 0.0844631
|
||||
0.991666 0.744856 0.501286 0.0844631
|
||||
0.295511 0.290355 0.965744 0.0844631
|
||||
0.458841 0.605759 0.654311 0.0844631
|
||||
0.794554 0.61734 0.920233 0.0844631
|
||||
0.0019474 0.33119 0.109448 0.0844631
|
||||
0.46369 0.309833 0.463181 0.0844631
|
||||
0.296394 0.904238 0.566223 0.0844631
|
||||
0.890565 0.405413 0.63493 0.0844631
|
||||
0.144195 0.575774 0.262804 0.0844631
|
||||
0.210047 0.888592 0.29819 0.0844631
|
||||
0.617947 0.229081 0.213569 0.0844631
|
||||
0.495406 0.122105 0.00322827 0.0844631
|
||||
0.831011 0.872402 0.0523457 0.0844631
|
||||
0.279675 0.0391166 0.878759 0.0844631
|
||||
0.433519 0.406039 0.973258 0.0844631
|
||||
0.702773 0.535258 0.463338 0.0844631
|
||||
0.842781 0.922694 0.687626 0.0844631
|
||||
0.703096 0.29956 0.911215 0.0844631
|
||||
0.0380851 0.639181 0.37794 0.0844631
|
||||
0.379016 0.820676 0.851045 0.0844631
|
||||
0.723109 0.401954 0.611759 0.0844631
|
||||
0.955443 0.579247 0.512529 0.0844631
|
||||
0.296401 0.037199 0.449389 0.0844631
|
||||
0.881657 0.447792 0.087761 0.0844631
|
||||
0.656098 0.143532 0.95574 0.0844631
|
||||
0.965936 0.546205 0.690001 0.0844631
|
||||
0.334146 0.0146719 0.286282 0.0844631
|
||||
0.174903 0.0508213 0.329537 0.0844631
|
||||
0.668068 0.875575 0.00789851 0.0844631
|
||||
0.517478 0.402511 0.119801 0.0844631
|
||||
0.0335784 0.895984 0.920728 0.0844631
|
||||
0.353489 0.926216 0.98045 0.0844631
|
||||
0.515858 0.899496 0.581788 0.0844631
|
||||
0.0214488 0.483245 0.999798 0.0844631
|
||||
0.836999 0.433238 0.473884 0.0844631
|
||||
0.381474 0.299744 0.621214 0.0844631
|
||||
0.893548 0.554202 0.357359 0.0844631
|
||||
0.385287 0.00679129 0.125476 0.0844631
|
||||
0.382082 0.176473 0.875897 0.0844631
|
||||
0.375338 0.439285 0.526206 0.0844631
|
||||
0.333229 0.595868 0.930602 0.0844631
|
||||
0.533036 0.965661 0.0546621 0.0844631
|
||||
0.0348509 0.392455 0.721819 0.0844631
|
||||
0.613491 0.216291 0.46143 0.0844631
|
||||
0.17733 0.0419469 0.56912 0.0844631
|
||||
0.185888 0.506077 0.0901699 0.0844631
|
||||
0.166015 0.914118 0.459266 0.0844631
|
||||
0.746306 0.633286 0.332833 0.0844631
|
||||
0.225214 0.724578 0.855262 0.0844631
|
||||
0.820434 0.603526 0.753874 0.0844631
|
||||
0.16378 0.173693 0.0414764 0.0844631
|
||||
0.0995727 0.0546697 0.917656 0.0844631
|
||||
0.447591 0.020769 0.876813 0.0844631
|
||||
0.0623956 0.183214 0.56894 0.0844631
|
||||
0.94763 0.410834 0.865314 0.0844631
|
||||
0.953689 0.579147 0.878359 0.0844631
|
||||
0.404145 0.327586 0.22019 0.0844631
|
||||
0.800574 0.868069 0.391566 0.0844631
|
||||
0.287223 0.546206 0.429562 0.0844631
|
||||
0.606297 0.397675 0.441199 0.0844631
|
||||
0.380349 0.33851 0.827832 0.0844631
|
||||
0.202245 0.28176 0.331846 0.0844631
|
||||
0.47476 0.434332 0.662686 0.0844631
|
||||
0.081255 0.802026 0.365527 0.0844631
|
||||
0.790427 0.768306 0.731892 0.0844631
|
||||
0.952141 0.303167 0.515389 0.0844631
|
||||
0.735282 0.184001 0.0993647 0.0844631
|
||||
0.108137 0.733294 0.213575 0.0844631
|
||||
0.0626769 0.350381 0.265908 0.0844631
|
||||
0.328084 0.17067 0.350811 0.0844631
|
||||
0.0373343 0.485658 0.167958 0.0844631
|
||||
0.487331 0.179389 0.567878 0.0844631
|
||||
0.782697 0.157339 0.387679 0.0844631
|
||||
0.392496 0.856145 0.287012 0.0844631
|
||||
0.78666 0.43832 0.763992 0.0844631
|
||||
0.836323 0.927719 0.232454 0.0844631
|
||||
0.152772 0.455916 0.381533 0.0844631
|
||||
0.302341 0.558622 0.205956 0.0844631
|
||||
0.426957 0.620411 0.488758 0.0844631
|
||||
0.760088 0.455904 0.324774 0.0844631
|
||||
0.907569 0.417477 0.251912 0.0844631
|
||||
0.896312 0.687184 0.628241 0.0844631
|
||||
1.03581 0.0887041 0.112468 0.0844631
|
||||
0.582752 1.04545 0.529264 0.0844631
|
||||
0.557502 1.01859 0.694119 0.0844631
|
||||
0.84196 -0.003327 0.530338 0.0844631
|
||||
1.08116 0.0792821 0.70289 0.0844631
|
||||
0.0811649 1.07928 0.70289 0.0844631
|
||||
1.08116 1.07928 0.70289 0.0844631
|
||||
-0.005107 0.869664 0.237064 0.0844631
|
||||
0.844254 1.0475 0.812703 0.0844631
|
||||
1.05708 0.629902 0.0756787 0.0844631
|
||||
0.0570839 0.629902 1.07568 0.0844631
|
||||
1.05708 0.629902 1.07568 0.0844631
|
||||
0.00559676 0.182636 -0.024645 0.0844631
|
||||
1.0056 0.182636 0.975355 0.0844631
|
||||
1.0056 0.182636 -0.024645 0.0844631
|
||||
0.572523 0.557371 1.07705 0.0844631
|
||||
0.404127 -0.0441 0.727017 0.0844631
|
||||
1.06474 0.683784 0.64081 0.0844631
|
||||
1.01735 0.0335981 0.271083 0.0844631
|
||||
0.017351 1.0336 0.271083 0.0844631
|
||||
1.01735 1.0336 0.271083 0.0844631
|
||||
0.634026 0.657202 -0.044558 0.0844631
|
||||
-0.063723 0.0301754 0.9533 0.0844631
|
||||
0.936277 0.0301754 -0.0467 0.0844631
|
||||
0.936277 1.03018 0.9533 0.0844631
|
||||
-0.063723 0.0301754 -0.0467 0.0844631
|
||||
-0.063723 1.03018 0.9533 0.0844631
|
||||
0.936277 1.03018 -0.0467 0.0844631
|
||||
-0.063723 1.03018 -0.0467 0.0844631
|
||||
0.404611 0.539355 1.07289 0.0844631
|
||||
0.694499 -0.07285 0.574585 0.0844631
|
||||
-0.002108 0.98119 0.593439 0.0844631
|
||||
0.997892 -0.01881 0.593439 0.0844631
|
||||
-0.002108 -0.01881 0.593439 0.0844631
|
||||
0.51469 0.833673 -0.049157 0.0844631
|
||||
0.429379 0.264579 1.0655 0.0844631
|
||||
-0.01531 0.958944 0.771791 0.0844631
|
||||
0.98469 -0.041056 0.771791 0.0844631
|
||||
-0.01531 -0.041056 0.771791 0.0844631
|
||||
-0.047981 0.25238 0.676499 0.0844631
|
||||
1.03957 0.440658 0.559985 0.0844631
|
||||
0.663478 -0.040924 0.270011 0.0844631
|
||||
0.118073 0.821444 1.06982 0.0844631
|
||||
0.597588 0.270182 1.05099 0.0844631
|
||||
0.710083 1.00668 0.425246 0.0844631
|
||||
0.893617 0.749839 -0.045608 0.0844631
|
||||
0.854573 1.0324 0.101137 0.0844631
|
||||
-0.026959 0.632152 0.222197 0.0844631
|
||||
0.31194 0.763128 -0.004994 0.0844631
|
||||
0.16544 0.598159 -0.049969 0.0844631
|
||||
-0.011498 0.421201 0.400142 0.0844631
|
||||
0.247648 0.450753 -0.057007 0.0844631
|
||||
0.269592 1.05661 0.709864 0.0844631
|
||||
0.613014 0.423649 -0.017902 0.0844631
|
||||
0.408804 1.03001 0.575286 0.0844631
|
||||
0.128416 -0.041191 0.168028 0.0844631
|
||||
0.724615 1.04327 0.69352 0.0844631
|
||||
0.770004 0.00074753 -0.041871 0.0844631
|
||||
0.770004 1.00075 0.958129 0.0844631
|
||||
0.770004 1.00075 -0.041871 0.0844631
|
||||
0.745934 0.349421 1.06683 0.0844631
|
||||
-0.024457 0.153798 0.811644 0.0844631
|
||||
0.161034 0.342383 1.05375 0.0844631
|
||||
0.499806 -0.014283 0.302229 0.0844631
|
||||
0.424695 -0.07244 0.441914 0.0844631
|
||||
0.460013 0.682085 1.00151 0.0844631
|
||||
-0.012627 0.930252 0.0795568 0.0844631
|
||||
0.987373 -0.069748 0.0795568 0.0844631
|
||||
0.987373 0.930252 1.07956 0.0844631
|
||||
-0.012627 -0.069748 0.0795568 0.0844631
|
||||
-0.012627 0.930252 1.07956 0.0844631
|
||||
0.987373 -0.069748 1.07956 0.0844631
|
||||
-0.012627 -0.069748 1.07956 0.0844631
|
||||
0.513417 0.544658 -0.080936 0.0844631
|
||||
0.889638 0.614201 1.05982 0.0844631
|
||||
-0.058455 0.724731 0.793543 0.0844631
|
||||
0.215164 1.00844 0.0318398 0.0844631
|
||||
0.215164 0.00844407 1.03184 0.0844631
|
||||
0.215164 1.00844 1.03184 0.0844631
|
||||
-0.080843 0.140163 0.485759 0.0844631
|
||||
0.612547 -0.009027 0.897731 0.0844631
|
||||
0.15907 -0.041257 0.791981 0.0844631
|
||||
0.888657 0.308625 -0.013808 0.0844631
|
||||
0.795381 0.439411 -0.067311 0.0844631
|
||||
1.03717 0.202381 0.38214 0.0844631
|
||||
0.758207 0.719974 1.05123 0.0844631
|
||||
-0.027628 0.903228 0.445768 0.0844631
|
||||
0.0607607 0.730332 -0.060385 0.0844631
|
||||
1.06076 0.730332 0.939615 0.0844631
|
||||
1.06076 0.730332 -0.060385 0.0844631
|
||||
-0.018802 0.828728 0.664238 0.0844631
|
||||
0.739213 0.53735 1.05834 0.0844631
|
||||
0.82734 0.158482 -0.061066 0.0844631
|
||||
0.879107 1.02014 0.367227 0.0844631
|
||||
0.687105 1.01105 0.106996 0.0844631
|
||||
1.08182 0.316873 0.906747 0.0844631
|
||||
0.200611 0.873239 -0.068379 0.0844631
|
||||
1.05714 0.0492633 0.45064 0.0844631
|
||||
0.0571424 1.04926 0.45064 0.0844631
|
||||
1.05714 1.04926 0.45064 0.0844631
|
||||
0.328101 0.134057 1.03024 0.0844631
|
||||
-0.038033 0.765697 0.108057 0.0844631
|
||||
-0.080432 0.756006 0.348926 0.0844631
|
||||
-0.008334 0.744856 0.501286 0.0844631
|
||||
0.295511 0.290355 -0.034256 0.0844631
|
||||
0.794554 0.61734 -0.079767 0.0844631
|
||||
1.00195 0.33119 0.109448 0.0844631
|
||||
0.495406 0.122105 1.00323 0.0844631
|
||||
0.831011 0.872402 1.05235 0.0844631
|
||||
0.279675 1.03912 0.878759 0.0844631
|
||||
0.433519 0.406039 -0.026742 0.0844631
|
||||
0.842781 -0.077306 0.687626 0.0844631
|
||||
1.03809 0.639181 0.37794 0.0844631
|
||||
-0.044557 0.579247 0.512529 0.0844631
|
||||
0.296401 1.0372 0.449389 0.0844631
|
||||
0.656098 0.143532 -0.04426 0.0844631
|
||||
-0.034064 0.546205 0.690001 0.0844631
|
||||
0.334146 1.01467 0.286282 0.0844631
|
||||
0.174903 1.05082 0.329537 0.0844631
|
||||
0.668068 0.875575 1.0079 0.0844631
|
||||
0.0335784 0.895984 -0.079272 0.0844631
|
||||
1.03358 0.895984 0.920728 0.0844631
|
||||
1.03358 0.895984 -0.079272 0.0844631
|
||||
0.353489 -0.073784 0.98045 0.0844631
|
||||
0.353489 0.926216 -0.01955 0.0844631
|
||||
0.353489 -0.073784 -0.01955 0.0844631
|
||||
0.0214488 0.483245 -0.000202 0.0844631
|
||||
1.02145 0.483245 0.999798 0.0844631
|
||||
1.02145 0.483245 -0.000202 0.0844631
|
||||
0.385287 1.00679 0.125476 0.0844631
|
||||
0.333229 0.595868 -0.069398 0.0844631
|
||||
0.533036 -0.034339 0.0546621 0.0844631
|
||||
0.533036 0.965661 1.05466 0.0844631
|
||||
0.533036 -0.034339 1.05466 0.0844631
|
||||
1.03485 0.392455 0.721819 0.0844631
|
||||
0.17733 1.04195 0.56912 0.0844631
|
||||
0.16378 0.173693 1.04148 0.0844631
|
||||
0.0995727 0.0546697 -0.082344 0.0844631
|
||||
0.0995727 1.05467 0.917656 0.0844631
|
||||
0.0995727 1.05467 -0.082344 0.0844631
|
||||
0.447591 1.02077 0.876813 0.0844631
|
||||
1.0624 0.183214 0.56894 0.0844631
|
||||
-0.05237 0.410834 0.865314 0.0844631
|
||||
-0.046311 0.579147 0.878359 0.0844631
|
||||
1.08126 0.802026 0.365527 0.0844631
|
||||
-0.047859 0.303167 0.515389 0.0844631
|
||||
1.06268 0.350381 0.265908 0.0844631
|
||||
1.03733 0.485658 0.167958 0.0844631
|
||||
0.836323 -0.072281 0.232454 0.0844631
|
8
example-gpu/Color.in
Normal file
8
example-gpu/Color.in
Normal file
@ -0,0 +1,8 @@
|
||||
ID
|
||||
80 32 128
|
||||
1.0
|
||||
1.0e-2 0.9 0.1 0.9
|
||||
0.0 0.0 0.0
|
||||
0 1.0 1.0
|
||||
500 500 1e-5
|
||||
|
1
example-gpu/Domain.in
Normal file
1
example-gpu/Domain.in
Normal file
@ -0,0 +1 @@
|
||||
2 2 2
|
BIN
example-gpu/ID.00000
Normal file
BIN
example-gpu/ID.00000
Normal file
Binary file not shown.
BIN
example-gpu/ID.00001
Normal file
BIN
example-gpu/ID.00001
Normal file
Binary file not shown.
BIN
example-gpu/ID.00002
Normal file
BIN
example-gpu/ID.00002
Normal file
Binary file not shown.
BIN
example-gpu/ID.00003
Normal file
BIN
example-gpu/ID.00003
Normal file
Binary file not shown.
BIN
example-gpu/ID.00004
Normal file
BIN
example-gpu/ID.00004
Normal file
Binary file not shown.
BIN
example-gpu/ID.00005
Normal file
BIN
example-gpu/ID.00005
Normal file
Binary file not shown.
BIN
example-gpu/ID.00006
Normal file
BIN
example-gpu/ID.00006
Normal file
Binary file not shown.
BIN
example-gpu/ID.00007
Normal file
BIN
example-gpu/ID.00007
Normal file
Binary file not shown.
22
example-gpu/RunColor.hokiespeed
Executable file
22
example-gpu/RunColor.hokiespeed
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
|
||||
#PBS -l walltime=00:10:00
|
||||
# Set the number of nodes, and the number of processors per node (generally should be 6)
|
||||
#PBS -l nodes=8:ppn=1
|
||||
#PBS -A hokiespeed
|
||||
# Access group, queue, and accounting project
|
||||
#PBS -W group_list=hokiespeed
|
||||
|
||||
module purge
|
||||
module load gcc cuda mvapich2/1.9rc1
|
||||
|
||||
cd $PBS_O_WORKDIR
|
||||
|
||||
echo "------------------------------------------"
|
||||
echo "Running LBM using MPI!"
|
||||
echo "Number of processors = " $PBS_NP
|
||||
echo "------------------------------------------"
|
||||
|
||||
mpirun_rsh -np $PBS_NP -hostfile $PBS_NODEFILE MV2_USE_CUDA=1 ~/LBPM-WIA/bin/ColorLBM
|
||||
|
||||
exit;
|
96
example-gpu/hostfile-mvapich2
Normal file
96
example-gpu/hostfile-mvapich2
Normal file
@ -0,0 +1,96 @@
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs060
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs061
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs064
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs065
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs066
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs067
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs068
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
||||
hs069
|
404
example-gpu/pack.out
Normal file
404
example-gpu/pack.out
Normal file
@ -0,0 +1,404 @@
|
||||
Number of Spheres: 399
|
||||
Domain Length (x,y,z): 1, 1, 1
|
||||
Media porosity: 0.369
|
||||
log(r) Normal with mean -2.47144, variance 0
|
||||
Mean coordination No. 11.84
|
||||
0.0358075 0.0887041 0.112468 0.0844631
|
||||
0.118482 0.318651 0.473828 0.0844631
|
||||
0.582752 0.0454491 0.529264 0.0844631
|
||||
0.852622 0.828954 0.547441 0.0844631
|
||||
0.807778 0.54718 0.595125 0.0844631
|
||||
0.557502 0.0185902 0.694119 0.0844631
|
||||
0.832159 0.72426 0.207901 0.0844631
|
||||
0.84196 0.996673 0.530338 0.0844631
|
||||
0.0811649 0.0792821 0.70289 0.0844631
|
||||
0.994893 0.869664 0.237064 0.0844631
|
||||
0.488065 0.221434 0.32131 0.0844631
|
||||
0.170562 0.759156 0.532848 0.0844631
|
||||
0.844254 0.0475049 0.812703 0.0844631
|
||||
0.711717 0.835039 0.165973 0.0844631
|
||||
0.545693 0.415679 0.814859 0.0844631
|
||||
0.291362 0.221277 0.740625 0.0844631
|
||||
0.625387 0.177012 0.665196 0.0844631
|
||||
0.226285 0.692765 0.689361 0.0844631
|
||||
0.891466 0.0957847 0.657861 0.0844631
|
||||
0.517532 0.273905 0.902284 0.0844631
|
||||
0.410399 0.682132 0.16299 0.0844631
|
||||
0.0570839 0.629902 0.0756787 0.0844631
|
||||
0.00559676 0.182636 0.975355 0.0844631
|
||||
0.132737 0.531505 0.667699 0.0844631
|
||||
0.662373 0.662491 0.762575 0.0844631
|
||||
0.572523 0.557371 0.0770451 0.0844631
|
||||
0.241216 0.554304 0.795169 0.0844631
|
||||
0.404127 0.9559 0.727017 0.0844631
|
||||
0.84986 0.285616 0.807583 0.0844631
|
||||
0.622755 0.508751 0.695781 0.0844631
|
||||
0.0647357 0.683784 0.64081 0.0844631
|
||||
0.017351 0.0335981 0.271083 0.0844631
|
||||
0.634026 0.657202 0.955442 0.0844631
|
||||
0.665255 0.637861 0.595479 0.0844631
|
||||
0.936277 0.0301754 0.9533 0.0844631
|
||||
0.465166 0.519937 0.229393 0.0844631
|
||||
0.27117 0.85172 0.724784 0.0844631
|
||||
0.404611 0.539355 0.0728933 0.0844631
|
||||
0.738345 0.774204 0.892481 0.0844631
|
||||
0.784738 0.235691 0.659926 0.0844631
|
||||
0.694499 0.92715 0.574585 0.0844631
|
||||
0.203094 0.29767 0.618523 0.0844631
|
||||
0.997892 0.98119 0.593439 0.0844631
|
||||
0.51469 0.833673 0.950843 0.0844631
|
||||
0.131958 0.596077 0.511604 0.0844631
|
||||
0.429379 0.264579 0.0654981 0.0844631
|
||||
0.98469 0.958944 0.771791 0.0844631
|
||||
0.952019 0.25238 0.676499 0.0844631
|
||||
0.0395681 0.440658 0.559985 0.0844631
|
||||
0.663478 0.959076 0.270011 0.0844631
|
||||
0.118073 0.821444 0.0698155 0.0844631
|
||||
0.597588 0.270182 0.0509888 0.0844631
|
||||
0.823071 0.555628 0.203843 0.0844631
|
||||
0.710083 0.00668089 0.425246 0.0844631
|
||||
0.708692 0.904726 0.788856 0.0844631
|
||||
0.293722 0.292769 0.47605 0.0844631
|
||||
0.33054 0.402152 0.352697 0.0844631
|
||||
0.103216 0.475309 0.852193 0.0844631
|
||||
0.54144 0.881093 0.790931 0.0844631
|
||||
0.588392 0.889518 0.424822 0.0844631
|
||||
0.54267 0.681877 0.268062 0.0844631
|
||||
0.671421 0.784318 0.321985 0.0844631
|
||||
0.893617 0.749839 0.954392 0.0844631
|
||||
0.612948 0.108126 0.331386 0.0844631
|
||||
0.40227 0.503457 0.798711 0.0844631
|
||||
0.854573 0.0324028 0.101137 0.0844631
|
||||
0.426354 0.122944 0.715253 0.0844631
|
||||
0.973041 0.632152 0.222197 0.0844631
|
||||
0.232692 0.0935994 0.176676 0.0844631
|
||||
0.303504 0.575786 0.59508 0.0844631
|
||||
0.365845 0.667468 0.781117 0.0844631
|
||||
0.764071 0.282961 0.49907 0.0844631
|
||||
0.31194 0.763128 0.995006 0.0844631
|
||||
0.16544 0.598159 0.950031 0.0844631
|
||||
0.988502 0.421201 0.400142 0.0844631
|
||||
0.0904952 0.804367 0.790716 0.0844631
|
||||
0.247648 0.450753 0.942993 0.0844631
|
||||
0.269592 0.0566081 0.709864 0.0844631
|
||||
0.251452 0.261402 0.171533 0.0844631
|
||||
0.701449 0.298528 0.342954 0.0844631
|
||||
0.613014 0.423649 0.982098 0.0844631
|
||||
0.408804 0.0300075 0.575286 0.0844631
|
||||
0.299217 0.816215 0.42207 0.0844631
|
||||
0.576412 0.321367 0.588916 0.0844631
|
||||
0.519125 0.705171 0.841281 0.0844631
|
||||
0.200777 0.684171 0.384522 0.0844631
|
||||
0.553896 0.522574 0.542147 0.0844631
|
||||
0.128416 0.958809 0.168028 0.0844631
|
||||
0.724615 0.0432676 0.69352 0.0844631
|
||||
0.770004 0.00074753 0.958129 0.0844631
|
||||
0.207296 0.447809 0.541212 0.0844631
|
||||
0.903614 0.193834 0.109557 0.0844631
|
||||
0.215384 0.415782 0.229858 0.0844631
|
||||
0.666485 0.318737 0.747423 0.0844631
|
||||
0.745934 0.349421 0.0668263 0.0844631
|
||||
0.766981 0.0877808 0.234542 0.0844631
|
||||
0.315324 0.397626 0.0947259 0.0844631
|
||||
0.610816 0.534216 0.313762 0.0844631
|
||||
0.687703 0.775593 0.500285 0.0844631
|
||||
0.43313 0.83717 0.0998827 0.0844631
|
||||
0.561228 0.135274 0.816213 0.0844631
|
||||
0.213419 0.355151 0.807995 0.0844631
|
||||
0.975543 0.153798 0.811644 0.0844631
|
||||
0.161034 0.342383 0.0537506 0.0844631
|
||||
0.725965 0.170735 0.804364 0.0844631
|
||||
0.91098 0.16578 0.275974 0.0844631
|
||||
0.275188 0.74229 0.244446 0.0844631
|
||||
0.389144 0.64072 0.325376 0.0844631
|
||||
0.554735 0.372235 0.282359 0.0844631
|
||||
0.104658 0.191284 0.227677 0.0844631
|
||||
0.499806 0.985717 0.302229 0.0844631
|
||||
0.470468 0.486829 0.394958 0.0844631
|
||||
0.587684 0.750647 0.08832 0.0844631
|
||||
0.299257 0.432828 0.684974 0.0844631
|
||||
0.424695 0.92756 0.441914 0.0844631
|
||||
0.504459 0.272912 0.733867 0.0844631
|
||||
0.807186 0.287712 0.211658 0.0844631
|
||||
0.460013 0.682085 0.00151398 0.0844631
|
||||
0.860599 0.886291 0.860442 0.0844631
|
||||
0.53648 0.087021 0.172119 0.0844631
|
||||
0.82388 0.678662 0.475869 0.0844631
|
||||
0.196931 0.186369 0.876273 0.0844631
|
||||
0.463229 0.765994 0.411131 0.0844631
|
||||
0.987373 0.930252 0.0795568 0.0844631
|
||||
0.513417 0.544658 0.919064 0.0844631
|
||||
0.185582 0.164945 0.453624 0.0844631
|
||||
0.0924416 0.635728 0.800369 0.0844631
|
||||
0.889638 0.614201 0.0598228 0.0844631
|
||||
0.941545 0.724731 0.793543 0.0844631
|
||||
0.731609 0.123715 0.545145 0.0844631
|
||||
0.679436 0.640664 0.177882 0.0844631
|
||||
0.218729 0.671364 0.101905 0.0844631
|
||||
0.334539 0.739691 0.568468 0.0844631
|
||||
0.215164 0.00844407 0.0318398 0.0844631
|
||||
0.669247 0.522968 0.85756 0.0844631
|
||||
0.299222 0.158529 0.578448 0.0844631
|
||||
0.919157 0.140163 0.485759 0.0844631
|
||||
0.591358 0.657196 0.427926 0.0844631
|
||||
0.268799 0.869125 0.133054 0.0844631
|
||||
0.612547 0.990973 0.897731 0.0844631
|
||||
0.15907 0.958743 0.791981 0.0844631
|
||||
0.548442 0.844609 0.223106 0.0844631
|
||||
0.888657 0.308625 0.986192 0.0844631
|
||||
0.795381 0.439411 0.932689 0.0844631
|
||||
0.635763 0.80385 0.674 0.0844631
|
||||
0.0371748 0.202381 0.38214 0.0844631
|
||||
0.386407 0.16184 0.192518 0.0844631
|
||||
0.758207 0.719974 0.0512279 0.0844631
|
||||
0.457535 0.0925071 0.426104 0.0844631
|
||||
0.972372 0.903228 0.445768 0.0844631
|
||||
0.0607607 0.730332 0.939615 0.0844631
|
||||
0.981198 0.828728 0.664238 0.0844631
|
||||
0.739213 0.53735 0.0583449 0.0844631
|
||||
0.82734 0.158482 0.938934 0.0844631
|
||||
0.879107 0.0201421 0.367227 0.0844631
|
||||
0.687105 0.0110493 0.106996 0.0844631
|
||||
0.0818178 0.316873 0.906747 0.0844631
|
||||
0.200611 0.873239 0.931621 0.0844631
|
||||
0.106137 0.24103 0.744726 0.0844631
|
||||
0.435493 0.793483 0.692769 0.0844631
|
||||
0.528055 0.731786 0.56564 0.0844631
|
||||
0.0571424 0.0492633 0.45064 0.0844631
|
||||
0.135322 0.891012 0.632386 0.0844631
|
||||
0.328101 0.134057 0.0302443 0.0844631
|
||||
0.869313 0.301052 0.368178 0.0844631
|
||||
0.961967 0.765697 0.108057 0.0844631
|
||||
0.668496 0.446111 0.181676 0.0844631
|
||||
0.919568 0.756006 0.348926 0.0844631
|
||||
0.991666 0.744856 0.501286 0.0844631
|
||||
0.295511 0.290355 0.965744 0.0844631
|
||||
0.458841 0.605759 0.654311 0.0844631
|
||||
0.794554 0.61734 0.920233 0.0844631
|
||||
0.0019474 0.33119 0.109448 0.0844631
|
||||
0.46369 0.309833 0.463181 0.0844631
|
||||
0.296394 0.904238 0.566223 0.0844631
|
||||
0.890565 0.405413 0.63493 0.0844631
|
||||
0.144195 0.575774 0.262804 0.0844631
|
||||
0.210047 0.888592 0.29819 0.0844631
|
||||
0.617947 0.229081 0.213569 0.0844631
|
||||
0.495406 0.122105 0.00322827 0.0844631
|
||||
0.831011 0.872402 0.0523457 0.0844631
|
||||
0.279675 0.0391166 0.878759 0.0844631
|
||||
0.433519 0.406039 0.973258 0.0844631
|
||||
0.702773 0.535258 0.463338 0.0844631
|
||||
0.842781 0.922694 0.687626 0.0844631
|
||||
0.703096 0.29956 0.911215 0.0844631
|
||||
0.0380851 0.639181 0.37794 0.0844631
|
||||
0.379016 0.820676 0.851045 0.0844631
|
||||
0.723109 0.401954 0.611759 0.0844631
|
||||
0.955443 0.579247 0.512529 0.0844631
|
||||
0.296401 0.037199 0.449389 0.0844631
|
||||
0.881657 0.447792 0.087761 0.0844631
|
||||
0.656098 0.143532 0.95574 0.0844631
|
||||
0.965936 0.546205 0.690001 0.0844631
|
||||
0.334146 0.0146719 0.286282 0.0844631
|
||||
0.174903 0.0508213 0.329537 0.0844631
|
||||
0.668068 0.875575 0.00789851 0.0844631
|
||||
0.517478 0.402511 0.119801 0.0844631
|
||||
0.0335784 0.895984 0.920728 0.0844631
|
||||
0.353489 0.926216 0.98045 0.0844631
|
||||
0.515858 0.899496 0.581788 0.0844631
|
||||
0.0214488 0.483245 0.999798 0.0844631
|
||||
0.836999 0.433238 0.473884 0.0844631
|
||||
0.381474 0.299744 0.621214 0.0844631
|
||||
0.893548 0.554202 0.357359 0.0844631
|
||||
0.385287 0.00679129 0.125476 0.0844631
|
||||
0.382082 0.176473 0.875897 0.0844631
|
||||
0.375338 0.439285 0.526206 0.0844631
|
||||
0.333229 0.595868 0.930602 0.0844631
|
||||
0.533036 0.965661 0.0546621 0.0844631
|
||||
0.0348509 0.392455 0.721819 0.0844631
|
||||
0.613491 0.216291 0.46143 0.0844631
|
||||
0.17733 0.0419469 0.56912 0.0844631
|
||||
0.185888 0.506077 0.0901699 0.0844631
|
||||
0.166015 0.914118 0.459266 0.0844631
|
||||
0.746306 0.633286 0.332833 0.0844631
|
||||
0.225214 0.724578 0.855262 0.0844631
|
||||
0.820434 0.603526 0.753874 0.0844631
|
||||
0.16378 0.173693 0.0414764 0.0844631
|
||||
0.0995727 0.0546697 0.917656 0.0844631
|
||||
0.447591 0.020769 0.876813 0.0844631
|
||||
0.0623956 0.183214 0.56894 0.0844631
|
||||
0.94763 0.410834 0.865314 0.0844631
|
||||
0.953689 0.579147 0.878359 0.0844631
|
||||
0.404145 0.327586 0.22019 0.0844631
|
||||
0.800574 0.868069 0.391566 0.0844631
|
||||
0.287223 0.546206 0.429562 0.0844631
|
||||
0.606297 0.397675 0.441199 0.0844631
|
||||
0.380349 0.33851 0.827832 0.0844631
|
||||
0.202245 0.28176 0.331846 0.0844631
|
||||
0.47476 0.434332 0.662686 0.0844631
|
||||
0.081255 0.802026 0.365527 0.0844631
|
||||
0.790427 0.768306 0.731892 0.0844631
|
||||
0.952141 0.303167 0.515389 0.0844631
|
||||
0.735282 0.184001 0.0993647 0.0844631
|
||||
0.108137 0.733294 0.213575 0.0844631
|
||||
0.0626769 0.350381 0.265908 0.0844631
|
||||
0.328084 0.17067 0.350811 0.0844631
|
||||
0.0373343 0.485658 0.167958 0.0844631
|
||||
0.487331 0.179389 0.567878 0.0844631
|
||||
0.782697 0.157339 0.387679 0.0844631
|
||||
0.392496 0.856145 0.287012 0.0844631
|
||||
0.78666 0.43832 0.763992 0.0844631
|
||||
0.836323 0.927719 0.232454 0.0844631
|
||||
0.152772 0.455916 0.381533 0.0844631
|
||||
0.302341 0.558622 0.205956 0.0844631
|
||||
0.426957 0.620411 0.488758 0.0844631
|
||||
0.760088 0.455904 0.324774 0.0844631
|
||||
0.907569 0.417477 0.251912 0.0844631
|
||||
0.896312 0.687184 0.628241 0.0844631
|
||||
1.03581 0.0887041 0.112468 0.0844631
|
||||
0.582752 1.04545 0.529264 0.0844631
|
||||
0.557502 1.01859 0.694119 0.0844631
|
||||
0.84196 -0.003327 0.530338 0.0844631
|
||||
1.08116 0.0792821 0.70289 0.0844631
|
||||
0.0811649 1.07928 0.70289 0.0844631
|
||||
1.08116 1.07928 0.70289 0.0844631
|
||||
-0.005107 0.869664 0.237064 0.0844631
|
||||
0.844254 1.0475 0.812703 0.0844631
|
||||
1.05708 0.629902 0.0756787 0.0844631
|
||||
0.0570839 0.629902 1.07568 0.0844631
|
||||
1.05708 0.629902 1.07568 0.0844631
|
||||
0.00559676 0.182636 -0.024645 0.0844631
|
||||
1.0056 0.182636 0.975355 0.0844631
|
||||
1.0056 0.182636 -0.024645 0.0844631
|
||||
0.572523 0.557371 1.07705 0.0844631
|
||||
0.404127 -0.0441 0.727017 0.0844631
|
||||
1.06474 0.683784 0.64081 0.0844631
|
||||
1.01735 0.0335981 0.271083 0.0844631
|
||||
0.017351 1.0336 0.271083 0.0844631
|
||||
1.01735 1.0336 0.271083 0.0844631
|
||||
0.634026 0.657202 -0.044558 0.0844631
|
||||
-0.063723 0.0301754 0.9533 0.0844631
|
||||
0.936277 0.0301754 -0.0467 0.0844631
|
||||
0.936277 1.03018 0.9533 0.0844631
|
||||
-0.063723 0.0301754 -0.0467 0.0844631
|
||||
-0.063723 1.03018 0.9533 0.0844631
|
||||
0.936277 1.03018 -0.0467 0.0844631
|
||||
-0.063723 1.03018 -0.0467 0.0844631
|
||||
0.404611 0.539355 1.07289 0.0844631
|
||||
0.694499 -0.07285 0.574585 0.0844631
|
||||
-0.002108 0.98119 0.593439 0.0844631
|
||||
0.997892 -0.01881 0.593439 0.0844631
|
||||
-0.002108 -0.01881 0.593439 0.0844631
|
||||
0.51469 0.833673 -0.049157 0.0844631
|
||||
0.429379 0.264579 1.0655 0.0844631
|
||||
-0.01531 0.958944 0.771791 0.0844631
|
||||
0.98469 -0.041056 0.771791 0.0844631
|
||||
-0.01531 -0.041056 0.771791 0.0844631
|
||||
-0.047981 0.25238 0.676499 0.0844631
|
||||
1.03957 0.440658 0.559985 0.0844631
|
||||
0.663478 -0.040924 0.270011 0.0844631
|
||||
0.118073 0.821444 1.06982 0.0844631
|
||||
0.597588 0.270182 1.05099 0.0844631
|
||||
0.710083 1.00668 0.425246 0.0844631
|
||||
0.893617 0.749839 -0.045608 0.0844631
|
||||
0.854573 1.0324 0.101137 0.0844631
|
||||
-0.026959 0.632152 0.222197 0.0844631
|
||||
0.31194 0.763128 -0.004994 0.0844631
|
||||
0.16544 0.598159 -0.049969 0.0844631
|
||||
-0.011498 0.421201 0.400142 0.0844631
|
||||
0.247648 0.450753 -0.057007 0.0844631
|
||||
0.269592 1.05661 0.709864 0.0844631
|
||||
0.613014 0.423649 -0.017902 0.0844631
|
||||
0.408804 1.03001 0.575286 0.0844631
|
||||
0.128416 -0.041191 0.168028 0.0844631
|
||||
0.724615 1.04327 0.69352 0.0844631
|
||||
0.770004 0.00074753 -0.041871 0.0844631
|
||||
0.770004 1.00075 0.958129 0.0844631
|
||||
0.770004 1.00075 -0.041871 0.0844631
|
||||
0.745934 0.349421 1.06683 0.0844631
|
||||
-0.024457 0.153798 0.811644 0.0844631
|
||||
0.161034 0.342383 1.05375 0.0844631
|
||||
0.499806 -0.014283 0.302229 0.0844631
|
||||
0.424695 -0.07244 0.441914 0.0844631
|
||||
0.460013 0.682085 1.00151 0.0844631
|
||||
-0.012627 0.930252 0.0795568 0.0844631
|
||||
0.987373 -0.069748 0.0795568 0.0844631
|
||||
0.987373 0.930252 1.07956 0.0844631
|
||||
-0.012627 -0.069748 0.0795568 0.0844631
|
||||
-0.012627 0.930252 1.07956 0.0844631
|
||||
0.987373 -0.069748 1.07956 0.0844631
|
||||
-0.012627 -0.069748 1.07956 0.0844631
|
||||
0.513417 0.544658 -0.080936 0.0844631
|
||||
0.889638 0.614201 1.05982 0.0844631
|
||||
-0.058455 0.724731 0.793543 0.0844631
|
||||
0.215164 1.00844 0.0318398 0.0844631
|
||||
0.215164 0.00844407 1.03184 0.0844631
|
||||
0.215164 1.00844 1.03184 0.0844631
|
||||
-0.080843 0.140163 0.485759 0.0844631
|
||||
0.612547 -0.009027 0.897731 0.0844631
|
||||
0.15907 -0.041257 0.791981 0.0844631
|
||||
0.888657 0.308625 -0.013808 0.0844631
|
||||
0.795381 0.439411 -0.067311 0.0844631
|
||||
1.03717 0.202381 0.38214 0.0844631
|
||||
0.758207 0.719974 1.05123 0.0844631
|
||||
-0.027628 0.903228 0.445768 0.0844631
|
||||
0.0607607 0.730332 -0.060385 0.0844631
|
||||
1.06076 0.730332 0.939615 0.0844631
|
||||
1.06076 0.730332 -0.060385 0.0844631
|
||||
-0.018802 0.828728 0.664238 0.0844631
|
||||
0.739213 0.53735 1.05834 0.0844631
|
||||
0.82734 0.158482 -0.061066 0.0844631
|
||||
0.879107 1.02014 0.367227 0.0844631
|
||||
0.687105 1.01105 0.106996 0.0844631
|
||||
1.08182 0.316873 0.906747 0.0844631
|
||||
0.200611 0.873239 -0.068379 0.0844631
|
||||
1.05714 0.0492633 0.45064 0.0844631
|
||||
0.0571424 1.04926 0.45064 0.0844631
|
||||
1.05714 1.04926 0.45064 0.0844631
|
||||
0.328101 0.134057 1.03024 0.0844631
|
||||
-0.038033 0.765697 0.108057 0.0844631
|
||||
-0.080432 0.756006 0.348926 0.0844631
|
||||
-0.008334 0.744856 0.501286 0.0844631
|
||||
0.295511 0.290355 -0.034256 0.0844631
|
||||
0.794554 0.61734 -0.079767 0.0844631
|
||||
1.00195 0.33119 0.109448 0.0844631
|
||||
0.495406 0.122105 1.00323 0.0844631
|
||||
0.831011 0.872402 1.05235 0.0844631
|
||||
0.279675 1.03912 0.878759 0.0844631
|
||||
0.433519 0.406039 -0.026742 0.0844631
|
||||
0.842781 -0.077306 0.687626 0.0844631
|
||||
1.03809 0.639181 0.37794 0.0844631
|
||||
-0.044557 0.579247 0.512529 0.0844631
|
||||
0.296401 1.0372 0.449389 0.0844631
|
||||
0.656098 0.143532 -0.04426 0.0844631
|
||||
-0.034064 0.546205 0.690001 0.0844631
|
||||
0.334146 1.01467 0.286282 0.0844631
|
||||
0.174903 1.05082 0.329537 0.0844631
|
||||
0.668068 0.875575 1.0079 0.0844631
|
||||
0.0335784 0.895984 -0.079272 0.0844631
|
||||
1.03358 0.895984 0.920728 0.0844631
|
||||
1.03358 0.895984 -0.079272 0.0844631
|
||||
0.353489 -0.073784 0.98045 0.0844631
|
||||
0.353489 0.926216 -0.01955 0.0844631
|
||||
0.353489 -0.073784 -0.01955 0.0844631
|
||||
0.0214488 0.483245 -0.000202 0.0844631
|
||||
1.02145 0.483245 0.999798 0.0844631
|
||||
1.02145 0.483245 -0.000202 0.0844631
|
||||
0.385287 1.00679 0.125476 0.0844631
|
||||
0.333229 0.595868 -0.069398 0.0844631
|
||||
0.533036 -0.034339 0.0546621 0.0844631
|
||||
0.533036 0.965661 1.05466 0.0844631
|
||||
0.533036 -0.034339 1.05466 0.0844631
|
||||
1.03485 0.392455 0.721819 0.0844631
|
||||
0.17733 1.04195 0.56912 0.0844631
|
||||
0.16378 0.173693 1.04148 0.0844631
|
||||
0.0995727 0.0546697 -0.082344 0.0844631
|
||||
0.0995727 1.05467 0.917656 0.0844631
|
||||
0.0995727 1.05467 -0.082344 0.0844631
|
||||
0.447591 1.02077 0.876813 0.0844631
|
||||
1.0624 0.183214 0.56894 0.0844631
|
||||
-0.05237 0.410834 0.865314 0.0844631
|
||||
-0.046311 0.579147 0.878359 0.0844631
|
||||
1.08126 0.802026 0.365527 0.0844631
|
||||
-0.047859 0.303167 0.515389 0.0844631
|
||||
1.06268 0.350381 0.265908 0.0844631
|
||||
1.03733 0.485658 0.167958 0.0844631
|
||||
0.836323 -0.072281 0.232454 0.0844631
|
813
gpu/Color.cu
Normal file
813
gpu/Color.cu
Normal file
@ -0,0 +1,813 @@
|
||||
#include <cuda.h>
|
||||
|
||||
// Double-precision atomic add implemented with a 64-bit compare-and-swap loop.
//   address - location of the double to update
//   val     - amount to add
// Returns the value that was stored at `address` immediately before this
// thread's update was committed.
// NOTE(review): devices of compute capability 6.0+ provide their own
// atomicAdd(double*, double); this definition presumably exists because the
// build targets older hardware (the Makefile uses -arch sm_20) - confirm
// before changing the target architecture.
__device__ double atomicAdd(double* address, double val)
{
    // Reinterpret the double as raw 64-bit storage so atomicCAS can be used on it
    unsigned long long int* slot = (unsigned long long int*)address;
    unsigned long long int observed = *slot;
    unsigned long long int expected;
    do {
        expected = observed;
        // Attempt to publish (expected + val); atomicCAS hands back what was really stored
        observed = atomicCAS(slot, expected,
                             __double_as_longlong(val +
                                                  __longlong_as_double(expected)));
        // The comparison is done on the integer bit patterns, not on the doubles
    } while (expected != observed);
    return __longlong_as_double(observed);
}
|
||||
// Initializes the two-component density field and the phase indicator.
//   ID   - per-node label: 1 and 2 select the two pure-component cases,
//          any other value takes the (das, dbs) branch
//   Den  - output, two interleaved values per node: Den[2*n], Den[2*n+1]
//   Phi  - output phase indicator, one value per node
//   das, dbs - densities written for nodes whose label is neither 1 nor 2
//   N    - number of nodes
//   S    - number of consecutive blockDim.x-sized chunks handled by each block
// Launch layout: 1-D grid and block; S*gridDim.x*blockDim.x should cover N.
__global__ void InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int N, int S)
{
    for (int chunk=0; chunk<S; chunk++){
        //........1-D node index handled by this thread in this chunk........
        const int node = S*blockIdx.x*blockDim.x + chunk*blockDim.x + threadIdx.x;
        if (node >= N) continue;    // past the end of the domain

        const char label = ID[node];
        double denA, denB, phase;
        if (label == 1){
            denA  = 1.0;
            denB  = 0.0;
            phase = 1.0;
        }
        else if (label == 2){
            denA  = 0.0;
            denB  = 1.0;
            phase = -1.0;
        }
        else{
            denA  = das;
            denB  = dbs;
            phase = (das-dbs)/(das+dbs);
        }

        Den[2*node]   = denA;
        Den[2*node+1] = denB;
        Phi[node]     = phase;
    }
}
|
||||
|
||||
|
||||
// Computes the first-order moments (momentum components) of the distributions
// at every node whose ID is positive.
//   ID        - per-node label; nodes with ID[n] <= 0 are skipped
//   disteven  - even-numbered distributions, f(2q) stored at disteven[q*N+n]
//   distodd   - odd-numbered distributions, f(2q+1) stored at distodd[q*N+n]
//   vel       - output, component-major: vel[n], vel[N+n], vel[2*N+n]
//   Nx,Ny,Nz  - domain size; N = Nx*Ny*Nz nodes
//   S         - number of consecutive blockDim.x-sized chunks handled by each block
// The values written are the raw sums of the distributions; they are not
// divided by the density. Entries of vel for skipped nodes are left untouched.
__global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz, int S)
{
    const int N = Nx*Ny*Nz;

    for (int s=0; s<S; s++){
        //........1-D node index handled by this thread in this chunk........
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;

        if (n >= N) continue;        // outside the domain
        if (ID[n] <= 0) continue;    // node excluded by its label

        //........Load the 18 directional distributions into registers.......
        const double f1  = distodd[n];
        const double f2  = disteven[N+n];
        const double f3  = distodd[1*N+n];
        const double f4  = disteven[2*N+n];
        const double f5  = distodd[2*N+n];
        const double f6  = disteven[3*N+n];
        const double f7  = distodd[3*N+n];
        const double f8  = disteven[4*N+n];
        const double f9  = distodd[4*N+n];
        const double f10 = disteven[5*N+n];
        const double f11 = distodd[5*N+n];
        const double f12 = disteven[6*N+n];
        const double f13 = distodd[6*N+n];
        const double f14 = disteven[7*N+n];
        const double f15 = distodd[7*N+n];
        const double f16 = disteven[8*N+n];
        const double f17 = distodd[8*N+n];
        const double f18 = disteven[9*N+n];

        //........Sum the distributions along each coordinate direction.......
        const double vx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
        const double vy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
        const double vz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;

        //........Store, one block of N values per component.................
        vel[n]     = vx;
        vel[N+n]   = vy;
        vel[2*N+n] = vz;
    }
}
|
||||
|
||||
//*************************************************************************
|
||||
//*************************************************************************
|
||||
// Pressure (fixed density) boundary condition applied to the first Nx*Ny nodes.
//   disteven  - even-numbered distributions, f(2q) stored at disteven[q*N+n]
//   distodd   - odd-numbered distributions, f(2q+1) stored at distodd[q*N+n]
//   din       - density imposed on the boundary nodes
//   Nx,Ny,Nz  - domain size; N = Nx*Ny*Nz nodes
//   S         - number of consecutive blockDim.x-sized chunks handled by each block
// For every node n < Nx*Ny the kernel derives the normal velocity uz from din
// and the known distributions, then overwrites the five distributions
// f6, f12, f13, f16 and f17 (their previous contents are not used).
// NOTE(review): n < Nx*Ny presumably selects the k = 0 layer of an x-fastest
// node ordering - confirm against the caller.
__global__ void PressureBC_inlet(double *disteven, double *distodd, double din,
                                 int Nx, int Ny, int Nz, int S)
{
    const int N = Nx*Ny*Nz;
    const int boundaryCount = Nx*Ny;

    for (int s=0; s<S; s++){
        //........1-D node index handled by this thread in this chunk........
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (n >= boundaryCount) continue;   // only the boundary nodes are modified

        //........Known distributions ("opposite" memory convention).........
        const double f0  = disteven[n];
        const double f1  = distodd[n];
        const double f2  = disteven[N+n];
        const double f3  = distodd[N+n];
        const double f4  = disteven[2*N+n];
        const double f5  = distodd[2*N+n];
        const double f7  = distodd[3*N+n];
        const double f8  = disteven[4*N+n];
        const double f9  = distodd[4*N+n];
        const double f10 = disteven[5*N+n];
        const double f11 = distodd[5*N+n];
        const double f14 = disteven[7*N+n];
        const double f15 = distodd[7*N+n];
        const double f18 = disteven[9*N+n];

        //........Normal velocity implied by the imposed density.............
        const double uz = 1.0 - (f0+f4+f3+f2+f1+f8+f7+f9+ f10 +
                                 2*(f5+ f15+f18+f11+f14))/din;

        //........Reconstruct the unknown distributions......................
        // Each value depends on the ones computed before it; keep this order.
        const double f6  = f5 + 0.3333333333333333*din*uz;
        const double f16 = f15 + 0.1666666666666667*din*uz;
        const double f17 = f16 + f4 - f3-f15+f18+f8-f7 +f9-f10;
        const double f12 = (din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f2+f1-f14+f11-f8+f7+f9-f10)*0.5;
        const double f13 = din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f12;

        //........Store in "opposite" memory location........................
        disteven[3*N+n] = f6;
        disteven[6*N+n] = f12;
        distodd[6*N+n]  = f13;
        disteven[8*N+n] = f16;
        distodd[8*N+n]  = f17;
    }
}
|
||||
|
||||
// Pressure (fixed density) boundary condition applied to nodes starting at
// index `outlet`.
//   disteven  - even-numbered distributions, f(2q) stored at disteven[q*N+n]
//   distodd   - odd-numbered distributions, f(2q+1) stored at distodd[q*N+n]
//   dout      - density imposed on the boundary nodes
//   Nx,Ny,Nz  - domain size; N = Nx*Ny*Nz nodes
//   S         - number of consecutive blockDim.x-sized chunks handled by each block
//   outlet    - index of the first node to process; thread indices are offset by it
// For every node outlet <= n < N reached by the launch, the kernel derives the
// normal velocity uz from dout and the known distributions, then overwrites
// the five distributions f5, f11, f14, f15 and f18 (their previous contents
// are not used).
// NOTE(review): `outlet` is presumably the start of the last z-layer,
// (Nz-1)*Nx*Ny - confirm against the caller.
__global__ void PressureBC_outlet(double *disteven, double *distodd, double dout,
                                  int Nx, int Ny, int Nz, int S, int outlet)
{
    const int N = Nx*Ny*Nz;

    for (int s=0; s<S; s++){
        //........1-D node index, shifted to the start of the outlet region...
        const int n = outlet + S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (n >= N) continue;   // past the end of the domain

        //........Known distributions ("opposite" memory convention).........
        const double f0  = disteven[n];
        const double f1  = distodd[n];
        const double f2  = disteven[N+n];
        const double f3  = distodd[N+n];
        const double f4  = disteven[2*N+n];
        const double f6  = disteven[3*N+n];
        const double f7  = distodd[3*N+n];
        const double f8  = disteven[4*N+n];
        const double f9  = distodd[4*N+n];
        const double f10 = disteven[5*N+n];
        const double f12 = disteven[6*N+n];
        const double f13 = distodd[6*N+n];
        const double f16 = disteven[8*N+n];
        const double f17 = distodd[8*N+n];

        //........Normal velocity implied by the imposed density.............
        const double uz = -1.0 + (f0+f4+f3+f2+f1+f8+f7+f9+f10 + 2*(f6+f16+f17+f12+f13))/dout;

        //........Reconstruct the unknown distributions......................
        // Each value depends on the ones computed before it; keep this order.
        const double f5  = f6 - 0.33333333333333338*dout* uz;
        const double f15 = f16 - 0.16666666666666678*dout* uz;
        const double f18 = f15 - f4 + f3-f16+f17-f8+f7-f9+f10;
        const double f11 = (-dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18+f2-f1-f13+f12+f8-f7-f9+f10)*0.5;
        const double f14 = -dout*uz+f6+ f16+f17+f12+f13-f5-f15-f18-f11;

        //........Store in "opposite" memory location........................
        distodd[2*N+n]  = f5;
        distodd[5*N+n]  = f11;
        disteven[7*N+n] = f14;
        distodd[7*N+n]  = f15;
        disteven[9*N+n] = f18;
    }
}
|
||||
//*************************************************************************
|
||||
// Computes the (un-normalized) color gradient of the phase indicator at every
// node from its 18 nearest neighbors, with periodic wrap-around on all three
// axes.
//   ID         - not read by this kernel
//   phi        - phase indicator, one value per node, x-fastest ordering
//                (n = i + Nx*j + Nx*Ny*k)
//   ColorGrad  - output, three interleaved components per node:
//                ColorGrad[3*n], ColorGrad[3*n+1], ColorGrad[3*n+2]
//   Nx,Ny,Nz   - domain size; N = Nx*Ny*Nz nodes
//   S          - number of consecutive blockDim.x-sized chunks handled by each block
// Each component is the difference of phi across the node along that axis
// (weight 1 for the axis neighbors, 0.5 for the diagonal neighbors).
__global__ void ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz, int S)
{
    const int plane = Nx*Ny;       // nodes per z-layer
    const int N = plane*Nz;

    for (int s=0; s<S; s++){
        //........1-D node index handled by this thread in this chunk........
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (n >= N) continue;

        //........Back out the 3-D indices for node n........................
        const int k = n/plane;
        const int inPlane = n - plane*k;
        const int j = inPlane/Nx;
        const int i = inPlane - Nx*j;

        //........Index offsets to the six axis neighbors.....................
        // A step that would leave the domain is wrapped to the opposite face.
        const int xm = (i < 1)       ? Nx-1        : -1;
        const int xp = (i+1 >= Nx)   ? 1-Nx        : 1;
        const int ym = (j < 1)       ? plane-Nx    : -Nx;
        const int yp = (j+1 >= Ny)   ? Nx-plane    : Nx;
        const int zm = (k < 1)       ? N-plane     : -plane;
        const int zp = (k+1 >= Nz)   ? plane-N     : plane;

        //........Read the phase indicator at the 18 neighbors...............
        const double f1  = phi[n+xm];
        const double f2  = phi[n+xp];
        const double f3  = phi[n+ym];
        const double f4  = phi[n+yp];
        const double f5  = phi[n+zm];
        const double f6  = phi[n+zp];
        const double f7  = phi[n+xm+ym];
        const double f8  = phi[n+xp+yp];
        const double f9  = phi[n+xm+yp];
        const double f10 = phi[n+xp+ym];
        const double f11 = phi[n+xm+zm];
        const double f12 = phi[n+xp+zp];
        const double f13 = phi[n+xm+zp];
        const double f14 = phi[n+xp+zm];
        const double f15 = phi[n+ym+zm];
        const double f16 = phi[n+yp+zp];
        const double f17 = phi[n+ym+zp];
        const double f18 = phi[n+yp+zm];

        //........Compute the color gradient (left un-normalized here)........
        const double nx = -(f1-f2+0.5*(f7-f8+f9-f10+f11-f12+f13-f14));
        const double ny = -(f3-f4+0.5*(f7-f8-f9+f10+f15-f16+f17-f18));
        const double nz = -(f5-f6+0.5*(f11-f12-f13+f14+f15-f16-f17+f18));

        //........Store the color gradient...................................
        ColorGrad[3*n]   = nx;
        ColorGrad[3*n+1] = ny;
        ColorGrad[3*n+2] = nz;
    }
}
|
||||
//*************************************************************************
|
||||
__global__ void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
|
||||
double *Velocity, int Nx, int Ny, int Nz, int S,double rlx_setA, double rlx_setB,
|
||||
double alpha, double beta, double Fx, double Fy, double Fz, bool pBC)
|
||||
{
|
||||
|
||||
int n,N;
|
||||
// distributions
|
||||
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
|
||||
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
|
||||
|
||||
// non-conserved moments
|
||||
double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;
|
||||
// additional variables needed for computations
|
||||
double rho,jx,jy,jz,C,nx,ny,nz;
|
||||
|
||||
N = Nx*Ny*Nz;
|
||||
char id;
|
||||
|
||||
// S - number of threadblocks per grid block
|
||||
for (int s=0; s<S; s++){
|
||||
// for (int n=0; n<N; n++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||
|
||||
id = ID[n];
|
||||
|
||||
if (n<N){
|
||||
if (id > 0){
|
||||
|
||||
// Retrieve the color gradient
|
||||
nx = ColorGrad[3*n];
|
||||
ny = ColorGrad[3*n+1];
|
||||
nz = ColorGrad[3*n+2];
|
||||
//...........Normalize the Color Gradient.................................
|
||||
C = sqrt(nx*nx+ny*ny+nz*nz);
|
||||
nx = nx/C;
|
||||
ny = ny/C;
|
||||
nz = nz/C;
|
||||
//......No color gradient at z-boundary if pressure BC are set.............
|
||||
// if (pBC && k==0) nx = ny = nz = 0.f;
|
||||
// if (pBC && k==Nz-1) nx = ny = nz = 0.f;
|
||||
//........................................................................
|
||||
// READ THE DISTRIBUTIONS
|
||||
// (read from opposite array due to previous swap operation)
|
||||
//........................................................................
|
||||
f2 = distodd[n];
|
||||
f4 = distodd[N+n];
|
||||
f6 = distodd[2*N+n];
|
||||
f8 = distodd[3*N+n];
|
||||
f10 = distodd[4*N+n];
|
||||
f12 = distodd[5*N+n];
|
||||
f14 = distodd[6*N+n];
|
||||
f16 = distodd[7*N+n];
|
||||
f18 = distodd[8*N+n];
|
||||
//........................................................................
|
||||
f0 = disteven[n];
|
||||
f1 = disteven[N+n];
|
||||
f3 = disteven[2*N+n];
|
||||
f5 = disteven[3*N+n];
|
||||
f7 = disteven[4*N+n];
|
||||
f9 = disteven[5*N+n];
|
||||
f11 = disteven[6*N+n];
|
||||
f13 = disteven[7*N+n];
|
||||
f15 = disteven[8*N+n];
|
||||
f17 = disteven[9*N+n];
|
||||
//........................................................................
|
||||
// PERFORM RELAXATION PROCESS
|
||||
//........................................................................
|
||||
//....................compute the moments...............................................
|
||||
rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
|
||||
m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
|
||||
jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
|
||||
jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
|
||||
jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
|
||||
m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
|
||||
m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
|
||||
m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
|
||||
m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
|
||||
m13 = f8+f7-f10-f9;
|
||||
m14 = f16+f15-f18-f17;
|
||||
m15 = f12+f11-f14-f13;
|
||||
m16 = f7-f8+f9-f10-f11+f12-f13+f14;
|
||||
m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
|
||||
m18 = f11-f12-f13+f14-f15+f16+f17-f18;
|
||||
//..........Toelke, Fruediger et. al. 2006...............
|
||||
if (C == 0.0) nx = ny = nz = 1.0;
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2);
|
||||
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
|
||||
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
|
||||
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
|
||||
m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
|
||||
m12 = m12 + rlx_setA*( -0.5*((jy*jy-jz*jz)/rho) - m12);
|
||||
m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13);
|
||||
m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14);
|
||||
m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15);
|
||||
m16 = m16 + rlx_setB*( - m16);
|
||||
m17 = m17 + rlx_setB*( - m17);
|
||||
m18 = m18 + rlx_setB*( - m18);
|
||||
//.................inverse transformation......................................................
|
||||
f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
|
||||
f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
|
||||
+0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10);
|
||||
f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
|
||||
+0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10);
|
||||
f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
|
||||
+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
|
||||
f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
|
||||
+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
|
||||
f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
|
||||
+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
|
||||
f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
|
||||
+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
|
||||
f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
|
||||
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
|
||||
+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
|
||||
f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
|
||||
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
|
||||
+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
|
||||
f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
|
||||
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
|
||||
+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
|
||||
f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
|
||||
+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
|
||||
+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
|
||||
f11 = 0.05263157894736842*rho+0.003341687552213868*m1
|
||||
+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
|
||||
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
|
||||
-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
|
||||
f12 = 0.05263157894736842*rho+0.003341687552213868*m1
|
||||
+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
|
||||
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
|
||||
-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
|
||||
f13 = 0.05263157894736842*rho+0.003341687552213868*m1
|
||||
+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
|
||||
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
|
||||
-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
|
||||
f14 = 0.05263157894736842*rho+0.003341687552213868*m1
|
||||
+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
|
||||
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
|
||||
-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
|
||||
f15 = 0.05263157894736842*rho+0.003341687552213868*m1
|
||||
+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
|
||||
-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
|
||||
f16 = 0.05263157894736842*rho+0.003341687552213868*m1
|
||||
+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
|
||||
-0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
|
||||
f17 = 0.05263157894736842*rho+0.003341687552213868*m1
|
||||
+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
|
||||
-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
|
||||
f18 = 0.05263157894736842*rho+0.003341687552213868*m1
|
||||
+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
|
||||
-0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
|
||||
//.......................................................................................................
|
||||
// incorporate external force
|
||||
f1 += 0.16666666*Fx;
|
||||
f2 -= 0.16666666*Fx;
|
||||
f3 += 0.16666666*Fy;
|
||||
f4 -= 0.16666666*Fy;
|
||||
f5 += 0.16666666*Fz;
|
||||
f6 -= 0.16666666*Fz;
|
||||
f7 += 0.08333333333*(Fx+Fy);
|
||||
f8 -= 0.08333333333*(Fx+Fy);
|
||||
f9 += 0.08333333333*(Fx-Fy);
|
||||
f10 -= 0.08333333333*(Fx-Fy);
|
||||
f11 += 0.08333333333*(Fx+Fz);
|
||||
f12 -= 0.08333333333*(Fx+Fz);
|
||||
f13 += 0.08333333333*(Fx-Fz);
|
||||
f14 -= 0.08333333333*(Fx-Fz);
|
||||
f15 += 0.08333333333*(Fy+Fz);
|
||||
f16 -= 0.08333333333*(Fy+Fz);
|
||||
f17 += 0.08333333333*(Fy-Fz);
|
||||
f18 -= 0.08333333333*(Fy-Fz);
|
||||
//*********** WRITE UPDATED VALUES TO MEMORY ******************
|
||||
// Write the updated distributions
|
||||
//....EVEN.....................................
|
||||
disteven[n] = f0;
|
||||
disteven[N+n] = f2;
|
||||
disteven[2*N+n] = f4;
|
||||
disteven[3*N+n] = f6;
|
||||
disteven[4*N+n] = f8;
|
||||
disteven[5*N+n] = f10;
|
||||
disteven[6*N+n] = f12;
|
||||
disteven[7*N+n] = f14;
|
||||
disteven[8*N+n] = f16;
|
||||
disteven[9*N+n] = f18;
|
||||
//....ODD......................................
|
||||
distodd[n] = f1;
|
||||
distodd[N+n] = f3;
|
||||
distodd[2*N+n] = f5;
|
||||
distodd[3*N+n] = f7;
|
||||
distodd[4*N+n] = f9;
|
||||
distodd[5*N+n] = f11;
|
||||
distodd[6*N+n] = f13;
|
||||
distodd[7*N+n] = f15;
|
||||
distodd[8*N+n] = f17;
|
||||
//...Store the Velocity..........................
|
||||
Velocity[3*n] = jx;
|
||||
Velocity[3*n+1] = jy;
|
||||
Velocity[3*n+2] = jz;
|
||||
/* //...Store the Color Gradient....................
|
||||
ColorGrad[3*n] = C*nx;
|
||||
ColorGrad[3*n+1] = C*ny;
|
||||
ColorGrad[3*n+2] = C*nz;
|
||||
*/ //...............................................
|
||||
//***************************************************************
|
||||
} // check if n is in the solid
|
||||
} // check if n is in the domain
|
||||
} // loop over s
|
||||
}
|
||||
//*************************************************************************
|
||||
// DensityStreamD3Q7: recolor and stream the two component densities on a D3Q7 lattice.
//
// For every node n with ID[n] > 0 and Copy[2*n]+Copy[2*n+1] > 0 the kernel
//   1. reads the component densities (na, nb) from Copy, the color gradient from
//      ColorGrad and the velocity from Velocity,
//   2. deposits the rest contribution (weight 1/3) at node n,
//   3. for each of the three axes builds the +/- moving contributions (weight 1/9,
//      linear in velocity), shifts them by beta*delta along the normalized color
//      gradient, and adds them to the periodic neighbor -- or back onto node n when
//      that neighbor has ID == 0.
//
// Launch layout: 1-D grid of 1-D blocks; each thread handles S nodes,
//   n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x,  s = 0..S-1.
// Data layout: Den/Copy store the two components interleaved at [2*n], [2*n+1];
//   ColorGrad/Velocity store three components at [3*n .. 3*n+2].
// Den is only accumulated into here; it is not cleared by this kernel.
// Phi and pBC are accepted but never read.
//
// Every update of Den goes through atomicAdd, including the rest contribution:
// other threads add to Den[2*n] concurrently, so a plain "+=" there could lose updates.
// NOTE(review): native atomicAdd(double*) needs compute capability 6.0+; the Makefile
// targets sm_20, so an overload is presumably supplied elsewhere in this file -- confirm.
__global__ void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
		double beta, int Nx, int Ny, int Nz, bool pBC, int S)
{
	int idx;
	int in,jn,kn,n,nn,N;
	int q,Cqx,Cqy,Cqz;

	double na,nb;		// component densities at node n
	double ux,uy,uz;	// flow velocity
	double nx,ny,nz,C;	// color gradient direction and magnitude
	double a1,a2,b1,b2;	// moving contributions: 1 = +direction, 2 = -direction
	double sp,delta;
	double feq[6];		// equilibrium weights for the six moving directions
	// Positive-direction discrete velocities of the D3Q7 model
	int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}};
	N = Nx*Ny*Nz;

	// S - number of nodes handled by each thread
	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
		if (n<N){
			// Local density values
			na = Copy[2*n];
			nb = Copy[2*n+1];
			if (ID[n] > 0 && na+nb > 0.0){
				//.......Back out the 3-D indices for node n..............
				int k = n/(Nx*Ny);
				int j = (n-Nx*Ny*k)/Nx;
				int i = n-Nx*Ny*k-Nx*j;

				//.....Load the color gradient.........
				nx = ColorGrad[3*n];
				ny = ColorGrad[3*n+1];
				nz = ColorGrad[3*n+2];
				C = sqrt(nx*nx+ny*ny+nz*nz);
				// The direction is only used when C > 0, so skip the 0/0 division otherwise
				if (C > 0.0){
					nx = nx/C;
					ny = ny/C;
					nz = nz/C;
				}

				//....Load the flow velocity...........
				ux = Velocity[3*n];
				uy = Velocity[3*n+1];
				uz = Velocity[3*n+2];

				// Stationary value - distribution 0.
				// Atomic because neighboring threads add to these addresses concurrently.
				atomicAdd(&Den[2*n], 0.3333333333333333*na);
				atomicAdd(&Den[2*n+1], 0.3333333333333333*nb);

				// Non-stationary equilibrium weights
				feq[0] = 0.1111111111111111*(1+3*ux);
				feq[1] = 0.1111111111111111*(1-3*ux);
				feq[2] = 0.1111111111111111*(1+3*uy);
				feq[3] = 0.1111111111111111*(1-3*uy);
				feq[4] = 0.1111111111111111*(1+3*uz);
				feq[5] = 0.1111111111111111*(1-3*uz);

				// Construction and streaming for the components
				for (idx=0; idx<3; idx++){
					// Distribution index of the +direction; q+1 is the -direction
					q = 2*idx;
					// Associated discrete velocity
					Cqx = D3Q7[idx][0];
					Cqy = D3Q7[idx][1];
					Cqz = D3Q7[idx][2];

					// Generate the equilibrium contributions
					a1 = na*feq[q];
					b1 = nb*feq[q];
					a2 = na*feq[q+1];
					b2 = nb*feq[q+1];

					// Recolor the contributions
					if (C > 0.0){
						sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz);
						delta = na*nb/(na+nb)*0.1111111111111111*sp;
						a1 += beta*delta;
						a2 -= beta*delta;
						b1 -= beta*delta;
						b2 += beta*delta;
					}

					// .......Neighbor in the +direction..............
					in = i+Cqx;
					jn = j+Cqy;
					kn = k+Cqz;
					// Periodic wrap
					if (in<0) in+= Nx;
					if (jn<0) jn+= Ny;
					if (kn<0) kn+= Nz;
					if (!(in<Nx)) in-= Nx;
					if (!(jn<Ny)) jn-= Ny;
					if (!(kn<Nz)) kn-= Nz;
					nn = kn*Nx*Ny+jn*Nx+in;
					// Bounce-back: a neighbor with ID == 0 returns the contribution to node n
					if (ID[nn] == 0) nn = n;
					atomicAdd(&Den[2*nn], a1);
					atomicAdd(&Den[2*nn+1], b1);

					// .......Neighbor in the -direction..............
					in = i-Cqx;
					jn = j-Cqy;
					kn = k-Cqz;
					// Periodic wrap
					if (in<0) in+= Nx;
					if (jn<0) jn+= Ny;
					if (kn<0) kn+= Nz;
					if (!(in<Nx)) in-= Nx;
					if (!(jn<Ny)) jn-= Ny;
					if (!(kn<Nz)) kn-= Nz;
					nn = kn*Nx*Ny+jn*Nx+in;
					// Bounce-back: a neighbor with ID == 0 returns the contribution to node n
					if (ID[nn] == 0) nn = n;
					atomicAdd(&Den[2*nn], a2);
					atomicAdd(&Den[2*nn+1], b2);
				}
			}
		}
	}
}
|
||||
|
||||
// ComputePhi: finish a density-streaming step for every node with ID[n] > 0.
//
// For each such node the kernel
//   - reads the accumulated component densities Na = Den[2*n], Nb = Den[2*n+1],
//   - stores Phi[n] = (Na-Nb)/(Na+Nb)  (no guard against Na+Nb == 0),
//   - saves (Na, Nb) into Copy[2*n], Copy[2*n+1],
//   - resets Den[2*n], Den[2*n+1] to zero for the next accumulation.
// Nodes with ID[n] <= 0 are left untouched.
//
// Launch layout: 1-D grid of 1-D blocks; each thread handles S nodes,
//   n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x,  s = 0..S-1.
// Threads whose n falls at or beyond N do nothing.
__global__ void ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N, int S)
{
	int n;
	double Na,Nb;
	//...................................................................
	// S - number of nodes handled by each thread
	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
		// The bounds test must come first: ID[n] is only valid for n < N
		if (n<N && ID[n] > 0){
			// Get the density value (streaming already performed)
			Na = Den[2*n];
			Nb = Den[2*n+1];
			Phi[n] = (Na-Nb)/(Na+Nb);
			// Store the copy of the current density
			Copy[2*n] = Na;
			Copy[2*n+1] = Nb;
			// Zero the density value to get ready for the next streaming
			Den[2*n] = 0.0;
			Den[2*n+1] = 0.0;
		}
	}
	//...................................................................
}
|
||||
//*************************************************************************
|
||||
// Host entry point (C linkage): launch the InitDenColor kernel on a 1-D grid of
// nblocks blocks with nthreads threads each. S is forwarded as the kernel's last
// argument; all other arguments are passed through unchanged.
// The launch is not checked for errors and the call does not wait for completion.
extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S,
		char *ID, double *Den, double *Phi, double das, double dbs, int N)
{
	const dim3 grid(nblocks);
	const dim3 block(nthreads);
	InitDenColor<<<grid, block>>>(ID, Den, Phi, das, dbs, N, S);
}
|
||||
//*************************************************************************
|
||||
// Host entry point (C linkage): launch the ComputeColorGradient kernel on a 1-D grid
// of nBlocks blocks with nthreads threads each. S is forwarded as the kernel's last
// argument; all other arguments are passed through unchanged.
// The launch is not checked for errors and the call does not wait for completion.
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
		char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz)
{
	const dim3 grid(nBlocks);
	const dim3 block(nthreads);
	ComputeColorGradient<<<grid, block>>>(ID, Phi, ColorGrad, Nx, Ny, Nz, S);
}
|
||||
//*************************************************************************
|
||||
// Host entry point (C linkage): launch the ColorCollide kernel on a 1-D grid of
// nBlocks blocks with nthreads threads each.
// Note the reordering between this wrapper and the kernel: the kernel receives
// (ID, f_even, f_odd, ColorGrad, Velocity, Nx, Ny, Nz, S, rlxA, rlxB, alpha, beta,
//  Fx, Fy, Fz, pBC).
// The launch is not checked for errors and the call does not wait for completion.
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity,
		double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz,
		int Nx, int Ny, int Nz, bool pBC)
{
	const dim3 grid(nBlocks);
	const dim3 block(nthreads);
	ColorCollide<<<grid, block>>>(ID, f_even, f_odd, ColorGrad, Velocity,
			Nx, Ny, Nz, S,
			rlxA, rlxB, alpha, beta,
			Fx, Fy, Fz, pBC);
}
|
||||
//*************************************************************************
|
||||
// Host entry point (C linkage): launch the DensityStreamD3Q7 kernel on a 1-D grid of
// nBlocks blocks with nthreads threads each. S is forwarded as the kernel's last
// argument; all other arguments are passed through in the order given.
// The launch is not checked for errors and the call does not wait for completion.
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
		char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
		double beta, int Nx, int Ny, int Nz, bool pBC)
{
	const dim3 grid(nBlocks);
	const dim3 block(nthreads);
	DensityStreamD3Q7<<<grid, block>>>(ID, Den, Copy, Phi, ColorGrad, Velocity,
			beta, Nx, Ny, Nz, pBC, S);
}
|
||||
//*************************************************************************
|
||||
// Host entry point (C linkage): launch the ComputePhi kernel on a 1-D grid of nBlocks
// blocks with nthreads threads each. S is forwarded as the kernel's last argument.
// The launch is not checked for errors and the call does not wait for completion.
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S,
		char *ID, double *Phi, double *Copy, double *Den, int N)
{
	const dim3 grid(nBlocks);
	const dim3 block(nthreads);
	ComputePhi<<<grid, block>>>(ID, Phi, Copy, Den, N, S);
}
|
||||
//*************************************************************************
|
21
gpu/Color.h
Normal file
21
gpu/Color.h
Normal file
@ -0,0 +1,21 @@
|
||||
//
|
||||
//*************************************************************************
|
||||
//*************************************************************************
|
||||
// Launches the InitDenColor kernel with nblocks blocks of nthreads threads.
extern "C" void dvc_InitDenColor(
		int nblocks, int nthreads, int S,
		char *ID, double *Den, double *Phi,
		double das, double dbs, int N);
//*************************************************************************
// Launches the ComputeColorGradient kernel with nBlocks blocks of nthreads threads.
extern "C" void dvc_ComputeColorGradient(
		int nBlocks, int nthreads, int S,
		char *ID, double *Phi, double *ColorGrad,
		int Nx, int Ny, int Nz);
//*************************************************************************
// Launches the ColorCollide kernel with nBlocks blocks of nthreads threads.
extern "C" void dvc_ColorCollide(
		int nBlocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd,
		double *ColorGrad, double *Velocity,
		double rlxA, double rlxB,double alpha, double beta,
		double Fx, double Fy, double Fz,
		int Nx, int Ny, int Nz, bool pBC);
//*************************************************************************
// Launches the DensityStreamD3Q7 kernel with nBlocks blocks of nthreads threads.
extern "C" void dvc_DensityStreamD3Q7(
		int nBlocks, int nthreads, int S,
		char *ID, double *Den, double *Copy, double *Phi,
		double *ColorGrad, double *Velocity,
		double beta, int Nx, int Ny, int Nz, bool pBC);
//*************************************************************************
// Launches the ComputePhi kernel with nBlocks blocks of nthreads threads.
extern "C" void dvc_ComputePhi(
		int nBlocks, int nthreads, int S,
		char *ID, double *Phi, double *Copy, double *Den, int N);
|
||||
//*************************************************************************
|
19
gpu/CudaExtras.cu
Normal file
19
gpu/CudaExtras.cu
Normal file
@ -0,0 +1,19 @@
|
||||
// Basic cuda functions callable from C/C++ code
|
||||
#include <cuda.h>
|
||||
|
||||
// Allocate `size` bytes with cudaMalloc and store the resulting pointer in *address.
// The status returned by cudaMalloc is discarded; callers get no failure indication.
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size)
{
	(void) cudaMalloc(address, size);
}
|
||||
|
||||
// Copy `size` bytes from `source` to `dest` with cudaMemcpy in the host-to-device
// direction. The status returned by cudaMemcpy is discarded.
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size)
{
	(void) cudaMemcpy(dest, source, size, cudaMemcpyHostToDevice);
}
|
||||
|
||||
|
||||
// Copy `size` bytes from `source` to `dest` with cudaMemcpy in the device-to-host
// direction. The status returned by cudaMemcpy is discarded.
extern "C" void dvc_CopyToHost(void* dest, void* source, size_t size)
{
	(void) cudaMemcpy(dest, source, size, cudaMemcpyDeviceToHost);
}
|
||||
|
||||
// Block the calling host thread in cudaDeviceSynchronize().
// The status returned by cudaDeviceSynchronize is discarded.
extern "C" void dvc_Barrier()
{
	(void) cudaDeviceSynchronize();
}
|
12
gpu/CudaExtras.h
Normal file
12
gpu/CudaExtras.h
Normal file
@ -0,0 +1,12 @@
|
||||
//*************************************************************************
|
||||
// A few basic cuda functions callable from C / C++ code
|
||||
//************************************************************************
|
||||
//*************************************************************************
|
||||
// Allocates `size` bytes on the device; the pointer is returned through *address.
extern "C" void dvc_AllocateDeviceMemory(void** address,
		size_t size);
//*************************************************************************
// Copies `size` bytes from `source` to `dest` (host-to-device).
extern "C" void dvc_CopyToDevice(void* dest,
		void* source,
		size_t size);
//*************************************************************************
// Copies `size` bytes from `source` to `dest` (device-to-host).
extern "C" void dvc_CopyToHost(void* dest,
		void* source,
		size_t size);
//*************************************************************************
// Waits in cudaDeviceSynchronize().
extern "C" void dvc_Barrier();
|
||||
//*************************************************************************
|
359
gpu/D3Q19.cu
Normal file
359
gpu/D3Q19.cu
Normal file
@ -0,0 +1,359 @@
|
||||
#include <cuda.h>
|
||||
|
||||
// InitD3Q19: fill the 19 distributions of every node with constant values.
//
// Storage: f_even holds 10 slots (index q*N+n, q = 0..9) and f_odd holds 9 slots
// (q = 0..8), with N = Nx*Ny*Nz.
//   ID[n] > 0 : f_even slot 0 gets 0.3333333333333333;
//               f_odd slots 0..2 and f_even slots 1..3 get 0.055555555555555555;
//               f_odd slots 3..8 and f_even slots 4..9 get 0.0277777777777778.
//   otherwise : all 19 slots get -1.0.
//
// Launch layout: 1-D grid of 1-D blocks; each thread handles S nodes,
//   n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x,  s = 0..S-1.
__global__ void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz, int S)
{
	const int N = Nx*Ny*Nz;
	const double w0 = 0.3333333333333333;		// slot 0 of f_even
	const double w1 = 0.055555555555555555;		// first three odd/even pairs
	const double w2 = 0.0277777777777778;		// remaining six odd/even pairs

	for (int s=0; s<S; s++){
		const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
		if (!(n<N)) continue;

		if (ID[n] > 0){
			f_even[n] = w0;
			// Odd slot q is always paired with even slot q+1
			for (int q=0; q<3; q++){
				f_odd[q*N+n] = w1;
				f_even[(q+1)*N+n] = w1;
			}
			for (int q=3; q<9; q++){
				f_odd[q*N+n] = w2;
				f_even[(q+1)*N+n] = w2;
			}
		}
		else{
			// Mark every distribution of this node with -1.0
			f_even[n] = -1.0;
			for (int q=0; q<9; q++){
				f_odd[q*N+n] = -1.0;
				f_even[(q+1)*N+n] = -1.0;
			}
		}
	}
}
|
||||
|
||||
// PackDist: gather one distribution into a send buffer.
//
// For each entry idx < count, reads the lattice site n = list[idx] and copies
// dist[q*N+n] into sendbuf[start+idx]. dist may be the even or the odd array.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per list entry
// (idx = blockIdx.x*blockDim.x + threadIdx.x); surplus threads do nothing.
__global__ void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
	const int slot = blockIdx.x*blockDim.x + threadIdx.x;
	if (!(slot<count)) return;

	const int site = list[slot];
	sendbuf[start+slot] = dist[q*N+site];
}
|
||||
|
||||
|
||||
// MapRecvDist: unpack one distribution from a receive buffer.
//
// For each entry idx < count:
//   - n = list[idx] is decoded into (i,j,k) using n = k*Nx*Ny + j*Nx + i,
//   - the site is shifted by the discrete velocity (Cqx,Cqy,Cqz),
//   - recvbuf[start+idx] is written to dist[q*N+nn], nn being the shifted site.
// No periodic wrap is applied to the shifted indices, so the shifted site must lie
// inside the Nx x Ny x Nz box.
// The original comments state that, under the swap rule, the distributions in
// recvbuf are OPPOSITE of q, and that dist may be the even or the odd array.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per list entry
// (idx = blockIdx.x*blockDim.x + threadIdx.x); surplus threads do nothing.
__global__ void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
		double *recvbuf, double *dist, int Nx, int Ny, int Nz){
	int i,j,k,n,nn,idx;
	int N = Nx*Ny*Nz;
	idx = blockIdx.x*blockDim.x + threadIdx.x;
	if (idx<count){
		// Site index taken from the list
		n = list[idx];
		// Get the 3-D indices; the row stride is Nx
		k = n/(Nx*Ny);
		j = (n-Nx*Ny*k)/Nx;
		i = n-Nx*Ny*k-Nx*j;
		// Streaming for the non-local distribution
		i += Cqx;
		j += Cqy;
		k += Cqz;
		nn = k*Nx*Ny+j*Nx+i;
		// Unpack the distribution to the proper location
		dist[q*N+nn] = recvbuf[start+idx];
	}
}
|
||||
|
||||
//*************************************************************************
|
||||
// SwapD3Q19: exchange each odd distribution of a node with the paired even
// distribution of the neighbor it points to.
//
// For every node n with ID[n] > 0 and every odd slot q = 0..8:
//   - the neighbor nn is n displaced by offset[q] with periodic wrap in x, y and z,
//   - fEven = disteven[(q+1)*N+nn] is read,
//   - if fEven > 0, distodd[q*N+n] and disteven[(q+1)*N+nn] are swapped;
//     otherwise both values are left in place.
// Slot 0 of disteven does not take part.
//
// Launch layout: 1-D grid of 1-D blocks; each thread handles S nodes,
//   n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x,  s = 0..S-1.
// Each (n, q) pair touches a distinct pair of addresses.
__global__ void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz, int S)
{
	// Displacement (dx,dy,dz) to the neighbor paired with odd slot q; the partner
	// value lives in even slot q+1 of that neighbor.
	const int offset[9][3] = {
		{1, 0, 0},	// f1  <-> f2
		{0, 1, 0},	// f3  <-> f4
		{0, 0, 1},	// f5  <-> f6
		{1, 1, 0},	// f7  <-> f8
		{1,-1, 0},	// f9  <-> f10
		{1, 0, 1},	// f11 <-> f12
		{1, 0,-1},	// f13 <-> f14
		{0, 1, 1},	// f15 <-> f16
		{0, 1,-1}	// f17 <-> f18
	};
	const int N = Nx*Ny*Nz;

	// S - number of nodes handled by each thread
	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
		if (!(n<N)) continue;
		if (!(ID[n] > 0)) continue;

		//.......Back out the 3-D indices for node n (row stride is Nx)..............
		const int k = n/(Nx*Ny);
		const int j = (n-Nx*Ny*k)/Nx;
		const int i = n-Nx*Ny*k-Nx*j;

		#pragma unroll
		for (int q=0; q<9; q++){
			// Neighbor coordinates with periodic wrap
			int in = i + offset[q][0];
			int jn = j + offset[q][1];
			int kn = k + offset[q][2];
			if (!(in<Nx)) in -= Nx;		// dx is never negative
			if (jn<0) jn += Ny;
			if (!(jn<Ny)) jn -= Ny;
			if (kn<0) kn += Nz;
			if (!(kn<Nz)) kn -= Nz;
			const int nn = kn*Nx*Ny + jn*Nx + in;

			const double fOdd = distodd[q*N+n];
			const double fEven = disteven[(q+1)*N+nn];
			// Only swap when the neighbor's value is positive
			if (fEven > 0){
				distodd[q*N+n] = fEven;
				disteven[(q+1)*N+nn] = fOdd;
			}
		}
	}
}
|
||||
//*************************************************************************
|
||||
// Host entry point (C linkage): pack every distribution that leaves the local
// domain into the per-face and per-edge send buffers by launching PackDist.
//
// Each face buffer receives five distributions stored back to back: distribution
// number m goes to offset m_slot*sendCount, m_slot = 0..4, in the order listed in the
// comments below. Each edge buffer receives a single distribution at offset 0.
// Distribution numbers map to storage as: even number 2q -> f_even slot q,
// odd number 2q+1 -> f_odd slot q.
//
// Face launches use faceGrid blocks, edge launches use edgeGrid blocks, all with
// `threads` threads per block. Launches are not checked for errors and the call does
// not wait for completion.
extern "C" void dvc_PackD3Q19(int faceGrid, int edgeGrid, int threads,double *f_even, double *f_odd, int N,
		int *dvcSendList_x, int *dvcSendList_y, int *dvcSendList_z, int *dvcSendList_X, int *dvcSendList_Y, int *dvcSendList_Z,
		int *dvcSendList_xy, int *dvcSendList_XY, int *dvcSendList_xY, int *dvcSendList_Xy,
		int *dvcSendList_xz, int *dvcSendList_XZ, int *dvcSendList_xZ, int *dvcSendList_Xz,
		int *dvcSendList_yz, int *dvcSendList_YZ, int *dvcSendList_yZ, int *dvcSendList_Yz,
		int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X, int sendCount_Y, int sendCount_Z,
		int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy,
		int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz,
		int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz,
		double *sendbuf_x, double *sendbuf_y, double *sendbuf_z, double *sendbuf_X, double *sendbuf_Y, double *sendbuf_Z,
		double *sendbuf_xy, double *sendbuf_XY, double *sendbuf_xY, double *sendbuf_Xy,
		double *sendbuf_xz, double *sendbuf_XZ, double *sendbuf_xZ, double *sendbuf_Xz,
		double *sendbuf_yz, double *sendbuf_YZ, double *sendbuf_yZ, double *sendbuf_Yz)
{
	//...Packing for x face (2,8,10,12,14)................................
	PackDist<<<faceGrid,threads>>>(1,dvcSendList_x,0,sendCount_x,sendbuf_x,f_even,N);
	PackDist<<<faceGrid,threads>>>(4,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,f_even,N);
	PackDist<<<faceGrid,threads>>>(5,dvcSendList_x,2*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
	PackDist<<<faceGrid,threads>>>(6,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
	PackDist<<<faceGrid,threads>>>(7,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
	//...Packing for X face (1,7,9,11,13)................................
	PackDist<<<faceGrid,threads>>>(0,dvcSendList_X,0,sendCount_X,sendbuf_X,f_odd,N);
	PackDist<<<faceGrid,threads>>>(3,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
	PackDist<<<faceGrid,threads>>>(4,dvcSendList_X,2*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
	PackDist<<<faceGrid,threads>>>(5,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
	PackDist<<<faceGrid,threads>>>(6,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
	//...Packing for y face (4,8,9,16,18).................................
	PackDist<<<faceGrid,threads>>>(2,dvcSendList_y,0,sendCount_y,sendbuf_y,f_even,N);
	PackDist<<<faceGrid,threads>>>(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,f_even,N);
	PackDist<<<faceGrid,threads>>>(4,dvcSendList_y,2*sendCount_y,sendCount_y,sendbuf_y,f_odd,N);
	PackDist<<<faceGrid,threads>>>(8,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,f_even,N);
	PackDist<<<faceGrid,threads>>>(9,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,f_even,N);
	//...Packing for Y face (3,7,10,15,17).................................
	PackDist<<<faceGrid,threads>>>(1,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,f_odd,N);
	PackDist<<<faceGrid,threads>>>(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
	PackDist<<<faceGrid,threads>>>(5,dvcSendList_Y,2*sendCount_Y,sendCount_Y,sendbuf_Y,f_even,N);
	PackDist<<<faceGrid,threads>>>(7,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
	PackDist<<<faceGrid,threads>>>(8,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
	//...Packing for z face (6,12,13,16,17)................................
	PackDist<<<faceGrid,threads>>>(3,dvcSendList_z,0,sendCount_z,sendbuf_z,f_even,N);
	PackDist<<<faceGrid,threads>>>(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,f_even,N);
	PackDist<<<faceGrid,threads>>>(6,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,f_odd,N);
	PackDist<<<faceGrid,threads>>>(8,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,f_even,N);
	PackDist<<<faceGrid,threads>>>(8,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,f_odd,N);
	//...Packing for Z face (5,11,14,15,18)................................
	PackDist<<<faceGrid,threads>>>(2,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,f_odd,N);
	PackDist<<<faceGrid,threads>>>(5,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N);
	PackDist<<<faceGrid,threads>>>(7,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N);
	PackDist<<<faceGrid,threads>>>(7,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N);
	PackDist<<<faceGrid,threads>>>(9,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N);
	//...Pack the xy edge (8)................................
	PackDist<<<edgeGrid,threads>>>(4,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,f_even,N);
	//...Pack the Xy edge (9)................................
	PackDist<<<edgeGrid,threads>>>(4,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,f_odd,N);
	//...Pack the xY edge (10)................................
	PackDist<<<edgeGrid,threads>>>(5,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,f_even,N);
	//...Pack the XY edge (7)................................
	PackDist<<<edgeGrid,threads>>>(3,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,f_odd,N);
	// The xz-family edges are packed exactly once; a second identical set of four
	// launches only rewrote the same buffer contents.
	//...Pack the xz edge (12)................................
	PackDist<<<edgeGrid,threads>>>(6,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,f_even,N);
	//...Pack the xZ edge (14)................................
	PackDist<<<edgeGrid,threads>>>(7,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,f_even,N);
	//...Pack the Xz edge (13)................................
	PackDist<<<edgeGrid,threads>>>(6,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,f_odd,N);
	//...Pack the XZ edge (11)................................
	PackDist<<<edgeGrid,threads>>>(5,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,f_odd,N);
	//...Pack the yz edge (16)................................
	PackDist<<<edgeGrid,threads>>>(8,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,f_even,N);
	//...Pack the yZ edge (18)................................
	PackDist<<<edgeGrid,threads>>>(9,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,f_even,N);
	//...Pack the Yz edge (17)................................
	PackDist<<<edgeGrid,threads>>>(8,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,f_odd,N);
	//...Pack the YZ edge (15)................................
	PackDist<<<edgeGrid,threads>>>(7,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,f_odd,N);
}
|
||||
//...................................................................................
|
||||
//*************************************************************************
|
||||
// Host entry point (C linkage): launch the PackDist kernel on `grid` blocks of
// `threads` threads, forwarding every remaining argument in order.
// The launch is not checked for errors and the call does not wait for completion.
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
		int sendCount, double *sendbuf, double *Dist, int N)
{
	const dim3 launchGrid(grid);
	const dim3 launchBlock(threads);
	PackDist<<<launchGrid, launchBlock>>>(q, SendList, start, sendCount, sendbuf, Dist, N);
}
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
		int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz)
{
	// Host-side launcher for the MapRecvDist kernel.
	//   grid, threads - launch configuration: number of blocks and threads per block (1-D)
	//   all other arguments are passed to the kernel unchanged, in the same order
	// No error check or synchronization is performed here.
	const dim3 numBlocks(grid);
	const dim3 blockShape(threads);
	MapRecvDist<<<numBlocks, blockShape>>>(q, Cqx, Cqy, Cqz, RecvList, start, recvCount,
			recvbuf, Dist, Nx, Ny, Nz);
}
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_SwapD3Q19( int nblocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
	// Host-side launcher for the SwapD3Q19 kernel.
	//   nblocks, nthreads - launch configuration (1-D)
	//   S                 - handed to the kernel as its LAST argument
	//   remaining arguments are forwarded unchanged
	// No error check or synchronization is performed here.
	const dim3 numBlocks(nblocks);
	const dim3 blockShape(nthreads);
	SwapD3Q19<<<numBlocks, blockShape>>>(ID, f_even, f_odd, Nx, Ny, Nz, S);
}
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd, int Nx,
		int Ny, int Nz)
{
	// Host-side launcher for the InitD3Q19 kernel.
	//   nblocks, nthreads - launch configuration (1-D)
	//   S                 - handed to the kernel as its LAST argument
	//   remaining arguments are forwarded unchanged
	// No error check or synchronization is performed here.
	const dim3 numBlocks(nblocks);
	const dim3 blockShape(nthreads);
	InitD3Q19<<<numBlocks, blockShape>>>(ID, f_even, f_odd, Nx, Ny, Nz, S);
}
|
||||
//*************************************************************************
|
||||
|
15
gpu/D3Q19.h
Normal file
15
gpu/D3Q19.h
Normal file
@ -0,0 +1,15 @@
|
||||
|
||||
//*************************************************************************
|
||||
//*************************************************************************
|
||||
// Launches the InitD3Q19 kernel on nblocks blocks of nthreads threads.
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd,
		int Nx, int Ny, int Nz);
//*************************************************************************
// Launches the SwapD3Q19 kernel on nblocks blocks of nthreads threads.
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd,
		int Nx, int Ny, int Nz);
//*************************************************************************
// Launches the PackDist kernel on grid blocks of threads threads.
extern "C" void dvc_PackDist(int grid, int threads, int q,
		int *SendList, int start, int sendCount,
		double *sendbuf, double *Dist, int N);
//*************************************************************************
// Launches the MapRecvDist kernel on grid blocks of threads threads.
extern "C" void dvc_UnpackDist(int grid, int threads, int q,
		int Cqx, int Cqy, int Cqz,
		int *RecvList, int start, int recvCount,
		double *recvbuf, double *Dist,
		int Nx, int Ny, int Nz);
|
||||
//*************************************************************************
|
89
gpu/D3Q7.cu
Normal file
89
gpu/D3Q7.cu
Normal file
@ -0,0 +1,89 @@
|
||||
// GPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||
|
||||
__global__ void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
	//....................................................................................
	// Gather kernel: thread t copies Data[list[t]] into sendbuf[t].
	// One thread handles one list entry; threads with t >= count do nothing.
	// N is accepted for interface symmetry but is not used here.
	//....................................................................................
	const int slot = blockIdx.x*blockDim.x + threadIdx.x;
	if (slot >= count) return;

	const int site = list[slot];
	sendbuf[slot] = Data[site];
}
|
||||
__global__ void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
	//....................................................................................
	// Scatter kernel: thread t overwrites Data[list[t]] with recvbuf[t].
	// One thread handles one list entry; threads with t >= count do nothing.
	// N is accepted for interface symmetry but is not used here.
	//....................................................................................
	const int slot = blockIdx.x*blockDim.x + threadIdx.x;
	if (slot >= count) return;

	const int site = list[slot];
	Data[site] = recvbuf[slot];
}
|
||||
|
||||
__global__ void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
	//....................................................................................
	// For each listed site, move `number` consecutive values out of Data into the
	// send buffer: sendbuf[t*number + c] = Data[number*list[t] + c], c = 0..number-1.
	// The source entries are set to zero once they are in the buffer.
	// One thread handles one list entry; N is not used.
	//....................................................................................
	const int slot = blockIdx.x*blockDim.x + threadIdx.x;
	if (slot >= count) return;

	for (int c=0; c<number; c++){
		const int site = list[slot];
		const int src = number*site + c;
		sendbuf[slot*number + c] = Data[src];
		Data[src] = 0.0;	// clear the value now that it has been buffered
	}
}
|
||||
|
||||
|
||||
__global__ void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
	//....................................................................................
	// Unpack values from the recv buffer and ADD them to what Data already holds:
	// Data[number*list[t] + c] += recvbuf[t*number + c], c = 0..number-1.
	// One thread handles one list entry; N is not used.
	//....................................................................................
	const int slot = blockIdx.x*blockDim.x + threadIdx.x;
	if (slot >= count) return;

	for (int c=0; c<number; c++){
		const int site = list[slot];
		const int dst = number*site + c;
		Data[dst] += recvbuf[slot*number + c];
	}
}
|
||||
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_PackDenD3Q7(int grid, int threads, int *list, int count, double *sendbuf,
		int number, double *Data, int N)
{
	// Host-side launcher for the PackDenD3Q7 kernel.
	//   grid, threads - launch configuration: number of blocks and threads per block (1-D)
	//   all other arguments are passed to the kernel unchanged, in the same order
	// No error check or synchronization is performed here.
	const dim3 numBlocks(grid);
	const dim3 blockShape(threads);
	PackDenD3Q7<<<numBlocks, blockShape>>>(list, count, sendbuf, number, Data, N);
}
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_UnpackDenD3Q7(int grid, int threads, int *list, int count, double *recvbuf,
		int number, double *Data, int N)
{
	// Host-side launcher for the UnpackDenD3Q7 kernel.
	//   grid, threads - launch configuration: number of blocks and threads per block (1-D)
	//   all other arguments are passed to the kernel unchanged, in the same order
	// No error check or synchronization is performed here.
	const dim3 numBlocks(grid);
	const dim3 blockShape(threads);
	UnpackDenD3Q7<<<numBlocks, blockShape>>>(list, count, recvbuf, number, Data, N);
}
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_PackValues(int grid, int threads, int *list, int count, double *sendbuf,
		double *Data, int N)
{
	// Host-side launcher for the PackValues kernel.
	//   grid, threads - launch configuration: number of blocks and threads per block (1-D)
	//   all other arguments are passed to the kernel unchanged, in the same order
	// No error check or synchronization is performed here.
	const dim3 numBlocks(grid);
	const dim3 blockShape(threads);
	PackValues<<<numBlocks, blockShape>>>(list, count, sendbuf, Data, N);
}
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_UnpackValues(int grid, int threads, int *list, int count, double *recvbuf,
		double *Data, int N)
{
	// Host-side launcher for the UnpackValues kernel.
	//   grid, threads - launch configuration: number of blocks and threads per block (1-D)
	//   all other arguments are passed to the kernel unchanged, in the same order
	// No error check or synchronization is performed here.
	const dim3 numBlocks(grid);
	const dim3 blockShape(threads);
	UnpackValues<<<numBlocks, blockShape>>>(list, count, recvbuf, Data, N);
}
|
||||
//***************************************************************************************
|
15
gpu/D3Q7.h
Normal file
15
gpu/D3Q7.h
Normal file
@ -0,0 +1,15 @@
|
||||
//
|
||||
//***************************************************************************************
|
||||
//***************************************************************************************
|
||||
// Launches the PackDenD3Q7 kernel: copies `number` values per listed site from Data
// into sendbuf and zeroes the copied Data entries.
extern "C" void dvc_PackDenD3Q7(int grid, int threads,
		int *list, int count, double *sendbuf,
		int number, double *Data, int N);
//***************************************************************************************
// Launches the UnpackDenD3Q7 kernel: adds `number` values per listed site from recvbuf
// onto the existing contents of Data.
extern "C" void dvc_UnpackDenD3Q7(int grid, int threads,
		int *list, int count, double *recvbuf,
		int number, double *Data, int N);
//***************************************************************************************
// Launches the PackValues kernel: sendbuf[i] = Data[list[i]] for i < count.
extern "C" void dvc_PackValues(int grid, int threads,
		int *list, int count, double *sendbuf,
		double *Data, int N);
//***************************************************************************************
// Launches the UnpackValues kernel: Data[list[i]] = recvbuf[i] for i < count.
extern "C" void dvc_UnpackValues(int grid, int threads,
		int *list, int count, double *recvbuf,
		double *Data, int N);
|
||||
//***************************************************************************************
|
312
gpu/MRT.cu
Normal file
312
gpu/MRT.cu
Normal file
@ -0,0 +1,312 @@
|
||||
#include <cuda.h>
|
||||
|
||||
// CUDA kernels for single-phase MRT code
|
||||
// James McClure
|
||||
//*************************************************************************
|
||||
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz, int S)
{
	// Sets the starting value of all 19 distributions at every site.
	//   ID            - per-site label; sites with ID > 0 get the constant values below,
	//                   all other sites get -1.0 in every distribution
	//   f_even        - 10 slabs of N values (slab q starts at q*N)
	//   f_odd         -  9 slabs of N values (slab q starts at q*N)
	//   Nx, Ny, Nz    - domain size, N = Nx*Ny*Nz sites
	//   S             - number of sites each thread processes; a block covers
	//                   S*blockDim.x consecutive sites
	const int N = Nx*Ny*Nz;

	for (int s=0; s<S; s++){
		//........1-D site index for this thread on sweep s....................
		const int n = (S*blockIdx.x + s)*blockDim.x + threadIdx.x;
		if (n >= N) continue;

		if (ID[n] > 0){
			// slab 0 of f_even
			f_even[n] = 0.3333333333333333;
			// first six directions: f_odd slabs 0..2 and f_even slabs 1..3
			for (int q=0; q<3; q++){
				f_odd[q*N+n] = 0.055555555555555555;
				f_even[(q+1)*N+n] = 0.055555555555555555;
			}
			// remaining twelve directions: f_odd slabs 3..8 and f_even slabs 4..9
			for (int q=3; q<9; q++){
				f_odd[q*N+n] = 0.0277777777777778;
				f_even[(q+1)*N+n] = 0.0277777777777778;
			}
		}
		else{
			// sites with ID <= 0: flag every distribution with -1
			f_even[n] = -1.0;
			for (int q=0; q<9; q++){
				f_odd[q*N+n] = -1.0;
				f_even[(q+1)*N+n] = -1.0;
			}
		}
	}
}
|
||||
|
||||
__global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz, int S)
{
	// Computes three signed sums of the 18 non-rest distributions at every site with
	// ID > 0 and stores them in vel as three slabs of N values (x, then y, then z).
	// Sites with ID <= 0 are left untouched in vel.
	//   disteven - even-numbered distributions, f(2q) stored at disteven[q*N+n]
	//   distodd  - odd-numbered distributions, f(2q+1) stored at distodd[q*N+n]
	//   S        - number of sites each thread processes; a block covers
	//              S*blockDim.x consecutive sites
	const int N = Nx*Ny*Nz;

	for (int s=0; s<S; s++){
		//........1-D site index for this thread on sweep s....................
		const int n = (S*blockIdx.x + s)*blockDim.x + threadIdx.x;

		// skip sites past the end of the domain and sites with ID <= 0
		if (n >= N || !(ID[n] > 0)) continue;

		//........Load the distributions into registers, in numeric order........
		const double f1  = distodd[n];
		const double f2  = disteven[N+n];
		const double f3  = distodd[1*N+n];
		const double f4  = disteven[2*N+n];
		const double f5  = distodd[2*N+n];
		const double f6  = disteven[3*N+n];
		const double f7  = distodd[3*N+n];
		const double f8  = disteven[4*N+n];
		const double f9  = distodd[4*N+n];
		const double f10 = disteven[5*N+n];
		const double f11 = distodd[5*N+n];
		const double f12 = disteven[6*N+n];
		const double f13 = distodd[6*N+n];
		const double f14 = disteven[7*N+n];
		const double f15 = distodd[7*N+n];
		const double f16 = disteven[8*N+n];
		const double f17 = distodd[8*N+n];
		const double f18 = disteven[9*N+n];

		//........Write the three components (term order is kept fixed so the
		//        floating-point result does not change)..........................
		vel[n]     = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
		vel[N+n]   = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
		vel[2*N+n] = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
	}
}
|
||||
|
||||
//*************************************************************************
|
||||
__global__ void MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz, int S,
		double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz)
{
	// Collision step: for every site with ID > 0, transform the 19 distributions to
	// moments, relax the non-conserved moments, transform back, add the external
	// force terms and store the result.
	//   ID                 - per-site label; only sites with ID > 0 are updated
	//   disteven, distodd  - distribution slabs of N values each (slab q at q*N)
	//   Nx, Ny, Nz         - domain size, N = Nx*Ny*Nz sites
	//   S                  - number of sites each thread processes; a block covers
	//                        S*blockDim.x consecutive sites
	//   rlx_setA, rlx_setB - relaxation factors applied to the two groups of moments
	//   Fx, Fy, Fz         - external force components
	// Distributions are READ using the swapped layout and WRITTEN using the
	// un-swapped layout (see the load/store sections below).
	// NOTE(review): the relaxation divides by rho without a zero check.

	int n,N;
	// distributions
	double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
	double f10,f11,f12,f13,f14,f15,f16,f17,f18;

	// conserved moments
	double rho,jx,jy,jz;
	// non-conserved moments
	double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18;

	N = Nx*Ny*Nz;

	char id;

	// S - number of threadblocks per grid block
	for (int s=0; s<S; s++){
		//........Get 1-D index for this thread....................
		n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;

		if (n<N){
			// Read the label only after the bounds check: reading ID[n] for n >= N
			// would be an out-of-bounds global load.
			id = ID[n];
			if (id > 0){
				//........................................................................
				// Registers to store the distributions - read based on swap convention
				//........................................................................
				f2 = distodd[n];
				f4 = distodd[N+n];
				f6 = distodd[2*N+n];
				f8 = distodd[3*N+n];
				f10 = distodd[4*N+n];
				f12 = distodd[5*N+n];
				f14 = distodd[6*N+n];
				f16 = distodd[7*N+n];
				f18 = distodd[8*N+n];
				//........................................................................
				f0 = disteven[n];
				f1 = disteven[N+n];
				f3 = disteven[2*N+n];
				f5 = disteven[3*N+n];
				f7 = disteven[4*N+n];
				f9 = disteven[5*N+n];
				f11 = disteven[6*N+n];
				f13 = disteven[7*N+n];
				f15 = disteven[8*N+n];
				f17 = disteven[9*N+n];
				//........................................................................
				//....................compute the moments...............................................
				rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
				m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17);
				m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17;
				jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14;
				m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14;
				jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18;
				m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18;
				jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18;
				m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18;
				m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18);
				m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17);
				m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13;
				m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13;
				m13 = f8+f7-f10-f9;
				m14 = f16+f15-f18-f17;
				m15 = f12+f11-f14-f13;
				m16 = f7-f8+f9-f10-f11+f12-f13+f14;
				m17 = -f7+f8+f9-f10+f15-f16+f17-f18;
				m18 = f11-f12-f13+f14-f15+f16+f17-f18;
				//..............incorporate external force................................................
				// (disabled in the original: the force is added to the distributions below instead)
				//jx += 0.5*Fx;
				//jy += 0.5*Fy;
				//jz += 0.5*Fz;
				//..............carry out relaxation process...............................................
				m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1);
				m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2);
				m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4);
				m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6);
				m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8);
				m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9);
				m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10);
				m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11);
				m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12);
				m13 = m13 + rlx_setA*((jx*jy/rho) - m13);
				m14 = m14 + rlx_setA*((jy*jz/rho) - m14);
				m15 = m15 + rlx_setA*((jx*jz/rho) - m15);
				m16 = m16 + rlx_setB*( - m16);
				m17 = m17 + rlx_setB*( - m17);
				m18 = m18 + rlx_setB*( - m18);
				//.................inverse transformation......................................................
				f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2;
				f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jx-m4)+0.05555555555555555*(m9-m10);
				f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m4-jx)+0.05555555555555555*(m9-m10);
				f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
				f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12);
				f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
				f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2
						+0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11);
				f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17);
				f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12+0.25*m13+0.125*(m17-m16);
				f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12-0.25*m13+0.125*(m16+m17);
				f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4)
						+0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11
						+0.04166666666666666*m12-0.25*m13-0.125*(m16+m17);
				f11 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12+0.25*m15+0.125*(m18-m16);
				f12 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12+0.25*m15+0.125*(m16-m18);
				f13 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12-0.25*m15-0.125*(m16+m18);
				f14 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4)
						+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11
						-0.04166666666666666*m12-0.25*m15+0.125*(m16+m18);
				f15 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8)
						-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18);
				f16 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8)
						-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17);
				f17 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8)
						-0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18);
				f18 = 0.05263157894736842*rho+0.003341687552213868*m1
						+0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6)
						-0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18);
				//.......................................................................................................
				// incorporate external force
				// NOTE(review): the coefficients are truncated decimals (0.16666666,
				// 0.08333333333); left as-is so numerical results are unchanged.
				f1 += 0.16666666*Fx;
				f2 -= 0.16666666*Fx;
				f3 += 0.16666666*Fy;
				f4 -= 0.16666666*Fy;
				f5 += 0.16666666*Fz;
				f6 -= 0.16666666*Fz;
				f7 += 0.08333333333*(Fx+Fy);
				f8 -= 0.08333333333*(Fx+Fy);
				f9 += 0.08333333333*(Fx-Fy);
				f10 -= 0.08333333333*(Fx-Fy);
				f11 += 0.08333333333*(Fx+Fz);
				f12 -= 0.08333333333*(Fx+Fz);
				f13 += 0.08333333333*(Fx-Fz);
				f14 -= 0.08333333333*(Fx-Fz);
				f15 += 0.08333333333*(Fy+Fz);
				f16 -= 0.08333333333*(Fy+Fz);
				f17 += 0.08333333333*(Fy-Fz);
				f18 -= 0.08333333333*(Fy-Fz);
				//.......................................................................................................
				// Write data based on un-swapped convention
				disteven[n] = f0;
				disteven[N+n] = f2;
				disteven[2*N+n] = f4;
				disteven[3*N+n] = f6;
				disteven[4*N+n] = f8;
				disteven[5*N+n] = f10;
				disteven[6*N+n] = f12;
				disteven[7*N+n] = f14;
				disteven[8*N+n] = f16;
				disteven[9*N+n] = f18;

				distodd[n] = f1;
				distodd[N+n] = f3;
				distodd[2*N+n] = f5;
				distodd[3*N+n] = f7;
				distodd[4*N+n] = f9;
				distodd[5*N+n] = f11;
				distodd[6*N+n] = f13;
				distodd[7*N+n] = f15;
				distodd[8*N+n] = f17;
				//.......................................................................................................
			}
		}
	}
}
|
||||
|
||||
extern "C" void dvc_MRT(int nblocks, int nthreads, int S, char *ID,
		double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
		int Nx, int Ny, int Nz)
{
	// Host-side launcher for the MRT kernel.
	//   nblocks, nthreads - launch configuration (1-D)
	//   the kernel receives (ID, f_even, f_odd, Nx, Ny, Nz, S, rlxA, rlxB, Fx, Fy, Fz);
	//   note that this differs from the argument order of this launcher
	// No error check or synchronization is performed here.
	const dim3 numBlocks(nblocks);
	const dim3 blockShape(nthreads);
	MRT<<<numBlocks, blockShape>>>(ID, f_even, f_odd, Nx, Ny, Nz, S, rlxA, rlxB, Fx, Fy, Fz);
}
|
||||
|
||||
extern "C" void dvc_InitD3Q19( int nblocks, int nthreads, int S,
		char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
	// Host-side launcher for the INITIALIZE kernel.
	//   nblocks, nthreads - launch configuration (1-D)
	//   S                 - handed to the kernel as its LAST argument
	//   remaining arguments are forwarded unchanged
	// No error check or synchronization is performed here.
	const dim3 numBlocks(nblocks);
	const dim3 blockShape(nthreads);
	INITIALIZE<<<numBlocks, blockShape>>>(ID, f_even, f_odd, Nx, Ny, Nz, S);
}
|
||||
|
||||
|
BIN
gpu/bin/ColorLBM
Executable file
BIN
gpu/bin/ColorLBM
Executable file
Binary file not shown.
248
gpu/lb1_MRT-swap.cu
Normal file
248
gpu/lb1_MRT-swap.cu
Normal file
@ -0,0 +1,248 @@
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cuda.h>
|
||||
//#include <cutil.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
//*************************************************************************
|
||||
// Prototypes of the device launchers. The argument order must match the
// definitions, which take the launch configuration (nblocks, nthreads, S) FIRST.
// These functions have C linkage, so a mismatching prototype still links but
// passes scrambled arguments.
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd,
		int Nx, int Ny, int Nz);
//*************************************************************************
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd,
		int Nx, int Ny, int Nz);
//*************************************************************************
extern "C" void dvc_MRT(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd,
		double rlxA, double rlxB, double Fx, double Fy, double Fz, int Nx, int Ny, int Nz);
//*************************************************************************

// Debug helper: writes the Nx*Ny*Nz values of `array`, truncated to int, to the
// text file "dist.list" (one line per (j,k) row, a separator line after each k-plane).
// Does nothing except report to stderr if the file cannot be opened.
void Write_Out(double *array, int Nx, int Ny, int Nz){
	int value;
	FILE *output;
	output = fopen("dist.list","w");
	if (output == NULL){
		fprintf(stderr, "Write_Out: unable to open dist.list for writing\n");
		return;
	}
	for (int k=0; k<Nz; k++){
		for (int j=0; j<Ny; j++){
			for (int i=0; i<Nx; i++){
				int index = k*Nx*Ny+j*Nx+i;
				value = int(array[index]);
				fprintf(output, "| %i",value);
			}
			fprintf(output, " | \n");
		}
		fprintf(output,"************************************** \n");
	}
	fclose(output);
}

//**************************************************************************
// MRT implementation of the LBM using CUDA
//**************************************************************************

// Driver: reads run parameters from "MRT.in", loads the media file named there,
// runs timestepMax swap + collision steps on the device, reports timing, and
// writes one row of z-velocity values to "velocity.out".
// Returns 1 if an input file cannot be opened.
int main(void)
{

	int deviceCount;
	cudaGetDeviceCount(&deviceCount);
	// NOTE(review): the device index is hard-coded to 1; on a machine with a single
	// GPU cudaSetDevice(1) fails - confirm this matches the target cluster.
	int device = 1;
	printf("Number of devices = %i \n", deviceCount);
	printf("Current device is = %i \n", device);
	cudaSetDevice(device);

	// BGK Model parameters
	string FILENAME;
	unsigned int nBlocks, nthreads;
	int timestepMax, interval;
	double tau,Fx,Fy,Fz,tol;
	// Domain variables
	int Nx,Ny,Nz;

	ifstream input("MRT.in");
	if (!input){
		// Without the parameter file every value below would be uninitialized.
		fprintf(stderr, "Unable to open parameter file MRT.in\n");
		return 1;
	}
	input >> FILENAME;		// name of the input file
	input >> Nz;			// number of nodes (x,y,z)
	input >> nBlocks;
	input >> nthreads;
	input >> tau;			// relaxation time
	input >> Fx;			// External force components (x,y,z)
	input >> Fy;
	input >> Fz;
	input >> timestepMax;	// max no. of timesteps
	input >> interval;		// error interval
	input >> tol;			// error tolerance

	double rlx_setA = 1.f/tau;
	double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);

	printf("tau = %f \n", tau);
	printf("Set A = %f \n", rlx_setA);
	printf("Set B = %f \n", rlx_setB);
	printf("Force(x) = %f \n", Fx);
	printf("Force(y) = %f \n", Fy);
	printf("Force(z) = %f \n", Fz);

	Nx = Ny = Nz;	// Cubic domain

	int N = Nx*Ny*Nz;
	// size_t so that the 10x / 9x / 3x multiples below cannot overflow an int
	size_t dist_mem_size = N*sizeof(double);

	// Number of sites each thread sweeps. Integer division truncates, so
	// nBlocks*nthreads*S may be smaller than N when N is not an exact multiple.
	int S = N/nthreads/nBlocks;

	dim3 grid(nBlocks,1,1);

	printf("Number of blocks = %i \n", nBlocks);
	printf("Threads per block = %i \n", nthreads);
	printf("Sweeps per thread = %i \n", S);
	printf("Number of nodes per side = %i \n", Nx);
	printf("Total Number of nodes = %i \n", N);

	//.......................................................................
	printf("Read input media... \n");
	// .......... READ THE INPUT FILE .......................................
	int n;
	char value;
	char *id;
	id = new char[N];
	int sum = 0;
	ifstream PM(FILENAME.c_str(),ios::binary);
	if (!PM){
		fprintf(stderr, "Unable to open media file %s\n", FILENAME.c_str());
		delete [] id;
		return 1;
	}
	for (int k=0;k<Nz;k++){
		for (int j=0;j<Ny;j++){
			for (int i=0;i<Nx;i++){
				PM.read((char *) (&value), sizeof(value));
				n = k*Nx*Ny+j*Nx+i;
				id[n] = value;
				if (value > 0) sum++;
			}
		}
	}
	PM.close();
	printf("File porosity = %f\n", double(sum)/N);
	//.......................................................................
	//...........device phase ID.................................................
	char *ID;
	cudaMalloc((void **) &ID, N);		// Allocate device memory
	// Copy to the device
	cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice);
	//...........................................................................

	//......................device distributions.................................
	double *f_even,*f_odd;
	//...........................................................................
	cudaMalloc((void **) &f_even, 10*dist_mem_size);	// Allocate device memory
	cudaMalloc((void **) &f_odd, 9*dist_mem_size);	// Allocate device memory
	//...........................................................................

	printf("Setting the distributions, size = : %i\n", N);
	//...........................................................................
	dvc_InitD3Q19(nBlocks,nthreads,S,ID,f_even,f_odd,Nx,Ny,Nz);
	//*************************************************************************

	int timestep = 0;
	printf("No. of timesteps: %i \n", timestepMax);

	//.......create a stream for the LB calculation.......
	cudaStream_t stream;
	cudaStreamCreate(&stream);

	//.......create and start timer............
	cudaEvent_t start, stop;
	float time;

	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord( start, 0 );
	//.........................................

	//************ MAIN ITERATION LOOP ***************************************/
	while (timestep < timestepMax){

		//........ Execute the swap kernel (device) .........................
		dvc_SwapD3Q19(nBlocks,nthreads,S,ID,f_even,f_odd,Nx,Ny,Nz);

		//........ Execute the collision kernel (device) ....................
		dvc_MRT(nBlocks,nthreads,S,ID,f_even,f_odd,rlx_setA,rlx_setB,Fx,Fy,Fz,Nx,Ny,Nz);
		// Iteration completed!

		timestep++;
		//...................................................................

	}
	//************************************************************************/

	cudaThreadSynchronize();
	//.......... stop and destroy timer.............................
	cudaEventRecord( stop, stream);
	cudaEventSynchronize( stop );

	cudaEventElapsedTime( &time, start, stop );
	printf("CPU time = %f \n", time);

	float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
	printf("MLUPS = %f \n", MLUPS);

	cudaStreamDestroy(stream);
	cudaEventDestroy( start );
	cudaEventDestroy( stop );
	//..............................................................

	//..............................................................
	//.........Compute the velocity and copy result to host ........
	double *velocity;
	velocity = new double[3*N];
	//......................device distributions....................................
	double *vel;
	//..............................................................................
	cudaMalloc((void **) &vel, 3*dist_mem_size);	// Allocate device memory
	//..............................................................................
	// NOTE(review): the Compute_VELOCITY launch is commented out, so nothing in this
	// file writes `vel` before it is copied back; the values printed below are
	// therefore whatever the fresh allocation contains - confirm intent.
	// Compute_VELOCITY <<< grid, nthreads >>> (ID, f_even, f_odd, vel, Nx, Ny, Nz, S);
	//..............................................................................
	cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);
	//..............................................................................

	//............................................................
	//....Write the z-velocity to test poiseuille flow............
	double vz,vz_avg;
	vz_avg = 0.0;

	FILE *output;
	output = fopen("velocity.out","w");
	if (output != NULL){
		// only the first row (j = k = 0) is written and accumulated
		for (int k=0; k<1; k++){
			for (int j=0; j<1; j++){
				for (int i=0; i<Nx; i++){
					int n = k*Nx*Ny+j*Nx+i;
					//.....print value........
					vz = velocity[2*N+n];
					vz_avg += vz;
					fprintf(output, " %e",vz);
				}
			}
		}
		fclose(output);
	}
	else{
		fprintf(stderr, "Unable to open velocity.out for writing\n");
	}

	vz = vz_avg/double(sum);
	printf("Average Velocity = %e\n", vz);


	// cleanup
	cudaFree(f_even); cudaFree(f_odd); cudaFree(vel); cudaFree(ID);
	// host arrays were allocated with new[], so they must be released with delete[]
	delete [] velocity; delete [] id;

}
|
246
gpu/lb1_MRT.cu
Normal file
246
gpu/lb1_MRT.cu
Normal file
@ -0,0 +1,246 @@
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cuda.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_Barrier();
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
|
||||
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
|
||||
//*************************************************************************
|
||||
|
||||
// Dump a 3-D array to the text file "dist.list" in the current directory.
//
// array      : host array with at least Nx*Ny*Nz entries, addressed as
//              array[k*Nx*Ny + j*Nx + i] (i varies fastest)
// Nx, Ny, Nz : extent of the array along i, j and k
//
// Every value is truncated to int before printing. One text line is written
// per (j,k) row, and a row of asterisks is written after each k-slice.
// If the file cannot be opened, a message is printed to stderr and nothing
// is written.
void Write_Out(double *array, int Nx, int Ny, int Nz){
	FILE *output = fopen("dist.list","w");
	if (output == NULL){
		// Without this guard every fprintf below would dereference NULL
		fprintf(stderr, "Write_Out: unable to open dist.list for writing\n");
		return;
	}
	for (int k=0; k<Nz; k++){
		for (int j=0; j<Ny; j++){
			for (int i=0; i<Nx; i++){
				// Index arithmetic in size_t so large grids cannot overflow int
				size_t index = (size_t(k)*Ny + j)*Nx + i;
				int value = int(array[index]);
				fprintf(output, "| %i",value);
			}
			fprintf(output, " | \n");
		}
		fprintf(output,"************************************** \n");
	}
	fclose(output);
}
|
||||
|
||||
//**************************************************************************
|
||||
// MRT implementation of the LBM using CUDA
|
||||
//**************************************************************************
|
||||
|
||||
// Single-GPU driver for the MRT lattice Boltzmann solver.
//
// Reads the run parameters from "MRT.in" (whitespace separated, in order):
//   FILENAME  Nz  nBlocks  nthreads  tau  Fx  Fy  Fz  timestepMax  interval  tol
// loads the media indicator file FILENAME (one byte per node, Nz^3 bytes),
// initialises the D3Q19 distributions on the device and runs timestepMax
// swap + MRT-collision steps. The elapsed time and the resulting MLUPS are
// printed at the end.
//
// Returns 0 on success and 1 if the parameter file or the media file cannot
// be read or the launch configuration is unusable.
int main(void)
{
	// MRT model parameters
	string FILENAME;
	unsigned int nBlocks = 0, nthreads = 0;
	int timestepMax = 0, interval = 0;
	double tau = 0.0, Fx = 0.0, Fy = 0.0, Fz = 0.0, tol = 0.0;
	// Domain variables
	int Nx,Ny,Nz = 0;

	ifstream input("MRT.in");
	input >> FILENAME;		// name of the media file
	input >> Nz;			// number of nodes per side
	input >> nBlocks;
	input >> nthreads;
	input >> tau;			// relaxation time
	input >> Fx;			// External force components (x,y,z)
	input >> Fy;
	input >> Fz;
	input >> timestepMax;	// max no. of timesteps
	if (!input){
		fprintf(stderr, "Error: could not read the simulation parameters from MRT.in\n");
		return 1;
	}
	// interval and tol are parsed but not used by this driver, so a file
	// that omits them is still accepted
	input >> interval;		// error interval
	input >> tol;			// error tolerance

	if (Nz <= 0 || nBlocks == 0 || nthreads == 0){
		// S below divides by nthreads and nBlocks
		fprintf(stderr, "Error: Nz, nBlocks and nthreads must all be positive\n");
		return 1;
	}

	double rlx_setA = 1.0/tau;
	double rlx_setB = 8.0*(2.0-rlx_setA)/(8.0-rlx_setA);

	printf("tau = %f \n", tau);
	printf("Set A = %f \n", rlx_setA);
	printf("Set B = %f \n", rlx_setB);
	printf("Force(x) = %f \n", Fx);
	printf("Force(y) = %f \n", Fy);
	printf("Force(z) = %f \n", Fz);

	Nx = Ny = Nz;	// Cubic domain

	int N = Nx*Ny*Nz;
	// Byte count of one double per node. Kept in size_t because the
	// 10*dist_mem_size allocation below exceeds INT_MAX for grids of roughly
	// 300^3 nodes and more.
	size_t dist_mem_size = size_t(N)*sizeof(double);

	// NOTE(review): S is truncated, not rounded up. If N is not a multiple of
	// nBlocks*nthreads, nBlocks*nthreads*S < N. Whether the kernels tolerate
	// rounding S up depends on code outside this file - confirm before changing.
	int S = N/nthreads/nBlocks;

	printf("Number of blocks = %i \n", nBlocks);
	printf("Threads per block = %i \n", nthreads);
	printf("Sweeps per thread = %i \n", S);
	printf("Number of nodes per side = %i \n", Nx);
	printf("Total Number of nodes = %i \n", N);

	//.......................................................................
	printf("Read input media... \n");
	// .......... READ THE INPUT FILE .......................................
	ifstream PM(FILENAME.c_str(),ios::binary);
	if (!PM){
		fprintf(stderr, "Error: could not open media file %s\n", FILENAME.c_str());
		return 1;
	}
	// Value-initialised so nodes missing from a short file are 0, not garbage
	char *id = new char[N]();
	PM.read(id, N);
	if (PM.gcount() != N){
		fprintf(stderr, "Warning: %s holds fewer than %i bytes; the remaining nodes are left as 0\n",
				FILENAME.c_str(), N);
	}
	PM.close();

	// Nodes with a positive indicator are counted to report the porosity
	int sum = 0;
	for (int n=0; n<N; n++){
		if (id[n] > 0) sum++;
	}
	printf("File porosity = %f\n", double(sum)/N);
	//.......................................................................

	//...........device phase ID.............................................
	char *ID;
	dvc_AllocateDeviceMemory((void **) &ID, N);		// Allocate device memory
	// Copy to the device
	dvc_CopyToDevice(ID, id, N);
	//.......................................................................

	//......................device distributions.............................
	double *f_even,*f_odd;
	dvc_AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size);	// Allocate device memory
	dvc_AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size);	// Allocate device memory
	//.......................................................................

	printf("Setting the distributions, size = : %i\n", N);
	dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
	//*************************************************************************

	int timestep = 0;
	printf("No. of timesteps: %i \n", timestepMax);

	//.......create a stream for the LB calculation.......
	cudaStream_t stream;
	cudaStreamCreate(&stream);

	//.......create and start timer............
	cudaEvent_t start, stop;
	float time = 0.f;

	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord( start, 0 );
	//.........................................

	//************ MAIN ITERATION LOOP ***************************************/
	while (timestep < timestepMax){
		//........ Execute the swap kernel (device) .........................
		dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);

		//........ Execute the collision kernel (device) ....................
		dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);

		// Iteration completed!
		timestep++;
	}
	//************************************************************************/

	dvc_Barrier();
	//.......... stop and destroy timer.............................
	cudaEventRecord( stop, stream);
	cudaEventSynchronize( stop );

	// cudaEventElapsedTime reports milliseconds between the two events;
	// the "CPU time" label is kept so existing log parsers keep working
	cudaEventElapsedTime( &time, start, stop );
	printf("CPU time = %f \n", time);

	// 0.001 * nodes * steps / milliseconds = million lattice updates per second
	float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
	printf("MLUPS = %f \n", MLUPS);

	cudaStreamDestroy(stream);
	cudaEventDestroy( start );
	cudaEventDestroy( stop );
	//..............................................................

	// Velocity post-processing stays disabled: this file declares no device
	// routine that computes the velocity field.

	// cleanup
	// NOTE(review): ID, f_even and f_odd come from dvc_AllocateDeviceMemory,
	// and no matching release routine is declared in this file; they are left
	// for process teardown.
	delete[] id;	// allocated with new[], so free() would be wrong

	return 0;
}
|
1334
gpu/lb1_MRT_mpi.cpp
Normal file
1334
gpu/lb1_MRT_mpi.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1836
gpu/lb1_MRT_mpi.cu
Normal file
1836
gpu/lb1_MRT_mpi.cu
Normal file
File diff suppressed because it is too large
Load Diff
423
gpu/lb2_Color.cu
Normal file
423
gpu/lb2_Color.cu
Normal file
@ -0,0 +1,423 @@
|
||||
#ifdef useMPI
|
||||
#include <mpi.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <math.h>
|
||||
#include <cuda.h>
|
||||
|
||||
using namespace std;
|
||||
//*************************************************************************
|
||||
// HokieSpeed
|
||||
//nvcc -Xcompiler -fopenmp -lgomp -O3 -arch sm_20 -o hybridATLKR lb2_ATLKR_hybrid.cu
|
||||
// -I$VT_MPI_INC -L$VT_MPI_LIB -lmpi
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
// Implementation of Two-Phase Immiscible LBM using CUDA
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S,
|
||||
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S,
|
||||
char *ID, double *Den, double *Phi, double das, double dbs, int N);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
|
||||
char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S,
|
||||
char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity,
|
||||
double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz,
|
||||
int Nx, int Ny, int Nz, bool pBC);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
|
||||
char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
|
||||
double beta, int Nx, int Ny, int Nz, bool pBC);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S,
|
||||
char *ID, double *Phi, double *Copy, double *Den, int N);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_Barrier();
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S,
|
||||
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
|
||||
int sendCount, double *sendbuf, double *Dist, int N);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
|
||||
int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz);
|
||||
//*************************************************************************
|
||||
|
||||
// Stop the run with a readable message when a CUDA runtime call fails.
static void CheckCuda(cudaError_t status, const char *what)
{
	if (status == cudaSuccess) return;
	fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
#ifdef useMPI
	MPI_Abort(MPI_COMM_WORLD, 1);
#endif
	exit(EXIT_FAILURE);
}

// Driver for the two-phase (color) lattice Boltzmann model.
//
// Rank 0 reads the run parameters from "Color.in" (whitespace separated):
//   FILENAME
//   Nz nBlocks nthreads
//   tau alpha beta das dbs
//   Fx Fy Fz
//   pBC din dout
//   timestepMax interval tol
// and the phase indicator file FILENAME (one byte per node, Nz^3 bytes;
// s=0, w=1, n=2). When built with useMPI the parameters and the indicator
// array are broadcast to every rank. Each process then runs timestepMax
// steps on its device and writes "Phase.out" and "ColorGrad.out" as raw
// doubles.
//
// Returns 0 on success and 1 when an input file cannot be read or the launch
// configuration is unusable.
int main(int argc, char *argv[])
{
	//********** Initialize MPI ****************
	int numprocs,rank;
#ifdef useMPI
	MPI_Init(&argc,&argv);
	MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
	MPI_Comm_rank(MPI_COMM_WORLD,&rank);
#else
	numprocs = 1;
	rank = 0;
#endif
	//******************************************

	if (rank == 0){
		printf("********************************************************\n");
		printf("Running Hybrid Implementation of Color LBM	\n");
		printf("********************************************************\n");
	}

	// Color Model parameters
	string FILENAME;
	unsigned int nBlocks = 0, nthreads = 0;
	int Nx,Ny,Nz = 0;
	int timestepMax = 0, interval = 0;
	double tau = 0.0, Fx = 0.0, Fy = 0.0, Fz = 0.0, tol = 0.0;
	double alpha = 0.0, beta = 0.0;
	double das = 0.0, dbs = 0.0;
	double din = 0.0, dout = 0.0;
	bool pBC = false;

	if (rank==0){
		//.............................................................
		//		READ SIMULATION PARAMETERS FROM INPUT FILE
		//.............................................................
		ifstream input("Color.in");
		// Line 1: Name of the phase indicator file (s=0,w=1,n=2)
		input >> FILENAME;
		// Line 2: nodes per side and launch configuration
		input >> Nz;
		input >> nBlocks;
		input >> nthreads;
		// Line 3: model parameters (tau, alpha, beta, das, dbs)
		input >> tau;
		input >> alpha;
		input >> beta;
		input >> das;
		input >> dbs;
		// Line 4: External force components (Fx,Fy, Fz)
		input >> Fx;
		input >> Fy;
		input >> Fz;
		// Line 5: Pressure Boundary conditions
		input >> pBC;
		input >> din;
		input >> dout;
		// Line 6: time-stepping criteria
		input >> timestepMax;	// max no. of timesteps
		if (!input){
			fprintf(stderr, "Error: could not read the simulation parameters from Color.in\n");
#ifdef useMPI
			MPI_Abort(MPI_COMM_WORLD, 1);
#endif
			return 1;
		}
		// interval and tol are parsed but not used by this driver, so a
		// file that omits them is still accepted
		input >> interval;		// error interval
		input >> tol;			// error tolerance
		//.............................................................
	}
#ifdef useMPI
	// **************************************************************
	// Broadcast simulation parameters from rank 0 to all other procs
	MPI_Barrier(MPI_COMM_WORLD);
	//.................................................
	MPI_Bcast(&Nz,1,MPI_INT,0,MPI_COMM_WORLD);
	// nBlocks / nthreads are unsigned int, so use the matching MPI type
	MPI_Bcast(&nBlocks,1,MPI_UNSIGNED,0,MPI_COMM_WORLD);
	MPI_Bcast(&nthreads,1,MPI_UNSIGNED,0,MPI_COMM_WORLD);
	MPI_Bcast(&Fx,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&Fy,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&Fz,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&tau,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&alpha,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&beta,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&das,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&dbs,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	// MPI_LOGICAL describes a Fortran LOGICAL, not a C++ bool, so the flag
	// travels as an int instead
	int pBCflag = pBC ? 1 : 0;
	MPI_Bcast(&pBCflag,1,MPI_INT,0,MPI_COMM_WORLD);
	pBC = (pBCflag != 0);
	MPI_Bcast(&din,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&dout,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	MPI_Bcast(&timestepMax,1,MPI_INT,0,MPI_COMM_WORLD);
	MPI_Bcast(&interval,1,MPI_INT,0,MPI_COMM_WORLD);
	MPI_Bcast(&tol,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
	//.................................................
	MPI_Barrier(MPI_COMM_WORLD);
	// **************************************************************
#endif

	if (Nz <= 0 || nBlocks == 0 || nthreads == 0){
		// S below divides by nthreads and nBlocks; every rank sees the same
		// broadcast values, so all ranks leave together
		if (rank == 0) fprintf(stderr, "Error: Nz, nBlocks and nthreads must all be positive\n");
#ifdef useMPI
		MPI_Finalize();
#endif
		return 1;
	}

	double rlxA = 1.0/tau;
	double rlxB = 8.0*(2.0-rlxA)/(8.0-rlxA);

	if (pBC && rank == 0){
		printf("Assigning presusre boundary conditions \n");
		printf("Inlet density = %f \n", din);
		printf("Outlet density = %f \n", dout);
	}

	if (rank==0){
		printf("....Parameters................\n");
		printf("tau = %f \n", tau);
		printf("alpha = %f \n", alpha);
		printf("beta = %f \n", beta);
		printf("das = %f \n", das);
		printf("dbs = %f \n", dbs);
		printf("Force(x) = %f \n", Fx);
		printf("Force(y) = %f \n", Fy);
		printf("Force(z) = %f \n", Fz);
		printf("Nz = %i \n", Nz);
		printf("timestepMax = %i \n", timestepMax);
		printf("...............................\n");
	}

	// Identical cubic sub-domains
	Nx = Ny = Nz;
	int N = Nx*Ny*Nz;
	// Byte count of one double per node. Kept in size_t because the
	// 10*dist_mem_size allocation below exceeds INT_MAX for grids of roughly
	// 300^3 nodes and more.
	size_t dist_mem_size = size_t(N)*sizeof(double);

	// Sweeps per thread, rounded up so nBlocks*nthreads*S covers all N nodes
	int S = N/nthreads/nBlocks;
	if (nBlocks*nthreads*S < N) S++;

	// Was guarded by rank==1, which never printed in a non-MPI build
	if (rank==0){
		printf("Number of blocks = %i \n", nBlocks);
		printf("Threads per block = %i \n", nthreads);
		printf("Sweeps per thread = %i \n", S);
		printf("Number of nodes per side = %i \n", Nx);
		printf("Total Number of nodes = %i \n", N);
		printf("...............................\n");
	}

	//.......................................................................
	// .......... READ THE INPUT FILE .......................................
	int n;
	// Value-initialised so nodes a short file does not supply are 0
	char *id = new char[N]();
	int sum = 0;
	// RANK 0 READS THE INPUT FILE
	if (rank==0){
		printf("Read input media... \n");
		ifstream PM(FILENAME.c_str(),ios::binary);
		if (!PM){
			fprintf(stderr, "Error: could not open phase indicator file %s\n", FILENAME.c_str());
			delete[] id;
#ifdef useMPI
			MPI_Abort(MPI_COMM_WORLD, 1);
#endif
			return 1;
		}
		for (int k=0;k<Nz;k++){
			for (int j=0;j<Ny;j++){
				for (int i=0;i<Nx;i++){
					char value = 0;
					PM.read((char *) (&value), sizeof(value));
					n = k*Nx*Ny+j*Nx+i;

					if (value>0){
						if (pBC) value=2; // Saturate with NWP
						if (k<8){
							value=1;
						}
					}

					id[n] = value;
					if (value > 0) sum++;
				}
			}
		}
		PM.close();
		printf("File porosity = %f\n", double(sum)/N);
	}
	//......... for pressure BC only............................
	// Void the first / last rows if pressure BC are to be used
	if (pBC){
		for (int k=0;k<Nz;k++){
			// Only the four layers at each end are touched. The original
			// jumped ahead with "if (k==4) k=Nz-5", which never terminated
			// for Nz between 5 and 8.
			if (k>=4 && k<=Nz-5) continue;
			for (int j=0;j<Ny;j++){
				for (int i=0;i<Nx;i++){
					n = k*Nx*Ny+j*Nx+i;
					if (k<4)	id[n] = 1;
					if (k>Nz-5)	id[n] = 2;
				}
			}
		}
	}
#ifdef useMPI	//............................................................
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Bcast(&id[0],N,MPI_CHAR,0,MPI_COMM_WORLD);
	MPI_Barrier(MPI_COMM_WORLD);
#endif
	if (rank == 0) printf("Domain set.\n");
	//...........................................................................

	if (pBC){
		int SBC = Nx*Ny/nthreads/nBlocks+1;
		printf("Number of sweeps for inlet / outlet: %i \n", SBC);
	}
	//...........................................................................

	//...........device phase ID.................................................
	char *ID;
	CheckCuda(cudaMalloc((void **) &ID, N), "allocate ID");		// Allocate device memory
	// Copy to the device
	CheckCuda(cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice), "copy ID to device");
	//...........................................................................

	//......................device distributions.................................
	double *f_even,*f_odd;
	CheckCuda(cudaMalloc((void **) &f_even, 10*dist_mem_size), "allocate f_even");
	CheckCuda(cudaMalloc((void **) &f_odd, 9*dist_mem_size), "allocate f_odd");
	//...........................................................................

	//...........................................................................
	//				MAIN  VARIABLES ALLOCATED HERE
	//...........................................................................
	double *Phi,*Den,*Copy;
	double *ColorGrad, *Velocity;
	CheckCuda(cudaMalloc((void **) &Phi, dist_mem_size), "allocate Phi");
	CheckCuda(cudaMalloc((void **) &Den, 2*dist_mem_size), "allocate Den");
	CheckCuda(cudaMalloc((void **) &Copy, 2*dist_mem_size), "allocate Copy");
	CheckCuda(cudaMalloc((void **) &Velocity, 3*dist_mem_size), "allocate Velocity");
	CheckCuda(cudaMalloc((void **) &ColorGrad, 3*dist_mem_size), "allocate ColorGrad");
	//...........................................................................

	if (rank==0)	printf("Setting the distributions, size = : %i\n", N);
	//...........................................................................
	dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
	dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, N);
	//...........................................................................
	dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);
	//...........................................................................

	int timestep = 0;
	if (rank==0) printf("No. of timesteps: %i \n", timestepMax);

	//.......create and start timer............
	cudaEvent_t start, stop;
	float time = 0.f;
	//.......create a stream for the LB calculation.......
	cudaStream_t stream;
	cudaStreamCreate(&stream);

	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord( start, 0 );
	//.........................................

	//************ MAIN TIMESTEP LOOP ***************************************/
	while (timestep < timestepMax){

		// Compute the color gradient
		dvc_ComputeColorGradient(nBlocks, nthreads, S,
				ID, Phi, ColorGrad, Nx, Ny, Nz);

		// Perform collision step for the momentum transport
		dvc_ColorCollide(nBlocks, nthreads, S,
				ID, f_even, f_odd, ColorGrad, Velocity,
				rlxA, rlxB,alpha, beta, Fx, Fy, Fz, Nx, Ny, Nz, pBC);

		// Carry out the density streaming step for mass transport
		dvc_DensityStreamD3Q7(nBlocks, nthreads, S,
				ID, Den, Copy, Phi, ColorGrad, Velocity,beta, Nx, Ny, Nz, pBC);

		// Swap the distributions for momentum transport
		dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);

		// Compute the phase indicator field and reset Copy, Den
		dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);

		dvc_Barrier();
		timestep++;
	}
	//************************************************************************/
	dvc_Barrier();
	//.......... stop and destroy timer.............................
	cudaEventRecord( stop, stream);
	cudaEventSynchronize( stop );

	// cudaEventElapsedTime reports milliseconds between the two events;
	// the "CPU time" label is kept so existing log parsers keep working
	cudaEventElapsedTime( &time, start, stop );
	printf("CPU time = %f \n", time);

	// 0.001 * nodes * steps / milliseconds = million lattice updates per second
	float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
	printf("MLUPS = %f \n", MLUPS);

	cudaStreamDestroy(stream);
	cudaEventDestroy( start );
	cudaEventDestroy( stop );

	// Host staging buffer, sized for the largest field copied back (3 doubles per node)
	double *Data = new double[3*size_t(N)];

	// Write out the Phase Indicator Field
	CheckCuda(cudaMemcpy(Data, Phi, dist_mem_size, cudaMemcpyDeviceToHost), "copy Phi to host");
	FILE *phase = fopen("Phase.out","wb");
	if (phase != NULL){
		fwrite(Data,sizeof(double),N,phase);
		fclose(phase);
	}
	else{
		fprintf(stderr, "Warning: could not open Phase.out for writing\n");
	}

	// Pressure and velocity output remain disabled in this driver.

	// Write out the Color Gradient
	CheckCuda(cudaMemcpy(Data, ColorGrad, 3*dist_mem_size, cudaMemcpyDeviceToHost), "copy ColorGrad to host");
	FILE *CG = fopen("ColorGrad.out","wb");
	if (CG != NULL){
		fwrite(Data,sizeof(double),3*size_t(N),CG);
		fclose(CG);
	}
	else{
		fprintf(stderr, "Warning: could not open ColorGrad.out for writing\n");
	}

	// cleanup (each device buffer is released exactly once)
	cudaFree(ID);
	cudaFree(f_even);	cudaFree(f_odd);
	cudaFree(Velocity);
	cudaFree(Phi);
	cudaFree(ColorGrad);
	cudaFree(Den);		cudaFree(Copy);
	delete[] Data;
	delete[] id;		// allocated with new[], so free() would be wrong

	//***********Finish up!*********************************
#ifdef useMPI
	MPI_Finalize();
#endif
	return 0;
}
|
1621
gpu/lb2_Color_mpi.cpp
Normal file
1621
gpu/lb2_Color_mpi.cpp
Normal file
File diff suppressed because it is too large
Load Diff
BIN
gpu/lib/libcuColor.a
Normal file
BIN
gpu/lib/libcuColor.a
Normal file
Binary file not shown.
BIN
gpu/lib/libcuD3Q19.a
Normal file
BIN
gpu/lib/libcuD3Q19.a
Normal file
Binary file not shown.
BIN
gpu/lib/libcuD3Q7.a
Normal file
BIN
gpu/lib/libcuD3Q7.a
Normal file
Binary file not shown.
BIN
gpu/lib/libcuExtra.a
Normal file
BIN
gpu/lib/libcuExtra.a
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user