Re-factoring gpu code to match cpu version
This commit is contained in:
2146
gpu/Color.cu
2146
gpu/Color.cu
File diff suppressed because it is too large
Load Diff
47
gpu/Color.h
47
gpu/Color.h
@@ -1,21 +1,28 @@
|
||||
//
|
||||
//*************************************************************************
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S,
|
||||
char *ID, double *Den, double *Phi, double das, double dbs, int N);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
|
||||
char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S,
|
||||
char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity,
|
||||
double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz,
|
||||
int Nx, int Ny, int Nz, bool pBC);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
|
||||
char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
|
||||
extern "C" void dvc_InitDenColor(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_InitDenColorDistance(char *ID, double *Den, double *Phi, double *Distance,
|
||||
double das, double dbs, double beta, double xp, int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_ComputePressureD3Q19(char *ID, double *disteven, double *distodd, double *Pressure,
|
||||
int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_PressureBC_inlet(double *disteven, double *distodd, double din,
|
||||
int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_PressureBC_outlet(double *disteven, double *distodd, double dout,
|
||||
int Nx, int Ny, int Nz, int outlet);
|
||||
extern "C" void dvc_ComputeColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad,
|
||||
double *Velocity, int Nx, int Ny, int Nz,double rlx_setA, double rlx_setB,
|
||||
double alpha, double beta, double Fx, double Fy, double Fz, bool pBC);
|
||||
extern "C" void dvc_ColorCollideOpt( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad,
|
||||
double *Velocity, int Nx, int Ny, int Nz,double rlx_setA, double rlx_setB,
|
||||
double alpha, double beta, double Fx, double Fy, double Fz);
|
||||
extern "C" void dvc_DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
|
||||
double beta, int Nx, int Ny, int Nz, bool pBC);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S,
|
||||
char *ID, double *Phi, double *Copy, double *Den, int N);
|
||||
//*************************************************************************
|
||||
//extern "C" void dvc_ComputePhi(char *ID, double *Phi, double *Copy, double *Den, int N);
|
||||
// Added by J.E. McClure 2-12-2014
|
||||
extern "C" void dvc_ComputePhi(char *ID, double *Phi, double *Den, int N);
|
||||
extern "C" void dvc_MassColorCollideD3Q7(char *ID, double *A_even, double *A_odd, double *B_even, double *B_odd,
|
||||
double *Den, double *Phi, double *ColorGrad, double *Velocity, double beta, int N, bool pBC);
|
||||
extern "C" void dvc_ColorBC_inlet(double *Phi, double *Den, double *A_even, double *A_odd,
|
||||
double *B_even, double *B_odd, int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_ColorBC_outlet(double *Phi, double *Den, double *A_even, double *A_odd,
|
||||
double *B_even, double *B_odd, int Nx, int Ny, int Nz);
|
||||
|
||||
461
gpu/D3Q19.cu
461
gpu/D3Q19.cu
@@ -1,49 +1,4 @@
|
||||
#include <cuda.h>
|
||||
|
||||
__global__ void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz, int S)
|
||||
{
|
||||
int n,N;
|
||||
N = Nx*Ny*Nz;
|
||||
|
||||
for (int s=0; s<S; s++){
|
||||
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||
|
||||
if (n<N){
|
||||
if (ID[n] > 0){
|
||||
f_even[n] = 0.3333333333333333;
|
||||
f_odd[n] = 0.055555555555555555; //double(100*n)+1.f;
|
||||
f_even[N+n] = 0.055555555555555555; //double(100*n)+2.f;
|
||||
f_odd[N+n] = 0.055555555555555555; //double(100*n)+3.f;
|
||||
f_even[2*N+n] = 0.055555555555555555; //double(100*n)+4.f;
|
||||
f_odd[2*N+n] = 0.055555555555555555; //double(100*n)+5.f;
|
||||
f_even[3*N+n] = 0.055555555555555555; //double(100*n)+6.f;
|
||||
f_odd[3*N+n] = 0.0277777777777778; //double(100*n)+7.f;
|
||||
f_even[4*N+n] = 0.0277777777777778; //double(100*n)+8.f;
|
||||
f_odd[4*N+n] = 0.0277777777777778; //double(100*n)+9.f;
|
||||
f_even[5*N+n] = 0.0277777777777778; //double(100*n)+10.f;
|
||||
f_odd[5*N+n] = 0.0277777777777778; //double(100*n)+11.f;
|
||||
f_even[6*N+n] = 0.0277777777777778; //double(100*n)+12.f;
|
||||
f_odd[6*N+n] = 0.0277777777777778; //double(100*n)+13.f;
|
||||
f_even[7*N+n] = 0.0277777777777778; //double(100*n)+14.f;
|
||||
f_odd[7*N+n] = 0.0277777777777778; //double(100*n)+15.f;
|
||||
f_even[8*N+n] = 0.0277777777777778; //double(100*n)+16.f;
|
||||
f_odd[8*N+n] = 0.0277777777777778; //double(100*n)+17.f;
|
||||
f_even[9*N+n] = 0.0277777777777778; //double(100*n)+18.f;
|
||||
}
|
||||
else{
|
||||
for(int q=0; q<9; q++){
|
||||
f_even[q*N+n] = -1.0;
|
||||
f_odd[q*N+n] = -1.0;
|
||||
}
|
||||
f_even[9*N+n] = -1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
|
||||
__global__ void dvc_PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
|
||||
//....................................................................................
|
||||
// Pack distribution q into the send buffer for the listed lattice sites
|
||||
// dist may be even or odd distributions stored by stream layout
|
||||
@@ -51,14 +6,12 @@ __global__ void PackDist(int q, int *list, int start, int count, double *sendbuf
|
||||
int idx,n;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx<count){
|
||||
// for (idx=0; idx<count; idx++){
|
||||
n = list[idx];
|
||||
sendbuf[start+idx] = dist[q*N+n];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__global__ void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
|
||||
__global__ void dvc_UnpackDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
|
||||
double *recvbuf, double *dist, int Nx, int Ny, int Nz){
|
||||
//....................................................................................
|
||||
// Unack distribution from the recv buffer
|
||||
@@ -70,7 +23,6 @@ __global__ void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int st
|
||||
int N = Nx*Ny*Nz;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx<count){
|
||||
// for (idx=0; idx<count; idx++){
|
||||
// Get the value from the list -- note that n is the index is from the send (non-local) process
|
||||
n = list[idx];
|
||||
// Get the 3-D indices
|
||||
@@ -102,258 +54,175 @@ __global__ void MapRecvDist(int q, int Cqx, int Cqy, int Cqz, int *list, int st
|
||||
}
|
||||
}
|
||||
|
||||
//*************************************************************************
|
||||
__global__ void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz, int S)
|
||||
__global__ void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
|
||||
{
|
||||
int n,nn,N;
|
||||
// distributions
|
||||
double f1,f2,f3,f4,f5,f6,f7,f8,f9;
|
||||
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
|
||||
|
||||
int n,N;
|
||||
N = Nx*Ny*Nz;
|
||||
|
||||
// S - number of threadblocks per grid block
|
||||
for (int s=0; s<S; s++){
|
||||
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||
|
||||
// for (n=0; n<N; n++){
|
||||
//.......Back out the 3-D indices for node n..............
|
||||
int k = n/(Nx*Ny);
|
||||
int j = (n-Nx*Ny*k)/Nx;
|
||||
int i = n-Nx*Ny*k-Nz*j;
|
||||
|
||||
if (n<N){
|
||||
if (ID[n] > 0){
|
||||
//........................................................................
|
||||
// Retrieve even distributions from the local node (swap convention)
|
||||
// f0 = disteven[n]; // Does not particupate in streaming
|
||||
f1 = distodd[n];
|
||||
f3 = distodd[N+n];
|
||||
f5 = distodd[2*N+n];
|
||||
f7 = distodd[3*N+n];
|
||||
f9 = distodd[4*N+n];
|
||||
f11 = distodd[5*N+n];
|
||||
f13 = distodd[6*N+n];
|
||||
f15 = distodd[7*N+n];
|
||||
f17 = distodd[8*N+n];
|
||||
//........................................................................
|
||||
|
||||
//........................................................................
|
||||
// Retrieve odd distributions from neighboring nodes (swap convention)
|
||||
//........................................................................
|
||||
nn = n+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
//if (i+1<Nx){
|
||||
f2 = disteven[N+nn]; // pull neighbor for distribution 2
|
||||
if (f2 > 0){
|
||||
distodd[n] = f2;
|
||||
disteven[N+nn] = f1;
|
||||
}
|
||||
//}
|
||||
//........................................................................
|
||||
nn = n+Nx; // neighbor index (pull convention)
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
//if (j+1<Ny){
|
||||
f4 = disteven[2*N+nn]; // pull neighbor for distribution 4
|
||||
if (f4 > 0){
|
||||
distodd[N+n] = f4;
|
||||
disteven[2*N+nn] = f3;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx*Ny; // neighbor index (pull convention)
|
||||
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (k+1<Nz){
|
||||
f6 = disteven[3*N+nn]; // pull neighbor for distribution 6
|
||||
if (f6 > 0){
|
||||
distodd[2*N+n] = f6;
|
||||
disteven[3*N+nn] = f5;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
//if ((i+1<Nx) && (j+1<Ny)){
|
||||
f8 = disteven[4*N+nn]; // pull neighbor for distribution 8
|
||||
if (f8 > 0){
|
||||
distodd[3*N+n] = f8;
|
||||
disteven[4*N+nn] = f7;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n-Nx+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
if (j-1<0) nn += Nx*Ny; // Perioidic BC along the y-boundary
|
||||
//if (!(i-1<0) && (j+1<Ny)){
|
||||
f10 = disteven[5*N+nn]; // pull neighbor for distribution 9
|
||||
if (f10 > 0){
|
||||
distodd[4*N+n] = f10;
|
||||
disteven[5*N+nn] = f9;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx*Ny+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if ( !(i-1<0) && !(k-1<0)){
|
||||
f12 = disteven[6*N+nn]; // pull distribution 11
|
||||
if (f12 > 0){
|
||||
distodd[5*N+n] = f12;
|
||||
disteven[6*N+nn] = f11;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n-Nx*Ny+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
if (k-1<0) nn += Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (!(i-1<0) && (k+1<Nz)){
|
||||
f14 = disteven[7*N+nn]; // pull neighbor for distribution 13
|
||||
if (f14 > 0){
|
||||
distodd[6*N+n] = f14;
|
||||
disteven[7*N+nn] = f13;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx*Ny+Nx; // neighbor index (pull convention)
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (!(j-1<0) && !(k-1<0)){
|
||||
f16 = disteven[8*N+nn]; // pull neighbor for distribution 15
|
||||
if (f16 > 0){
|
||||
distodd[7*N+n] = f16;
|
||||
disteven[8*N+nn] = f15;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n-Nx*Ny+Nx; // neighbor index (pull convention)
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
if (k-1<0) nn += Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (!(j-1<0) && (k+1<Nz)){
|
||||
f18 = disteven[9*N+nn]; // pull neighbor for distribution 17
|
||||
if (f18 > 0){
|
||||
distodd[8*N+n] = f18;
|
||||
disteven[9*N+nn] = f17;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
for (n=0; n<N; n++){
|
||||
|
||||
if (ID[n] > 0){
|
||||
f_even[n] = 0.3333333333333333;
|
||||
f_odd[n] = 0.055555555555555555; //double(100*n)+1.f;
|
||||
f_even[N+n] = 0.055555555555555555; //double(100*n)+2.f;
|
||||
f_odd[N+n] = 0.055555555555555555; //double(100*n)+3.f;
|
||||
f_even[2*N+n] = 0.055555555555555555; //double(100*n)+4.f;
|
||||
f_odd[2*N+n] = 0.055555555555555555; //double(100*n)+5.f;
|
||||
f_even[3*N+n] = 0.055555555555555555; //double(100*n)+6.f;
|
||||
f_odd[3*N+n] = 0.0277777777777778; //double(100*n)+7.f;
|
||||
f_even[4*N+n] = 0.0277777777777778; //double(100*n)+8.f;
|
||||
f_odd[4*N+n] = 0.0277777777777778; //double(100*n)+9.f;
|
||||
f_even[5*N+n] = 0.0277777777777778; //double(100*n)+10.f;
|
||||
f_odd[5*N+n] = 0.0277777777777778; //double(100*n)+11.f;
|
||||
f_even[6*N+n] = 0.0277777777777778; //double(100*n)+12.f;
|
||||
f_odd[6*N+n] = 0.0277777777777778; //double(100*n)+13.f;
|
||||
f_even[7*N+n] = 0.0277777777777778; //double(100*n)+14.f;
|
||||
f_odd[7*N+n] = 0.0277777777777778; //double(100*n)+15.f;
|
||||
f_even[8*N+n] = 0.0277777777777778; //double(100*n)+16.f;
|
||||
f_odd[8*N+n] = 0.0277777777777778; //double(100*n)+17.f;
|
||||
f_even[9*N+n] = 0.0277777777777778; //double(100*n)+18.f;
|
||||
}
|
||||
else{
|
||||
for(int q=0; q<9; q++){
|
||||
f_even[q*N+n] = -1.0;
|
||||
f_odd[q*N+n] = -1.0;
|
||||
}
|
||||
f_even[9*N+n] = -1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_PackD3Q19(int faceGrid, int edgeGrid, int threads,double *f_even, double *f_odd, int N,
|
||||
int *dvcSendList_x, int *dvcSendList_y, int *dvcSendList_z, int *dvcSendList_X, int *dvcSendList_Y, int *dvcSendList_Z,
|
||||
int *dvcSendList_xy, int *dvcSendList_XY, int *dvcSendList_xY, int *dvcSendList_Xy,
|
||||
int *dvcSendList_xz, int *dvcSendList_XZ, int *dvcSendList_xZ, int *dvcSendList_Xz,
|
||||
int *dvcSendList_yz, int *dvcSendList_YZ, int *dvcSendList_yZ, int *dvcSendList_Yz,
|
||||
int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X, int sendCount_Y, int sendCount_Z,
|
||||
int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy,
|
||||
int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz,
|
||||
int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz,
|
||||
double *sendbuf_x, double *sendbuf_y, double *sendbuf_z, double *sendbuf_X, double *sendbuf_Y, double *sendbuf_Z,
|
||||
double *sendbuf_xy, double *sendbuf_XY, double *sendbuf_xY, double *sendbuf_Xy,
|
||||
double *sendbuf_xz, double *sendbuf_XZ, double *sendbuf_xZ, double *sendbuf_Xz,
|
||||
double *sendbuf_yz, double *sendbuf_YZ, double *sendbuf_yZ, double *sendbuf_Yz)
|
||||
{
|
||||
//...................................................................................
|
||||
PackDist<<<faceGrid,threads>>>(1,dvcSendList_x,0,sendCount_x,sendbuf_x,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(4,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(5,dvcSendList_x,2*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(6,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(7,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,f_even,N);
|
||||
//...Packing for X face<<<faceGrid,threads>>>(1,7,9,11,13)................................
|
||||
PackDist<<<faceGrid,threads>>>(0,dvcSendList_X,0,sendCount_X,sendbuf_X,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(3,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(4,dvcSendList_X,2*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(5,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(6,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,f_odd,N);
|
||||
//...Packing for y face<<<faceGrid,threads>>>(4,8,9,16,18).................................
|
||||
PackDist<<<faceGrid,threads>>>(2,dvcSendList_y,0,sendCount_y,sendbuf_y,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(4,dvcSendList_y,2*sendCount_y,sendCount_y,sendbuf_y,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(8,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(9,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,f_even,N);
|
||||
//...Packing for Y face<<<faceGrid,threads>>>(3,7,10,15,17).................................
|
||||
PackDist<<<faceGrid,threads>>>(1,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(5,dvcSendList_Y,2*sendCount_Y,sendCount_Y,sendbuf_Y,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(7,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(8,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,f_odd,N);
|
||||
//...Packing for z face<<<faceGrid,threads>>>(6,12,13,16,17)................................
|
||||
PackDist<<<faceGrid,threads>>>(3,dvcSendList_z,0,sendCount_z,sendbuf_z,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(6,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(8,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(8,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,f_odd,N);
|
||||
//...Packing for Z face<<<faceGrid,threads>>>(5,11,14,15,18)................................
|
||||
PackDist<<<faceGrid,threads>>>(2,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(5,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(7,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N);
|
||||
PackDist<<<faceGrid,threads>>>(7,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,f_odd,N);
|
||||
PackDist<<<faceGrid,threads>>>(9,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,f_even,N);
|
||||
//...Pack the xy edge <<<edgeGrid,threads>>>(8)................................
|
||||
PackDist<<<edgeGrid,threads>>>(4,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,f_even,N);
|
||||
//...Pack the Xy edge <<<edgeGrid,threads>>>(9)................................
|
||||
PackDist<<<edgeGrid,threads>>>(4,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,f_odd,N);
|
||||
//...Pack the xY edge <<<edgeGrid,threads>>>(10)................................
|
||||
PackDist<<<edgeGrid,threads>>>(5,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,f_even,N);
|
||||
//...Pack the XY edge <<<edgeGrid,threads>>>(7)................................
|
||||
PackDist<<<edgeGrid,threads>>>(3,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,f_odd,N);
|
||||
//...Pack the xz edge <<<edgeGrid,threads>>>(12)................................
|
||||
PackDist<<<edgeGrid,threads>>>(6,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,f_even,N);
|
||||
//...Pack the xZ edge <<<edgeGrid,threads>>>(14)................................
|
||||
PackDist<<<edgeGrid,threads>>>(7,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,f_even,N);
|
||||
//...Pack the Xz edge <<<edgeGrid,threads>>>(13)................................
|
||||
PackDist<<<edgeGrid,threads>>>(6,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,f_odd,N);
|
||||
//...Pack the XZ edge <<<edgeGrid,threads>>>(11)................................
|
||||
PackDist<<<edgeGrid,threads>>>(5,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,f_odd,N);
|
||||
//...Pack the xz edge <<<edgeGrid,threads>>>(12)................................
|
||||
PackDist<<<edgeGrid,threads>>>(6,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,f_even,N);
|
||||
//...Pack the xZ edge <<<edgeGrid,threads>>>(14)................................
|
||||
PackDist<<<edgeGrid,threads>>>(7,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,f_even,N);
|
||||
//...Pack the Xz edge <<<edgeGrid,threads>>>(13)................................
|
||||
PackDist<<<edgeGrid,threads>>>(6,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,f_odd,N);
|
||||
//...Pack the XZ edge <<<edgeGrid,threads>>>(11)................................
|
||||
PackDist<<<edgeGrid,threads>>>(5,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,f_odd,N);
|
||||
//...Pack the yz edge <<<edgeGrid,threads>>>(16)................................
|
||||
PackDist<<<edgeGrid,threads>>>(8,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,f_even,N);
|
||||
//...Pack the yZ edge <<<edgeGrid,threads>>>(18)................................
|
||||
PackDist<<<edgeGrid,threads>>>(9,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,f_even,N);
|
||||
//...Pack the Yz edge <<<edgeGrid,threads>>>(17)................................
|
||||
PackDist<<<edgeGrid,threads>>>(8,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,f_odd,N);
|
||||
//...Pack the YZ edge <<<edgeGrid,threads>>>(15)................................
|
||||
PackDist<<<edgeGrid,threads>>>(7,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,f_odd,N);
|
||||
}
|
||||
//...................................................................................
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
|
||||
int sendCount, double *sendbuf, double *Dist, int N)
|
||||
{
|
||||
//...................................................................................
|
||||
PackDist<<<grid,threads>>>(q,SendList,start,sendCount,sendbuf,Dist,N);
|
||||
}
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
|
||||
int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz)
|
||||
{
|
||||
//...................................................................................
|
||||
MapRecvDist<<<grid,threads>>>(q,Cqx,Cqy,Cqz,RecvList,start,recvCount,recvbuf,Dist,Nx,Ny,Nz);
|
||||
}
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_SwapD3Q19( int nblocks, int nthreads, int S,
|
||||
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
|
||||
{
|
||||
SwapD3Q19 <<< nblocks, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
|
||||
}
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd, int Nx,
|
||||
int Ny, int Nz)
|
||||
{
|
||||
InitD3Q19 <<<nblocks, nthreads>>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
|
||||
}
|
||||
//*************************************************************************
|
||||
|
||||
//*************************************************************************
|
||||
__global__ void dvc_SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz)
|
||||
{
|
||||
int i,j,k,n,nn,N;
|
||||
// distributions
|
||||
double f1,f2,f3,f4,f5,f6,f7,f8,f9;
|
||||
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
|
||||
|
||||
N = Nx*Ny*Nz;
|
||||
|
||||
for (n=0; n<N; n++){
|
||||
//.......Back out the 3-D indices for node n..............
|
||||
k = n/(Nx*Ny);
|
||||
j = (n-Nx*Ny*k)/Nx;
|
||||
i = n-Nx*Ny*k-Nz*j;
|
||||
|
||||
if (ID[n] > 0){
|
||||
//........................................................................
|
||||
// Retrieve even distributions from the local node (swap convention)
|
||||
// f0 = disteven[n]; // Does not particupate in streaming
|
||||
f1 = distodd[n];
|
||||
f3 = distodd[N+n];
|
||||
f5 = distodd[2*N+n];
|
||||
f7 = distodd[3*N+n];
|
||||
f9 = distodd[4*N+n];
|
||||
f11 = distodd[5*N+n];
|
||||
f13 = distodd[6*N+n];
|
||||
f15 = distodd[7*N+n];
|
||||
f17 = distodd[8*N+n];
|
||||
//........................................................................
|
||||
|
||||
//........................................................................
|
||||
// Retrieve odd distributions from neighboring nodes (swap convention)
|
||||
//........................................................................
|
||||
nn = n+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
//if (i+1<Nx){
|
||||
f2 = disteven[N+nn]; // pull neighbor for distribution 2
|
||||
if (f2 > 0){
|
||||
distodd[n] = f2;
|
||||
disteven[N+nn] = f1;
|
||||
}
|
||||
//}
|
||||
//........................................................................
|
||||
nn = n+Nx; // neighbor index (pull convention)
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
//if (j+1<Ny){
|
||||
f4 = disteven[2*N+nn]; // pull neighbor for distribution 4
|
||||
if (f4 > 0){
|
||||
distodd[N+n] = f4;
|
||||
disteven[2*N+nn] = f3;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx*Ny; // neighbor index (pull convention)
|
||||
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (k+1<Nz){
|
||||
f6 = disteven[3*N+nn]; // pull neighbor for distribution 6
|
||||
if (f6 > 0){
|
||||
distodd[2*N+n] = f6;
|
||||
disteven[3*N+nn] = f5;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
//if ((i+1<Nx) && (j+1<Ny)){
|
||||
f8 = disteven[4*N+nn]; // pull neighbor for distribution 8
|
||||
if (f8 > 0){
|
||||
distodd[3*N+n] = f8;
|
||||
disteven[4*N+nn] = f7;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n-Nx+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
if (j-1<0) nn += Nx*Ny; // Perioidic BC along the y-boundary
|
||||
//if (!(i-1<0) && (j+1<Ny)){
|
||||
f10 = disteven[5*N+nn]; // pull neighbor for distribution 9
|
||||
if (f10 > 0){
|
||||
distodd[4*N+n] = f10;
|
||||
disteven[5*N+nn] = f9;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx*Ny+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if ( !(i-1<0) && !(k-1<0)){
|
||||
f12 = disteven[6*N+nn]; // pull distribution 11
|
||||
if (f12 > 0){
|
||||
distodd[5*N+n] = f12;
|
||||
disteven[6*N+nn] = f11;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n-Nx*Ny+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
if (k-1<0) nn += Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (!(i-1<0) && (k+1<Nz)){
|
||||
f14 = disteven[7*N+nn]; // pull neighbor for distribution 13
|
||||
if (f14 > 0){
|
||||
distodd[6*N+n] = f14;
|
||||
disteven[7*N+nn] = f13;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx*Ny+Nx; // neighbor index (pull convention)
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (!(j-1<0) && !(k-1<0)){
|
||||
f16 = disteven[8*N+nn]; // pull neighbor for distribution 15
|
||||
if (f16 > 0){
|
||||
distodd[7*N+n] = f16;
|
||||
disteven[8*N+nn] = f15;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n-Nx*Ny+Nx; // neighbor index (pull convention)
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
if (k-1<0) nn += Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (!(j-1<0) && (k+1<Nz)){
|
||||
f18 = disteven[9*N+nn]; // pull neighbor for distribution 17
|
||||
if (f18 > 0){
|
||||
distodd[8*N+n] = f18;
|
||||
disteven[9*N+nn] = f17;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
19
gpu/D3Q19.h
19
gpu/D3Q19.h
@@ -1,15 +1,6 @@
|
||||
|
||||
//*************************************************************************
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID, double *f_even, double *f_odd, int Nx,
|
||||
int Ny, int Nz);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S,
|
||||
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
|
||||
int sendCount, double *sendbuf, double *Dist, int N);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
|
||||
int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N);
|
||||
extern "C" void dvc_UnpackDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
|
||||
double *recvbuf, double *dist, int Nx, int Ny, int Nz);
|
||||
//*************************************************************************
|
||||
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
|
||||
extern "C" void dvc_SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);
|
||||
|
||||
151
gpu/D3Q7.cu
151
gpu/D3Q7.cu
@@ -1,6 +1,6 @@
|
||||
// GPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||
// CPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||
|
||||
__global__ void PackValues(int *list, int count, double *sendbuf, double *Data, int N){
|
||||
__global__ void dvc_PackValues(int *list, int count, double *sendbuf, double *Data, int N){
|
||||
//....................................................................................
|
||||
// Pack distribution q into the send buffer for the listed lattice sites
|
||||
// dist may be even or odd distributions stored by stream layout
|
||||
@@ -12,7 +12,7 @@ __global__ void PackValues(int *list, int count, double *sendbuf, double *Data,
|
||||
sendbuf[idx] = Data[n];
|
||||
}
|
||||
}
|
||||
__global__ void UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
|
||||
__global__ void dvc_UnpackValues(int *list, int count, double *recvbuf, double *Data, int N){
|
||||
//....................................................................................
|
||||
// Pack distribution q into the send buffer for the listed lattice sites
|
||||
// dist may be even or odd distributions stored by stream layout
|
||||
@@ -25,15 +25,14 @@ __global__ void UnpackValues(int *list, int count, double *recvbuf, double *Data
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
|
||||
__global__ void dvc_PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){
|
||||
//....................................................................................
|
||||
// Pack distribution into the send buffer for the listed lattice sites
|
||||
//....................................................................................
|
||||
int idx,n,component;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx<count){
|
||||
// for (idx=0; idx<count; idx++){
|
||||
for (component=0; component<number; component++){
|
||||
for (component=0; component<number; component++){
|
||||
n = list[idx];
|
||||
sendbuf[idx*number+component] = Data[number*n+component];
|
||||
Data[number*n+component] = 0.0; // Set the data value to zero once it's in the buffer!
|
||||
@@ -42,7 +41,7 @@ __global__ void PackDenD3Q7(int *list, int count, double *sendbuf, int number, d
|
||||
}
|
||||
|
||||
|
||||
__global__ void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
|
||||
__global__ void dvc_UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){
|
||||
//....................................................................................
|
||||
// Unack distribution from the recv buffer
|
||||
// Sum to the existing density value
|
||||
@@ -50,40 +49,128 @@ __global__ void UnpackDenD3Q7(int *list, int count, double *recvbuf, int number,
|
||||
int idx,n,component;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx<count){
|
||||
// for (idx=0; idx<count; idx++){
|
||||
for (component=0; component<number; component++){
|
||||
for (component=0; component<number; component++){
|
||||
n = list[idx];
|
||||
Data[number*n+component] += recvbuf[idx*number+component];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_PackDenD3Q7(int grid, int threads, int *list, int count, double *sendbuf,
|
||||
int number, double *Data, int N)
|
||||
__global__ void dvc_InitD3Q7(char *ID, double *f_even, double *f_odd, double *Den, int Nx, int Ny, int Nz)
|
||||
{
|
||||
//...................................................................................
|
||||
PackDenD3Q7<<<grid,threads>>>(list,count,sendbuf,number,Data,N);
|
||||
int n,N;
|
||||
N = Nx*Ny*Nz;
|
||||
double value;
|
||||
|
||||
for (int s=0; s*blockDim.x*<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||
if (n<N){
|
||||
|
||||
if (ID[n] > 0){
|
||||
value = Den[n];
|
||||
f_even[n] = 0.3333333333333333*value;
|
||||
f_odd[n] = 0.1111111111111111*value; //double(100*n)+1.f;
|
||||
f_even[N+n] = 0.1111111111111111*value; //double(100*n)+2.f;
|
||||
f_odd[N+n] = 0.1111111111111111*value; //double(100*n)+3.f;
|
||||
f_even[2*N+n] = 0.1111111111111111*value; //double(100*n)+4.f;
|
||||
f_odd[2*N+n] = 0.1111111111111111*value; //double(100*n)+5.f;
|
||||
f_even[3*N+n] = 0.1111111111111111*value; //double(100*n)+6.f;
|
||||
}
|
||||
else{
|
||||
for(int q=0; q<2; q++){
|
||||
f_even[q*N+n] = -1.0;
|
||||
f_odd[q*N+n] = -1.0;
|
||||
}
|
||||
f_even[3*N+n] = -1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_UnpackDenD3Q7(int grid, int threads, int *list, int count, double *recvbuf,
|
||||
int number, double *Data, int N)
|
||||
|
||||
//*************************************************************************
|
||||
__global__ void dvc_SwapD3Q7(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz)
|
||||
{
|
||||
//...................................................................................
|
||||
UnpackDenD3Q7<<<grid,threads>>>(list,count,recvbuf,number,Data,N);
|
||||
int i,j,k,n,nn,N;
|
||||
// distributions
|
||||
double f1,f2,f3,f4,f5,f6,f7,f8,f9;
|
||||
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
|
||||
|
||||
N = Nx*Ny*Nz;
|
||||
|
||||
for (n=0; n<N; n++){
|
||||
//.......Back out the 3-D indices for node n..............
|
||||
k = n/(Nx*Ny);
|
||||
j = (n-Nx*Ny*k)/Nx;
|
||||
i = n-Nx*Ny*k-Nz*j;
|
||||
|
||||
if (ID[n] > 0){
|
||||
//........................................................................
|
||||
// Retrieve even distributions from the local node (swap convention)
|
||||
// f0 = disteven[n]; // Does not particupate in streaming
|
||||
f1 = distodd[n];
|
||||
f3 = distodd[N+n];
|
||||
f5 = distodd[2*N+n];
|
||||
//........................................................................
|
||||
|
||||
//........................................................................
|
||||
// Retrieve odd distributions from neighboring nodes (swap convention)
|
||||
//........................................................................
|
||||
nn = n+1; // neighbor index (pull convention)
|
||||
if (!(i+1<Nx)) nn -= Nx; // periodic BC along the x-boundary
|
||||
//if (i+1<Nx){
|
||||
f2 = disteven[N+nn]; // pull neighbor for distribution 2
|
||||
if (!(f2 < 0)){
|
||||
distodd[n] = f2;
|
||||
disteven[N+nn] = f1;
|
||||
}
|
||||
//}
|
||||
//........................................................................
|
||||
nn = n+Nx; // neighbor index (pull convention)
|
||||
if (!(j+1<Ny)) nn -= Nx*Ny; // Perioidic BC along the y-boundary
|
||||
//if (j+1<Ny){
|
||||
f4 = disteven[2*N+nn]; // pull neighbor for distribution 4
|
||||
if (!(f4 < 0)){
|
||||
distodd[N+n] = f4;
|
||||
disteven[2*N+nn] = f3;
|
||||
// }
|
||||
}
|
||||
//........................................................................
|
||||
nn = n+Nx*Ny; // neighbor index (pull convention)
|
||||
if (!(k+1<Nz)) nn -= Nx*Ny*Nz; // Perioidic BC along the z-boundary
|
||||
//if (k+1<Nz){
|
||||
f6 = disteven[3*N+nn]; // pull neighbor for distribution 6
|
||||
if (!(f6 < 0)){
|
||||
distodd[2*N+n] = f6;
|
||||
disteven[3*N+nn] = f5;
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_PackValues(int grid, int threads, int *list, int count, double *sendbuf,
|
||||
double *Data, int N)
|
||||
|
||||
//*************************************************************************
|
||||
__global__ void dvc_ComputeDensityD3Q7(char *ID, double *disteven, double *distodd, double *Den,
|
||||
int Nx, int Ny, int Nz)
|
||||
{
|
||||
//...................................................................................
|
||||
PackValues<<<grid,threads>>>(list,count,sendbuf,Data,N);
|
||||
char id;
|
||||
int n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
int N = Nx*Ny*Nz;
|
||||
|
||||
for (n=0; n<N; n++){
|
||||
id = ID[n];
|
||||
if (id > 0 ){
|
||||
// Read the distributions
|
||||
f0 = disteven[n];
|
||||
f2 = disteven[N+n];
|
||||
f4 = disteven[2*N+n];
|
||||
f6 = disteven[3*N+n];
|
||||
f1 = distodd[n];
|
||||
f3 = distodd[N+n];
|
||||
f5 = distodd[2*N+n];
|
||||
// Compute the density
|
||||
Den[n] = f0+f1+f2+f3+f4+f5+f6;
|
||||
}
|
||||
}
|
||||
}
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_UnpackValues(int grid, int threads, int *list, int count, double *recvbuf,
|
||||
double *Data, int N)
|
||||
{
|
||||
//...................................................................................
|
||||
UnpackValues<<<grid,threads>>>(list,count,recvbuf,Data,N);
|
||||
}
|
||||
//***************************************************************************************
|
||||
|
||||
31
gpu/D3Q7.h
31
gpu/D3Q7.h
@@ -1,15 +1,16 @@
|
||||
//
|
||||
//***************************************************************************************
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_PackDenD3Q7(int grid, int threads, int *list, int count, double *sendbuf,
|
||||
int number, double *Data, int N);
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_UnpackDenD3Q7(int grid, int threads, int *list, int count, double *recvbuf,
|
||||
int number, double *Data, int N);
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_PackValues(int grid, int threads, int *list, int count, double *sendbuf,
|
||||
double *Data, int N);
|
||||
//***************************************************************************************
|
||||
extern "C" void dvc_UnpackValues(int grid, int threads, int *list, int count, double *recvbuf,
|
||||
double *Data, int N);
|
||||
//***************************************************************************************
|
||||
// CPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||
|
||||
extern "C" void dvc_PackValues(int *list, int count, double *sendbuf, double *Data, int N);
|
||||
|
||||
extern "C" void dvc_UnpackValues(int *list, int count, double *recvbuf, double *Data, int N);
|
||||
|
||||
extern "C" void dvc_PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N);
|
||||
|
||||
extern "C" void dvc_UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N);
|
||||
|
||||
extern "C" void dvc_InitD3Q7(char *ID, double *f_even, double *f_odd, double *Den, int Nx, int Ny, int Nz);
|
||||
|
||||
extern "C" void dvc_SwapD3Q7(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);
|
||||
|
||||
extern "C" void dvc_ComputeDensityD3Q7(char *ID, double *disteven, double *distodd, double *Den,
|
||||
int Nx, int Ny, int Nz);
|
||||
19
gpu/Extras.cu
Normal file
19
gpu/Extras.cu
Normal file
@@ -0,0 +1,19 @@
|
||||
// Basic cuda functions callable from C/C++ code
|
||||
#include <cuda.h>
|
||||
|
||||
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size){
|
||||
cudaMalloc(address,size);
|
||||
}
|
||||
|
||||
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size){
|
||||
cudaMemcpy(dest,source,size,cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
|
||||
extern "C" void dvc_CopyToHost(void* dest, void* source, size_t size){
|
||||
cudaMemcpy(dest,source,size,cudaMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
extern "C" void dvc_Barrier(){
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
7
gpu/Extras.h
Normal file
7
gpu/Extras.h
Normal file
@@ -0,0 +1,7 @@
|
||||
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
|
||||
|
||||
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
|
||||
|
||||
extern "C" void dvc_CopyToHost(void* dest, void* source, size_t size);
|
||||
|
||||
extern "C" void dvc_Barrier();
|
||||
Reference in New Issue
Block a user