Refactored D3Q19.cu / D3Q19.h for gpu build
This commit is contained in:
parent
7f68548125
commit
595765881a
47
gpu/D3Q19.cu
47
gpu/D3Q19.cu
@ -1,3 +1,6 @@
|
|||||||
|
#define NBLOCKS 32
|
||||||
|
#define NTHREADS 128
|
||||||
|
|
||||||
__global__ void dvc_PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
|
__global__ void dvc_PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
|
||||||
//....................................................................................
|
//....................................................................................
|
||||||
// Pack distribution q into the send buffer for the listed lattice sites
|
// Pack distribution q into the send buffer for the listed lattice sites
|
||||||
@ -58,10 +61,11 @@ __global__ void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, i
|
|||||||
{
|
{
|
||||||
int n,N;
|
int n,N;
|
||||||
N = Nx*Ny*Nz;
|
N = Nx*Ny*Nz;
|
||||||
|
int S = N/NBLOCKS/NTHREADS + 1;
|
||||||
for (n=0; n<N; n++){
|
for (int s=0; s<S; s++){
|
||||||
|
//........Get 1-D index for this thread....................
|
||||||
if (ID[n] > 0){
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||||
|
if (n<N && ID[n] > 0){
|
||||||
f_even[n] = 0.3333333333333333;
|
f_even[n] = 0.3333333333333333;
|
||||||
f_odd[n] = 0.055555555555555555; //double(100*n)+1.f;
|
f_odd[n] = 0.055555555555555555; //double(100*n)+1.f;
|
||||||
f_even[N+n] = 0.055555555555555555; //double(100*n)+2.f;
|
f_even[N+n] = 0.055555555555555555; //double(100*n)+2.f;
|
||||||
@ -102,13 +106,15 @@ __global__ void dvc_SwapD3Q19(char *ID, double *disteven, double *distodd, int
|
|||||||
|
|
||||||
N = Nx*Ny*Nz;
|
N = Nx*Ny*Nz;
|
||||||
|
|
||||||
for (n=0; n<N; n++){
|
int S = N/NBLOCKS/NTHREADS + 1;
|
||||||
//.......Back out the 3-D indices for node n..............
|
for (int s=0; s<S; s++){
|
||||||
k = n/(Nx*Ny);
|
//........Get 1-D index for this thread....................
|
||||||
j = (n-Nx*Ny*k)/Nx;
|
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||||
i = n-Nx*Ny*k-Nz*j;
|
if (n<N && ID[n] > 0){
|
||||||
|
//.......Back out the 3-D indices for node n..............
|
||||||
if (ID[n] > 0){
|
k = n/(Nx*Ny);
|
||||||
|
j = (n-Nx*Ny*k)/Nx;
|
||||||
|
i = n-Nx*Ny*k-Nz*j;
|
||||||
//........................................................................
|
//........................................................................
|
||||||
// Retrieve even distributions from the local node (swap convention)
|
// Retrieve even distributions from the local node (swap convention)
|
||||||
// f0 = disteven[n]; // Does not particupate in streaming
|
// f0 = disteven[n]; // Does not particupate in streaming
|
||||||
@ -226,3 +232,22 @@ __global__ void dvc_SwapD3Q19(char *ID, double *disteven, double *distodd, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern "C" void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
|
||||||
|
int GRID = count / 512 + 1;
|
||||||
|
dvc_PackDist <<<GRID,512 >>>(q, list, start, count, sendbuf, dist, N);
|
||||||
|
}
|
||||||
|
extern "C" void UnpackDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
|
||||||
|
double *recvbuf, double *dist, int Nx, int Ny, int Nz){
|
||||||
|
int GRID = count / 512 + 1;
|
||||||
|
dvc_UnpackDist <<<GRID,512 >>>(q, Cqx, Cqy, Cqz, list, start, count, recvbuf, dist, Nx, Ny, Nz);
|
||||||
|
}
|
||||||
|
//*************************************************************************
|
||||||
|
extern "C" void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz){
|
||||||
|
dvc_InitD3Q19<<<NBLOCKS,NTHREADS >>>(ID, f_even, f_odd, Nx, Ny, Nz);
|
||||||
|
}
|
||||||
|
extern "C" void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz){
|
||||||
|
dvc_SwapD3Q19<<<NBLOCKS,NTHREADS >>>(ID, disteven, distodd, Nx, Ny, Nz);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
extern "C" void dvc_PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N);
|
extern "C" void PackDist(int q, int *list, int start, int count, double *sendbuf, double *dist, int N);
|
||||||
extern "C" void dvc_UnpackDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
|
extern "C" void UnpackDist(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
|
||||||
double *recvbuf, double *dist, int Nx, int Ny, int Nz);
|
double *recvbuf, double *dist, int Nx, int Ny, int Nz);
|
||||||
//*************************************************************************
|
//*************************************************************************
|
||||||
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
|
extern "C" void InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
|
||||||
extern "C" void dvc_SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);
|
extern "C" void SwapD3Q19(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);
|
||||||
|
@ -1,6 +1,4 @@
|
|||||||
// CPU Functions for D3Q7 Lattice Boltzmann Methods
|
// GPU Functions for D3Q7 Lattice Boltzmann Methods
|
||||||
//int NBLOCKS = 32;
|
|
||||||
//int NTHREADS = 128;
|
|
||||||
|
|
||||||
#define NBLOCKS 32
|
#define NBLOCKS 32
|
||||||
#define NTHREADS 128
|
#define NTHREADS 128
|
||||||
|
Loading…
Reference in New Issue
Block a user