clean up
This commit is contained in:
parent
1ed3428ef6
commit
12a9496a7e
|
@ -1,11 +0,0 @@
|
||||||
#INSTALL_LBPM_EXE( lb1_MRT_mpi )
|
|
||||||
INSTALL_LBPM_EXE( lb2_Color )
|
|
||||||
INSTALL_LBPM_EXE( lb2_Color_mpi )
|
|
||||||
#INSTALL_LBPM_EXE( lb2_Color_pBC_wia_mpi )
|
|
||||||
INSTALL_LBPM_EXE( lb2_Color_wia_mpi )
|
|
||||||
|
|
||||||
|
|
||||||
# Run the serial ConstrainedBubble inputs as a weekly test
|
|
||||||
CONFIGURE_FILE( ${LBPM_SOURCE_DIR}/example/ConstrainedBubble/Color.in ${CMAKE_CURRENT_BINARY_DIR}/Color.in COPYONLY )
|
|
||||||
CONFIGURE_FILE( ${LBPM_SOURCE_DIR}/example/ConstrainedBubble/Domain.in ${CMAKE_CURRENT_BINARY_DIR}/Domain.in COPYONLY )
|
|
||||||
ADD_LBPM_WEEKLY_TEST( lb2_Color_wia_mpi 1 )
|
|
|
@ -1,248 +0,0 @@
|
||||||
#include <stdio.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
#include <cuda.h>
|
|
||||||
//#include <cutil.h>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
|
||||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
|
||||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
|
|
||||||
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
void Write_Out(double *array, int Nx, int Ny, int Nz){
|
|
||||||
int value;
|
|
||||||
FILE *output;
|
|
||||||
output = fopen("dist.list","w");
|
|
||||||
for (int k=0; k<Nz; k++){
|
|
||||||
for (int j=0; j<Ny; j++){
|
|
||||||
for (int i=0; i<Nx; i++){
|
|
||||||
int index = k*Nx*Ny+j*Nx+i;
|
|
||||||
value = int(array[index]);
|
|
||||||
fprintf(output, "| %i",value);
|
|
||||||
}
|
|
||||||
fprintf(output, " | \n");
|
|
||||||
}
|
|
||||||
fprintf(output,"************************************** \n");
|
|
||||||
}
|
|
||||||
fclose(output);
|
|
||||||
}
|
|
||||||
|
|
||||||
//**************************************************************************
|
|
||||||
// MRT implementation of the LBM using CUDA
|
|
||||||
//**************************************************************************
|
|
||||||
|
|
||||||
int main(void)
|
|
||||||
{
|
|
||||||
|
|
||||||
int deviceCount;
|
|
||||||
cudaGetDeviceCount(&deviceCount);
|
|
||||||
int device = 1;
|
|
||||||
printf("Number of devices = %i \n", deviceCount);
|
|
||||||
printf("Current device is = %i \n", device);
|
|
||||||
cudaSetDevice(device);
|
|
||||||
|
|
||||||
// BGK Model parameters
|
|
||||||
string FILENAME;
|
|
||||||
unsigned int nBlocks, nthreads;
|
|
||||||
int timestepMax, interval;
|
|
||||||
double tau,Fx,Fy,Fz,tol;
|
|
||||||
// Domain variables
|
|
||||||
int Nx,Ny,Nz;
|
|
||||||
|
|
||||||
ifstream input("MRT.in");
|
|
||||||
input >> FILENAME; // name of the input file
|
|
||||||
input >> Nz; // number of nodes (x,y,z)
|
|
||||||
input >> nBlocks;
|
|
||||||
input >> nthreads;
|
|
||||||
input >> tau; // relaxation time
|
|
||||||
input >> Fx; // External force components (x,y,z)
|
|
||||||
input >> Fy;
|
|
||||||
input >> Fz;
|
|
||||||
input >> timestepMax; // max no. of timesteps
|
|
||||||
input >> interval; // error interval
|
|
||||||
input >> tol; // error tolerance
|
|
||||||
|
|
||||||
double rlx_setA = 1.f/tau;
|
|
||||||
double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
|
|
||||||
|
|
||||||
printf("tau = %f \n", tau);
|
|
||||||
printf("Set A = %f \n", rlx_setA);
|
|
||||||
printf("Set B = %f \n", rlx_setB);
|
|
||||||
printf("Force(x) = %f \n", Fx);
|
|
||||||
printf("Force(y) = %f \n", Fy);
|
|
||||||
printf("Force(z) = %f \n", Fz);
|
|
||||||
|
|
||||||
Nx = Ny = Nz; // Cubic domain
|
|
||||||
|
|
||||||
int N = Nx*Ny*Nz;
|
|
||||||
int dist_mem_size = N*sizeof(double);
|
|
||||||
|
|
||||||
// unsigned int nBlocks = 32;
|
|
||||||
// int nthreads = 128;
|
|
||||||
int S = N/nthreads/nBlocks;
|
|
||||||
|
|
||||||
// unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1);
|
|
||||||
dim3 grid(nBlocks,1,1);
|
|
||||||
|
|
||||||
printf("Number of blocks = %i \n", nBlocks);
|
|
||||||
printf("Threads per block = %i \n", nthreads);
|
|
||||||
printf("Sweeps per thread = %i \n", S);
|
|
||||||
printf("Number of nodes per side = %i \n", Nx);
|
|
||||||
printf("Total Number of nodes = %i \n", N);
|
|
||||||
|
|
||||||
//.......................................................................
|
|
||||||
printf("Read input media... \n");
|
|
||||||
// .......... READ THE INPUT FILE .......................................
|
|
||||||
int n;
|
|
||||||
char value;
|
|
||||||
char *id;
|
|
||||||
id = new char[N];
|
|
||||||
int sum = 0;
|
|
||||||
double porosity;
|
|
||||||
ifstream PM(FILENAME.c_str(),ios::binary);
|
|
||||||
for (int k=0;k<Nz;k++){
|
|
||||||
for (int j=0;j<Ny;j++){
|
|
||||||
for (int i=0;i<Nx;i++){
|
|
||||||
PM.read((char *) (&value), sizeof(value));
|
|
||||||
n = k*Nx*Ny+j*Nx+i;
|
|
||||||
id[n] = value;
|
|
||||||
if (value > 0) sum++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PM.close();
|
|
||||||
printf("File porosity = %f\n", double(sum)/N);
|
|
||||||
//.......................................................................
|
|
||||||
//...........device phase ID.................................................
|
|
||||||
char *ID;
|
|
||||||
cudaMalloc((void **) &ID, N); // Allocate device memory
|
|
||||||
// Copy to the device
|
|
||||||
cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice);
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//......................device distributions.................................
|
|
||||||
double *f_even,*f_odd;
|
|
||||||
//...........................................................................
|
|
||||||
cudaMalloc((void **) &f_even, 10*dist_mem_size); // Allocate device memory
|
|
||||||
cudaMalloc((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//...........................................................................
|
|
||||||
// cudaHostAlloc(&fa,dist_mem_size,cudaHostAllocPortable);
|
|
||||||
// cudaHostAlloc(&fb,dist_mem_size,cudaHostAllocPortable);
|
|
||||||
// cudaHostRegister(fa,dist_mem_size,cudaHostRegisterPortable);
|
|
||||||
// cudaHostRegister(fb,dist_mem_size,cudaHostRegisterPortable);
|
|
||||||
// cudaHostRegister(id,N*sizeof(char),cudaHostAllocPortable);
|
|
||||||
|
|
||||||
printf("Setting the distributions, size = : %i\n", N);
|
|
||||||
//...........................................................................
|
|
||||||
// INITIALIZE <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
|
|
||||||
//...........................................................................
|
|
||||||
dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
int timestep = 0;
|
|
||||||
printf("No. of timesteps: %i \n", timestepMax);
|
|
||||||
|
|
||||||
//.......create a stream for the LB calculation.......
|
|
||||||
cudaStream_t stream;
|
|
||||||
cudaStreamCreate(&stream);
|
|
||||||
|
|
||||||
//.......create and start timer............
|
|
||||||
cudaEvent_t start, stop;
|
|
||||||
float time;
|
|
||||||
|
|
||||||
cudaEventCreate(&start);
|
|
||||||
cudaEventCreate(&stop);
|
|
||||||
cudaEventRecord( start, 0 );
|
|
||||||
//.........................................
|
|
||||||
|
|
||||||
//************ MAIN ITERATION LOOP ***************************************/
|
|
||||||
while (timestep < timestepMax){
|
|
||||||
|
|
||||||
//...................................................................
|
|
||||||
//........ Execute the swap kernel (device) .........................
|
|
||||||
// SWAP <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
|
|
||||||
//...................................................................
|
|
||||||
dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
|
|
||||||
|
|
||||||
//........ Execute the collision kernel (device) ....................
|
|
||||||
// MRT <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S,
|
|
||||||
// rlx_setA, rlx_setB, Fx, Fy, Fz);
|
|
||||||
//............................................................
|
|
||||||
dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);
|
|
||||||
// Iteration completed!
|
|
||||||
|
|
||||||
timestep++;
|
|
||||||
//...................................................................
|
|
||||||
|
|
||||||
}
|
|
||||||
//************************************************************************/
|
|
||||||
|
|
||||||
cudaThreadSynchronize();
|
|
||||||
//.......... stop and destroy timer.............................
|
|
||||||
cudaEventRecord( stop, stream);
|
|
||||||
cudaEventSynchronize( stop );
|
|
||||||
|
|
||||||
cudaEventElapsedTime( &time, start, stop );
|
|
||||||
printf("CPU time = %f \n", time);
|
|
||||||
|
|
||||||
float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
|
|
||||||
printf("MLUPS = %f \n", MLUPS);
|
|
||||||
|
|
||||||
cudaStreamDestroy(stream);
|
|
||||||
cudaEventDestroy( start );
|
|
||||||
cudaEventDestroy( stop );
|
|
||||||
//..............................................................
|
|
||||||
|
|
||||||
//..............................................................
|
|
||||||
//.........Compute the velocity and copy result to host ........
|
|
||||||
double *velocity;
|
|
||||||
velocity = new double[3*N];
|
|
||||||
//......................device distributions....................................
|
|
||||||
double *vel;
|
|
||||||
//..............................................................................
|
|
||||||
cudaMalloc((void **) &vel, 3*dist_mem_size); // Allocate device memory
|
|
||||||
//..............................................................................
|
|
||||||
// Compute_VELOCITY <<< grid, nthreads >>> (ID, f_even, f_odd, vel, Nx, Ny, Nz, S);
|
|
||||||
//..............................................................................
|
|
||||||
cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);
|
|
||||||
//..............................................................................
|
|
||||||
|
|
||||||
//............................................................
|
|
||||||
//....Write the z-velocity to test poiseuille flow............
|
|
||||||
double vz,vz_avg;
|
|
||||||
vz_avg = 0.0;
|
|
||||||
|
|
||||||
FILE *output;
|
|
||||||
output = fopen("velocity.out","w");
|
|
||||||
for (int k=0; k<1; k++){
|
|
||||||
for (int j=0; j<1; j++){
|
|
||||||
for (int i=0; i<Nx; i++){
|
|
||||||
int n = k*Nx*Ny+j*Nx+i;
|
|
||||||
//.....print value........
|
|
||||||
vz = velocity[2*N+n];
|
|
||||||
vz_avg += vz;
|
|
||||||
fprintf(output, " %e",vz);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fclose(output);
|
|
||||||
|
|
||||||
vz = vz_avg/double(sum);
|
|
||||||
printf("Average Velocity = %e\n", vz);
|
|
||||||
|
|
||||||
|
|
||||||
// cleanup
|
|
||||||
cudaFree(f_even); cudaFree(f_odd); cudaFree(vel); cudaFree(ID);
|
|
||||||
free (velocity); free(id);
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,246 +0,0 @@
|
||||||
#include <stdio.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
#include <cuda.h>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_Barrier();
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_InitD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
|
||||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_SwapD3Q19(char *ID, double *f_even, double *f_odd, int Nx,
|
|
||||||
int Ny, int Nz, int nblocks, int nthreads, int S);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,
|
|
||||||
int Nx, int Ny, int Nz, int nblocks, int nthreads, int S);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
void Write_Out(double *array, int Nx, int Ny, int Nz){
|
|
||||||
int value;
|
|
||||||
FILE *output;
|
|
||||||
output = fopen("dist.list","w");
|
|
||||||
for (int k=0; k<Nz; k++){
|
|
||||||
for (int j=0; j<Ny; j++){
|
|
||||||
for (int i=0; i<Nx; i++){
|
|
||||||
int index = k*Nx*Ny+j*Nx+i;
|
|
||||||
value = int(array[index]);
|
|
||||||
fprintf(output, "| %i",value);
|
|
||||||
}
|
|
||||||
fprintf(output, " | \n");
|
|
||||||
}
|
|
||||||
fprintf(output,"************************************** \n");
|
|
||||||
}
|
|
||||||
fclose(output);
|
|
||||||
}
|
|
||||||
|
|
||||||
//**************************************************************************
|
|
||||||
// MRT implementation of the LBM using CUDA
|
|
||||||
//**************************************************************************
|
|
||||||
|
|
||||||
int main(void)
|
|
||||||
{
|
|
||||||
|
|
||||||
// BGK Model parameters
|
|
||||||
string FILENAME;
|
|
||||||
unsigned int nBlocks, nthreads;
|
|
||||||
int timestepMax, interval;
|
|
||||||
double tau,Fx,Fy,Fz,tol;
|
|
||||||
// Domain variables
|
|
||||||
int Nx,Ny,Nz;
|
|
||||||
|
|
||||||
ifstream input("MRT.in");
|
|
||||||
input >> FILENAME; // name of the input file
|
|
||||||
input >> Nz; // number of nodes (x,y,z)
|
|
||||||
input >> nBlocks;
|
|
||||||
input >> nthreads;
|
|
||||||
input >> tau; // relaxation time
|
|
||||||
input >> Fx; // External force components (x,y,z)
|
|
||||||
input >> Fy;
|
|
||||||
input >> Fz;
|
|
||||||
input >> timestepMax; // max no. of timesteps
|
|
||||||
input >> interval; // error interval
|
|
||||||
input >> tol; // error tolerance
|
|
||||||
|
|
||||||
double rlx_setA = 1.f/tau;
|
|
||||||
double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA);
|
|
||||||
|
|
||||||
printf("tau = %f \n", tau);
|
|
||||||
printf("Set A = %f \n", rlx_setA);
|
|
||||||
printf("Set B = %f \n", rlx_setB);
|
|
||||||
printf("Force(x) = %f \n", Fx);
|
|
||||||
printf("Force(y) = %f \n", Fy);
|
|
||||||
printf("Force(z) = %f \n", Fz);
|
|
||||||
|
|
||||||
Nx = Ny = Nz; // Cubic domain
|
|
||||||
|
|
||||||
int N = Nx*Ny*Nz;
|
|
||||||
int dist_mem_size = N*sizeof(double);
|
|
||||||
|
|
||||||
// unsigned int nBlocks = 32;
|
|
||||||
// int nthreads = 128;
|
|
||||||
int S = N/nthreads/nBlocks;
|
|
||||||
|
|
||||||
// unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1);
|
|
||||||
dim3 grid(nBlocks,1,1);
|
|
||||||
|
|
||||||
printf("Number of blocks = %i \n", nBlocks);
|
|
||||||
printf("Threads per block = %i \n", nthreads);
|
|
||||||
printf("Sweeps per thread = %i \n", S);
|
|
||||||
printf("Number of nodes per side = %i \n", Nx);
|
|
||||||
printf("Total Number of nodes = %i \n", N);
|
|
||||||
|
|
||||||
//.......................................................................
|
|
||||||
printf("Read input media... \n");
|
|
||||||
// .......... READ THE INPUT FILE .......................................
|
|
||||||
int n;
|
|
||||||
char value;
|
|
||||||
char *id;
|
|
||||||
id = new char[N];
|
|
||||||
int sum = 0;
|
|
||||||
double porosity;
|
|
||||||
ifstream PM(FILENAME.c_str(),ios::binary);
|
|
||||||
for (int k=0;k<Nz;k++){
|
|
||||||
for (int j=0;j<Ny;j++){
|
|
||||||
for (int i=0;i<Nx;i++){
|
|
||||||
PM.read((char *) (&value), sizeof(value));
|
|
||||||
n = k*Nx*Ny+j*Nx+i;
|
|
||||||
id[n] = value;
|
|
||||||
if (value > 0) sum++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PM.close();
|
|
||||||
printf("File porosity = %f\n", double(sum)/N);
|
|
||||||
//.......................................................................
|
|
||||||
//...........device phase ID.................................................
|
|
||||||
char *ID;
|
|
||||||
dvc_AllocateDeviceMemory((void **) &ID, N); // Allocate device memory
|
|
||||||
// Copy to the device
|
|
||||||
dvc_CopyToDevice(ID, id, N);
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//......................device distributions.................................
|
|
||||||
double *f_even,*f_odd;
|
|
||||||
//...........................................................................
|
|
||||||
dvc_AllocateDeviceMemory((void **) &f_even, 10*dist_mem_size); // Allocate device memory
|
|
||||||
dvc_AllocateDeviceMemory((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//...........................................................................
|
|
||||||
// cudaHostAlloc(&fa,dist_mem_size,cudaHostAllocPortable);
|
|
||||||
// cudaHostAlloc(&fb,dist_mem_size,cudaHostAllocPortable);
|
|
||||||
// cudaHostRegister(fa,dist_mem_size,cudaHostRegisterPortable);
|
|
||||||
// cudaHostRegister(fb,dist_mem_size,cudaHostRegisterPortable);
|
|
||||||
// cudaHostRegister(id,N*sizeof(char),cudaHostAllocPortable);
|
|
||||||
|
|
||||||
printf("Setting the distributions, size = : %i\n", N);
|
|
||||||
//...........................................................................
|
|
||||||
// INITIALIZE <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
|
|
||||||
//...........................................................................
|
|
||||||
dvc_InitD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
int timestep = 0;
|
|
||||||
printf("No. of timesteps: %i \n", timestepMax);
|
|
||||||
|
|
||||||
//.......create a stream for the LB calculation.......
|
|
||||||
cudaStream_t stream;
|
|
||||||
cudaStreamCreate(&stream);
|
|
||||||
|
|
||||||
//.......create and start timer............
|
|
||||||
cudaEvent_t start, stop;
|
|
||||||
float time;
|
|
||||||
|
|
||||||
cudaEventCreate(&start);
|
|
||||||
cudaEventCreate(&stop);
|
|
||||||
cudaEventRecord( start, 0 );
|
|
||||||
//.........................................
|
|
||||||
|
|
||||||
//************ MAIN ITERATION LOOP ***************************************/
|
|
||||||
while (timestep < timestepMax){
|
|
||||||
|
|
||||||
//...................................................................
|
|
||||||
//........ Execute the swap kernel (device) .........................
|
|
||||||
// SWAP <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S);
|
|
||||||
//...................................................................
|
|
||||||
dvc_SwapD3Q19(ID,f_even,f_odd,Nx,Ny,Nz,nBlocks,nthreads,S);
|
|
||||||
|
|
||||||
//........ Execute the collision kernel (device) ....................
|
|
||||||
// MRT <<< grid, nthreads >>> (ID, f_even, f_odd, Nx, Ny, Nz, S,
|
|
||||||
// rlx_setA, rlx_setB, Fx, Fy, Fz);
|
|
||||||
//............................................................
|
|
||||||
dvc_MRT(ID, f_even, f_odd, rlx_setA, rlx_setB, Fx, Fy, Fz,Nx,Ny,Nz,nBlocks,nthreads,S);
|
|
||||||
// Iteration completed!
|
|
||||||
|
|
||||||
timestep++;
|
|
||||||
//...................................................................
|
|
||||||
|
|
||||||
}
|
|
||||||
//************************************************************************/
|
|
||||||
|
|
||||||
// cudaThreadSynchronize();
|
|
||||||
dvc_Barrier();
|
|
||||||
//.......... stop and destroy timer.............................
|
|
||||||
cudaEventRecord( stop, stream);
|
|
||||||
cudaEventSynchronize( stop );
|
|
||||||
|
|
||||||
cudaEventElapsedTime( &time, start, stop );
|
|
||||||
printf("CPU time = %f \n", time);
|
|
||||||
|
|
||||||
float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
|
|
||||||
printf("MLUPS = %f \n", MLUPS);
|
|
||||||
|
|
||||||
cudaStreamDestroy(stream);
|
|
||||||
cudaEventDestroy( start );
|
|
||||||
cudaEventDestroy( stop );
|
|
||||||
//..............................................................
|
|
||||||
|
|
||||||
//..............................................................
|
|
||||||
/*//.........Compute the velocity and copy result to host ........
|
|
||||||
double *velocity;
|
|
||||||
velocity = new double[3*N];
|
|
||||||
//......................device distributions....................................
|
|
||||||
double *vel;
|
|
||||||
//..............................................................................
|
|
||||||
dvc_AllocateDeviceMemory((void **) &vel, 3*dist_mem_size); // Allocate device memory
|
|
||||||
//..............................................................................
|
|
||||||
// Compute_VELOCITY <<< grid, nthreads >>> (ID, f_even, f_odd, vel, Nx, Ny, Nz, S);
|
|
||||||
//..............................................................................
|
|
||||||
// cudaMemcpy(velocity, vel, 3*dist_mem_size, cudaMemcpyDeviceToHost);
|
|
||||||
//..............................................................................
|
|
||||||
|
|
||||||
//............................................................
|
|
||||||
//....Write the z-velocity to test poiseuille flow............
|
|
||||||
double vz,vz_avg;
|
|
||||||
vz_avg = 0.0;
|
|
||||||
|
|
||||||
/* FILE *output;
|
|
||||||
output = fopen("velocity.out","w");
|
|
||||||
for (int k=0; k<1; k++){
|
|
||||||
for (int j=0; j<1; j++){
|
|
||||||
for (int i=0; i<Nx; i++){
|
|
||||||
int n = k*Nx*Ny+j*Nx+i;
|
|
||||||
//.....print value........
|
|
||||||
vz = velocity[2*N+n];
|
|
||||||
vz_avg += vz;
|
|
||||||
fprintf(output, " %e",vz);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fclose(output);
|
|
||||||
|
|
||||||
vz = vz_avg/double(sum);
|
|
||||||
printf("Average Velocity = %e\n", vz);
|
|
||||||
*/
|
|
||||||
// cleanup
|
|
||||||
// cudaFree(f_even); cudaFree(f_odd); cudaFree(vel); cudaFree(ID);
|
|
||||||
// free (velocity); free(id);
|
|
||||||
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,400 +0,0 @@
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Functions defined in Color.cu
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_InitDenColor(int nblocks, int nthreads, int S, char *ID,
|
|
||||||
double *Den, double *Phi, double das,
|
|
||||||
double dbs, int N);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
|
|
||||||
char *ID, double *Phi,
|
|
||||||
double *ColorGrad, int Nx, int Ny,
|
|
||||||
int Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S, char *ID,
|
|
||||||
double *f_even, double *f_odd,
|
|
||||||
double *ColorGrad, double *Velocity,
|
|
||||||
double rlxA, double rlxB, double alpha,
|
|
||||||
double beta, double Fx, double Fy, double Fz,
|
|
||||||
int Nx, int Ny, int Nz, bool pBC);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
|
|
||||||
char *ID, double *Den, double *Copy,
|
|
||||||
double *Phi, double *ColorGrad,
|
|
||||||
double *Velocity, double beta, int Nx,
|
|
||||||
int Ny, int Nz, bool pBC);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S, char *ID,
|
|
||||||
double *Phi, double *Copy, double *Den, int N);
|
|
||||||
//*************************************************************************
|
|
||||||
//*************************************************************************
|
|
||||||
// Functions defined in D3Q19.cu
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S, char *ID,
|
|
||||||
double *f_even, double *f_odd, int Nx, int Ny,
|
|
||||||
int Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S, char *ID,
|
|
||||||
double *f_even, double *f_odd, int Nx, int Ny,
|
|
||||||
int Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList,
|
|
||||||
int start, int sendCount, double *sendbuf,
|
|
||||||
double *Dist, int N);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy,
|
|
||||||
int Cqz, int *RecvList, int start, int recvCount,
|
|
||||||
double *recvbuf, double *Dist, int Nx, int Ny,
|
|
||||||
int Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
//***************************************************************************************
|
|
||||||
// Functions defined in D3Q7.cu
|
|
||||||
//***************************************************************************************
|
|
||||||
extern "C" void dvc_PackDenD3Q7(int grid, int threads, int *list, int count,
|
|
||||||
double *sendbuf, int number, double *Data,
|
|
||||||
int N);
|
|
||||||
//***************************************************************************************
|
|
||||||
extern "C" void dvc_UnpackDenD3Q7(int grid, int threads, int *list, int count,
|
|
||||||
double *recvbuf, int number, double *Data,
|
|
||||||
int N);
|
|
||||||
//***************************************************************************************
|
|
||||||
extern "C" void dvc_PackValues(int grid, int threads, int *list, int count,
|
|
||||||
double *sendbuf, double *Data, int N);
|
|
||||||
//***************************************************************************************
|
|
||||||
extern "C" void dvc_UnpackValues(int grid, int threads, int *list, int count,
|
|
||||||
double *recvbuf, double *Data, int N);
|
|
||||||
//***************************************************************************************
|
|
||||||
//*************************************************************************
|
|
||||||
// Functions defined in CudaExtras.cu
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_AllocateDeviceMemory(void **address, size_t size);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_CopyToDevice(void *dest, void *source, size_t size);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_CopyToHost(void *dest, void *source, size_t size);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_Barrier();
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Implementation of Two-Phase Immiscible LBM using CUDA
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
inline void PackID(int *list, int count, char *sendbuf, char *ID) {
|
|
||||||
// Fill in the phase ID values from neighboring processors
|
|
||||||
// This packs up the values that need to be sent from one processor to another
|
|
||||||
int idx, n;
|
|
||||||
|
|
||||||
for (idx = 0; idx < count; idx++) {
|
|
||||||
n = list[idx];
|
|
||||||
sendbuf[idx] = ID[n];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//***************************************************************************************
|
|
||||||
inline void UnpackID(int *list, int count, char *recvbuf, char *ID) {
|
|
||||||
// Fill in the phase ID values from neighboring processors
|
|
||||||
// This unpacks the values once they have been recieved from neighbors
|
|
||||||
int idx, n;
|
|
||||||
|
|
||||||
for (idx = 0; idx < count; idx++) {
|
|
||||||
n = list[idx];
|
|
||||||
ID[n] = recvbuf[idx];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//***************************************************************************************
|
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
|
|
||||||
int rank = 0;
|
|
||||||
int nprocs = 1;
|
|
||||||
int nprocx, nprocy, nprocz;
|
|
||||||
int iproc, jproc, kproc;
|
|
||||||
if (rank == 0) {
|
|
||||||
printf("********************************************************\n");
|
|
||||||
printf("Running Hybrid Implementation of Color LBM \n");
|
|
||||||
printf("********************************************************\n");
|
|
||||||
}
|
|
||||||
// Color Model parameters
|
|
||||||
string FILENAME;
|
|
||||||
unsigned int nBlocks, nthreads;
|
|
||||||
int Nx, Ny, Nz;
|
|
||||||
int timestepMax, interval;
|
|
||||||
double tau, Fx, Fy, Fz, tol;
|
|
||||||
double alpha, beta;
|
|
||||||
double das, dbs;
|
|
||||||
double din, dout;
|
|
||||||
bool pBC;
|
|
||||||
int i, j, k, n;
|
|
||||||
|
|
||||||
if (rank == 0) {
|
|
||||||
//.............................................................
|
|
||||||
// READ SIMULATION PARMAETERS FROM INPUT FILE
|
|
||||||
//.............................................................
|
|
||||||
ifstream input("Color.in");
|
|
||||||
// Line 1: Name of the phase indicator file (s=0,w=1,n=2)
|
|
||||||
input >> FILENAME;
|
|
||||||
// Line 2: domain size (Nx, Ny, Nz)
|
|
||||||
input >> Nz; // number of nodes (x,y,z)
|
|
||||||
input >> nBlocks;
|
|
||||||
input >> nthreads;
|
|
||||||
// Line 3: model parameters (tau, alpha, beta, das, dbs)
|
|
||||||
input >> tau;
|
|
||||||
input >> alpha;
|
|
||||||
input >> beta;
|
|
||||||
input >> das;
|
|
||||||
input >> dbs;
|
|
||||||
// Line 4: External force components (Fx,Fy, Fz)
|
|
||||||
input >> Fx;
|
|
||||||
input >> Fy;
|
|
||||||
input >> Fz;
|
|
||||||
// Line 5: Pressure Boundary conditions
|
|
||||||
input >> pBC;
|
|
||||||
input >> din;
|
|
||||||
input >> dout;
|
|
||||||
// Line 6: time-stepping criteria
|
|
||||||
input >> timestepMax; // max no. of timesteps
|
|
||||||
input >> interval; // error interval
|
|
||||||
input >> tol; // error tolerance
|
|
||||||
//.............................................................
|
|
||||||
|
|
||||||
ifstream domain("Domain.in");
|
|
||||||
domain >> nprocx;
|
|
||||||
domain >> nprocy;
|
|
||||||
domain >> nprocz;
|
|
||||||
}
|
|
||||||
|
|
||||||
double rlxA = 1.f / tau;
|
|
||||||
double rlxB = 8.f * (2.f - rlxA) / (8.f - rlxA);
|
|
||||||
|
|
||||||
if (nprocs != nprocx * nprocy * nprocz) {
|
|
||||||
printf("Fatal error in processor number! \n");
|
|
||||||
printf("nprocx = %i \n", nprocx);
|
|
||||||
printf("nprocy = %i \n", nprocy);
|
|
||||||
printf("nprocz = %i \n", nprocz);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rank == 0) {
|
|
||||||
printf("********************************************************\n");
|
|
||||||
printf("tau = %f \n", tau);
|
|
||||||
printf("alpha = %f \n", alpha);
|
|
||||||
printf("beta = %f \n", beta);
|
|
||||||
printf("das = %f \n", beta);
|
|
||||||
printf("dbs = %f \n", beta);
|
|
||||||
printf("Force(x) = %f \n", Fx);
|
|
||||||
printf("Force(y) = %f \n", Fy);
|
|
||||||
printf("Force(z) = %f \n", Fz);
|
|
||||||
printf("Sub-domain size = %i x %i x %i\n", Nz, Nz, Nz);
|
|
||||||
printf("Parallel domain size = %i x %i x %i\n", nprocx, nprocy, nprocz);
|
|
||||||
printf("********************************************************\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
Nz += 2;
|
|
||||||
Nx = Ny = Nz; // Cubic domain
|
|
||||||
|
|
||||||
int N = Nx * Ny * Nz;
|
|
||||||
int dist_mem_size = N * sizeof(double);
|
|
||||||
|
|
||||||
// unsigned int nBlocks = 32;
|
|
||||||
// int nthreads = 128;
|
|
||||||
int S = N / nthreads / nBlocks;
|
|
||||||
|
|
||||||
// unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1);
|
|
||||||
// dim3 grid(nBlocks,1,1);
|
|
||||||
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Number of blocks = %i \n", nBlocks);
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Threads per block = %i \n", nthreads);
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Sweeps per thread = %i \n", S);
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Number of nodes per side = %i \n", Nx);
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Total Number of nodes = %i \n", N);
|
|
||||||
if (rank == 0)
|
|
||||||
printf("********************************************************\n");
|
|
||||||
|
|
||||||
//.......................................................................
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Read input media... \n");
|
|
||||||
//.......................................................................
|
|
||||||
char LocalRankString[8];
|
|
||||||
char LocalRankFilename[40];
|
|
||||||
sprintf(LocalRankString, "%05d", rank);
|
|
||||||
sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString);
|
|
||||||
// printf("Local File Name = %s \n",LocalRankFilename);
|
|
||||||
// .......... READ THE INPUT FILE .......................................
|
|
||||||
char value;
|
|
||||||
char *id;
|
|
||||||
id = new char[N];
|
|
||||||
int sum = 0;
|
|
||||||
// double porosity;
|
|
||||||
//.......................................................................
|
|
||||||
ifstream PM(LocalRankFilename, ios::binary);
|
|
||||||
for (k = 0; k < Nz; k++) {
|
|
||||||
for (j = 0; j < Ny; j++) {
|
|
||||||
for (i = 0; i < Nx; i++) {
|
|
||||||
n = k * Nx * Ny + j * Nx + i;
|
|
||||||
id[n] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (k = 1; k < Nz - 1; k++) {
|
|
||||||
for (j = 1; j < Ny - 1; j++) {
|
|
||||||
for (i = 1; i < Nx - 1; i++) {
|
|
||||||
PM.read((char *)(&value), sizeof(value));
|
|
||||||
n = k * Nx * Ny + j * Nx + i;
|
|
||||||
id[n] = value;
|
|
||||||
if (value > 0)
|
|
||||||
sum++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PM.close();
|
|
||||||
// printf("File porosity = %f\n", double(sum)/N);
|
|
||||||
|
|
||||||
//...........device phase ID.................................................
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Copying phase ID to device \n");
|
|
||||||
char *ID;
|
|
||||||
dvc_AllocateDeviceMemory((void **)&ID, N); // Allocate device memory
|
|
||||||
// Copy to the device
|
|
||||||
dvc_CopyToDevice(ID, id, N);
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Allocating distributions \n");
|
|
||||||
//......................device distributions.................................
|
|
||||||
double *f_even, *f_odd;
|
|
||||||
//...........................................................................
|
|
||||||
dvc_AllocateDeviceMemory((void **)&f_even,
|
|
||||||
10 * dist_mem_size); // Allocate device memory
|
|
||||||
dvc_AllocateDeviceMemory((void **)&f_odd,
|
|
||||||
9 * dist_mem_size); // Allocate device memory
|
|
||||||
//...........................................................................
|
|
||||||
//...........................................................................
|
|
||||||
// MAIN VARIABLES ALLOCATED HERE
|
|
||||||
//...........................................................................
|
|
||||||
double *Phi, *Den, *Copy;
|
|
||||||
double *ColorGrad, *Velocity;
|
|
||||||
//...........................................................................
|
|
||||||
dvc_AllocateDeviceMemory((void **)&Phi, dist_mem_size);
|
|
||||||
dvc_AllocateDeviceMemory((void **)&Den, 2 * dist_mem_size);
|
|
||||||
dvc_AllocateDeviceMemory((void **)&Copy, 2 * dist_mem_size);
|
|
||||||
dvc_AllocateDeviceMemory((void **)&Velocity, 3 * dist_mem_size);
|
|
||||||
dvc_AllocateDeviceMemory((void **)&ColorGrad, 3 * dist_mem_size);
|
|
||||||
//...........................................................................
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Setting the distributions, size = : %i\n", N);
|
|
||||||
//...........................................................................
|
|
||||||
dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
|
|
||||||
dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, N);
|
|
||||||
//...........................................................................
|
|
||||||
dvc_ComputePhi(nBlocks, nthreads, S, ID, Phi, Copy, Den, N);
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//...........................................................................
|
|
||||||
// Grids used to pack faces on the GPU for MPI
|
|
||||||
int faceGrid, edgeGrid, packThreads;
|
|
||||||
packThreads = 512;
|
|
||||||
edgeGrid = 1;
|
|
||||||
faceGrid = Nx * Ny / packThreads;
|
|
||||||
|
|
||||||
int timestep = 0;
|
|
||||||
if (rank == 0)
|
|
||||||
printf("********************************************************\n");
|
|
||||||
if (rank == 0)
|
|
||||||
printf("No. of timesteps: %i \n", timestepMax);
|
|
||||||
|
|
||||||
//.......create a stream for the LB calculation.......
|
|
||||||
// cudaStream_t stream;
|
|
||||||
// cudaStreamCreate(&stream);
|
|
||||||
|
|
||||||
//.......create and start timer............
|
|
||||||
double start, stop;
|
|
||||||
double walltime;
|
|
||||||
start = clock();
|
|
||||||
|
|
||||||
//************ MAIN ITERATION LOOP ***************************************/
|
|
||||||
while (timestep < timestepMax) {
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Compute the color gradient
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_ComputeColorGradient(nBlocks, nthreads, S, ID, Phi, ColorGrad, Nx,
|
|
||||||
Ny, Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Perform collision step for the momentum transport
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_ColorCollide(nBlocks, nthreads, S, ID, f_even, f_odd, ColorGrad,
|
|
||||||
Velocity, rlxA, rlxB, alpha, beta, Fx, Fy, Fz, Nx, Ny,
|
|
||||||
Nz, pBC);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Carry out the density streaming step for mass transport
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_DensityStreamD3Q7(nBlocks, nthreads, S, ID, Den, Copy, Phi,
|
|
||||||
ColorGrad, Velocity, beta, Nx, Ny, Nz, pBC);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Swap the distributions for momentum transport
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Compute the phase indicator field and reset Copy, Den
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_ComputePhi(nBlocks, nthreads, S, ID, Phi, Copy, Den, N);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
// Iteration completed!
|
|
||||||
timestep++;
|
|
||||||
|
|
||||||
//...................................................................
|
|
||||||
}
|
|
||||||
//************************************************************************/
|
|
||||||
dvc_Barrier();
|
|
||||||
stop = clock();
|
|
||||||
|
|
||||||
// cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl;
|
|
||||||
walltime = (stop - start) / CLOCKS_PER_SEC;
|
|
||||||
// cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl;
|
|
||||||
double MLUPS = double(Nx * Ny * Nz * timestep) / walltime / 1000000;
|
|
||||||
if (rank == 0)
|
|
||||||
printf("********************************************************\n");
|
|
||||||
if (rank == 0)
|
|
||||||
printf("CPU time = %f \n", walltime);
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
|
|
||||||
MLUPS *= nprocs;
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
|
|
||||||
if (rank == 0)
|
|
||||||
printf("********************************************************\n");
|
|
||||||
|
|
||||||
//************************************************************************/
|
|
||||||
// Write out the phase indicator field
|
|
||||||
//************************************************************************/
|
|
||||||
sprintf(LocalRankFilename, "%s%s", "Phase.", LocalRankString);
|
|
||||||
// printf("Local File Name = %s \n",LocalRankFilename);
|
|
||||||
double *phiOut;
|
|
||||||
phiOut = new double[N];
|
|
||||||
dvc_CopyToHost(phiOut, Phi, N * sizeof(double));
|
|
||||||
|
|
||||||
FILE *PHASE;
|
|
||||||
PHASE = fopen(LocalRankFilename, "wb");
|
|
||||||
fwrite(phiOut, 8, N, PHASE);
|
|
||||||
fclose(PHASE);
|
|
||||||
//************************************************************************/
|
|
||||||
}
|
|
|
@ -1,425 +0,0 @@
|
||||||
#ifdef useMPI
|
|
||||||
#include <mpi.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
#include <math.h>
|
|
||||||
#include <cuda.h>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
//*************************************************************************
|
|
||||||
// HokieSpeed
|
|
||||||
//nvcc -Xcompiler -fopenmp -lgomp -O3 -arch sm_20 -o hybridATLKR lb2_ATLKR_hybrid.cu
|
|
||||||
// -I$VT_MPI_INC -L$VT_MPI_LIB -lmpi
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Implementation of Two-Phase Immiscible LBM using CUDA
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_InitD3Q19(int nblocks, int nthreads, int S,
|
|
||||||
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_InitDenColor( int nblocks, int nthreads, int S,
|
|
||||||
char *ID, double *Den, double *Phi, double das, double dbs, int N);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_ComputeColorGradient(int nBlocks, int nthreads, int S,
|
|
||||||
char *ID, double *Phi, double *ColorGrad, int Nx, int Ny, int Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_ColorCollide(int nBlocks, int nthreads, int S,
|
|
||||||
char *ID, double *f_even, double *f_odd, double *ColorGrad, double *Velocity,
|
|
||||||
double rlxA, double rlxB,double alpha, double beta, double Fx, double Fy, double Fz,
|
|
||||||
int Nx, int Ny, int Nz, bool pBC);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_DensityStreamD3Q7(int nBlocks, int nthreads, int S,
|
|
||||||
char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity,
|
|
||||||
double beta, int Nx, int Ny, int Nz, bool pBC);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_ComputePhi(int nBlocks, int nthreads, int S,
|
|
||||||
char *ID, double *Phi, double *Copy, double *Den, int N);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_Barrier();
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_SwapD3Q19(int nblocks, int nthreads, int S,
|
|
||||||
char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_PackDist(int grid, int threads, int q, int *SendList, int start,
|
|
||||||
int sendCount, double *sendbuf, double *Dist, int N);
|
|
||||||
//*************************************************************************
|
|
||||||
extern "C" void dvc_UnpackDist(int grid, int threads, int q, int Cqx, int Cqy, int Cqz, int *RecvList, int start,
|
|
||||||
int recvCount, double *recvbuf, double *Dist, int Nx, int Ny, int Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
|
||||||
{
|
|
||||||
|
|
||||||
//********** Initialize MPI ****************
|
|
||||||
int numprocs,rank;
|
|
||||||
#ifdef useMPI
|
|
||||||
MPI_Status stat;
|
|
||||||
MPI_Init(&argc,&argv);
|
|
||||||
MPI_Comm comm = MPI_COMM_WORLD;
|
|
||||||
MPI_Comm_size(comm,&numprocs);
|
|
||||||
MPI_Comm_rank(comm,&rank);
|
|
||||||
#else
|
|
||||||
MPI_Comm comm = MPI_COMM_WORLD;
|
|
||||||
numprocs = 1;
|
|
||||||
rank = 0;
|
|
||||||
#endif
|
|
||||||
//******************************************
|
|
||||||
|
|
||||||
if (rank == 0){
|
|
||||||
printf("********************************************************\n");
|
|
||||||
printf("Running Hybrid Implementation of Color LBM \n");
|
|
||||||
printf("********************************************************\n");
|
|
||||||
}
|
|
||||||
// Color Model parameters
|
|
||||||
string FILENAME;
|
|
||||||
unsigned int nBlocks, nthreads;
|
|
||||||
int Nx,Ny,Nz;
|
|
||||||
int timestepMax, interval;
|
|
||||||
double tau,Fx,Fy,Fz,tol;
|
|
||||||
double alpha, beta;
|
|
||||||
double das, dbs;
|
|
||||||
double din,dout;
|
|
||||||
bool pBC;
|
|
||||||
|
|
||||||
if (rank==0){
|
|
||||||
//.............................................................
|
|
||||||
// READ SIMULATION PARMAETERS FROM INPUT FILE
|
|
||||||
//.............................................................
|
|
||||||
ifstream input("Color.in");
|
|
||||||
// Line 1: Name of the phase indicator file (s=0,w=1,n=2)
|
|
||||||
input >> FILENAME;
|
|
||||||
// Line 2: domain size (Nx, Ny, Nz)
|
|
||||||
input >> Nz; // number of nodes (x,y,z)
|
|
||||||
input >> nBlocks;
|
|
||||||
input >> nthreads;
|
|
||||||
// Line 3: model parameters (tau, alpha, beta, das, dbs)
|
|
||||||
input >> tau;
|
|
||||||
input >> alpha;
|
|
||||||
input >> beta;
|
|
||||||
input >> das;
|
|
||||||
input >> dbs;
|
|
||||||
// Line 4: External force components (Fx,Fy, Fz)
|
|
||||||
input >> Fx;
|
|
||||||
input >> Fy;
|
|
||||||
input >> Fz;
|
|
||||||
// Line 5: Pressure Boundary conditions
|
|
||||||
input >> pBC;
|
|
||||||
input >> din;
|
|
||||||
input >> dout;
|
|
||||||
// Line 6: time-stepping criteria
|
|
||||||
input >> timestepMax; // max no. of timesteps
|
|
||||||
input >> interval; // error interval
|
|
||||||
input >> tol; // error tolerance
|
|
||||||
//.............................................................
|
|
||||||
}
|
|
||||||
#ifdef useMPI
|
|
||||||
// **************************************************************
|
|
||||||
// Broadcast simulation parameters from rank 0 to all other procs
|
|
||||||
MPI_Barrier(comm);
|
|
||||||
//.................................................
|
|
||||||
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
|
|
||||||
MPI_Bcast(&nBlocks,1,MPI_INT,0,comm);
|
|
||||||
MPI_Bcast(&nthreads,1,MPI_INT,0,comm);
|
|
||||||
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&alpha,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&beta,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&das,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&dbs,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
|
|
||||||
MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
|
|
||||||
MPI_Bcast(×tepMax,1,MPI_INT,0,comm);
|
|
||||||
MPI_Bcast(&interval,1,MPI_INT,0,comm);
|
|
||||||
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
|
|
||||||
//.................................................
|
|
||||||
MPI_Barrier(comm);
|
|
||||||
// **************************************************************
|
|
||||||
#endif
|
|
||||||
|
|
||||||
double rlxA = 1.f/tau;
|
|
||||||
double rlxB = 8.f*(2.f-rlxA)/(8.f-rlxA);
|
|
||||||
|
|
||||||
if (pBC && rank == 0){
|
|
||||||
printf("Assigning presusre boundary conditions \n");
|
|
||||||
printf("Inlet density = %f \n", din);
|
|
||||||
printf("Outlet density = %f \n", dout);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rank==0){
|
|
||||||
printf("....Parameters................\n");
|
|
||||||
printf("tau = %f \n", tau);
|
|
||||||
printf("alpha = %f \n", alpha);
|
|
||||||
printf("beta = %f \n", beta);
|
|
||||||
printf("das = %f \n", das);
|
|
||||||
printf("dbs = %f \n", dbs);
|
|
||||||
printf("Force(x) = %f \n", Fx);
|
|
||||||
printf("Force(y) = %f \n", Fy);
|
|
||||||
printf("Force(z) = %f \n", Fz);
|
|
||||||
printf("Nz = %i \n", Nz);
|
|
||||||
printf("timestepMax = %i \n", timestepMax);
|
|
||||||
printf("...............................\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Identical cubic sub-domains
|
|
||||||
Nx = Ny = Nz;// = 16*s; // Cubic domain
|
|
||||||
int N = Nx*Ny*Nz;
|
|
||||||
int dist_mem_size = N*sizeof(double);
|
|
||||||
|
|
||||||
// unsigned int nBlocks = 32;
|
|
||||||
// int nthreads = 128;
|
|
||||||
int S = N/nthreads/nBlocks;
|
|
||||||
if (nBlocks*nthreads*S < N) S++;
|
|
||||||
// int S = 1;
|
|
||||||
|
|
||||||
// unsigned int nBlocks = N/nthreads + (N%nthreads == 0?0:1);
|
|
||||||
// dim3 grid(nBlocks,1,1);
|
|
||||||
if (rank==1){
|
|
||||||
printf("Number of blocks = %i \n", nBlocks);
|
|
||||||
printf("Threads per block = %i \n", nthreads);
|
|
||||||
printf("Sweeps per thread = %i \n", S);
|
|
||||||
printf("Number of nodes per side = %i \n", Nx);
|
|
||||||
printf("Total Number of nodes = %i \n", N);
|
|
||||||
printf("...............................\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
//.......................................................................
|
|
||||||
// .......... READ THE INPUT FILE .......................................
|
|
||||||
int n;
|
|
||||||
char value;
|
|
||||||
char *id;
|
|
||||||
id = new char[N];
|
|
||||||
int sum = 0;
|
|
||||||
// RANK 0 READS THE INPUT FILE
|
|
||||||
if (rank==0){
|
|
||||||
printf("Read input media... \n");
|
|
||||||
ifstream PM(FILENAME.c_str(),ios::binary);
|
|
||||||
for (int k=0;k<Nz;k++){
|
|
||||||
for (int j=0;j<Ny;j++){
|
|
||||||
for (int i=0;i<Nx;i++){
|
|
||||||
PM.read((char *) (&value), sizeof(value));
|
|
||||||
n = k*Nx*Ny+j*Nx+i;
|
|
||||||
|
|
||||||
if (value>0){
|
|
||||||
if (pBC) value=2; // Saturate with NWP
|
|
||||||
if (k<8){
|
|
||||||
value=1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
id[n] = value;
|
|
||||||
if (value > 0) sum++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PM.close();
|
|
||||||
printf("File porosity = %f\n", double(sum)/N);
|
|
||||||
}
|
|
||||||
//......... for pressure BC only............................
|
|
||||||
// Void the first / last rows if pressure BC are to be used
|
|
||||||
if (pBC){
|
|
||||||
for (int k=0;k<Nz;k++){
|
|
||||||
for (int j=0;j<Ny;j++){
|
|
||||||
for (int i=0;i<Nx;i++){
|
|
||||||
n = k*Nx*Ny+j*Nx+i;
|
|
||||||
if (k<4) id[n] = 1;
|
|
||||||
if (k>Nz-5) id[n] = 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Skip the non-boundary values
|
|
||||||
if (k==4) k=Nz-5;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#ifdef useMPI //............................................................
|
|
||||||
MPI_Barrier(comm);
|
|
||||||
MPI_Bcast(&id[0],N,MPI_CHAR,0,comm);
|
|
||||||
MPI_Barrier(comm);
|
|
||||||
#endif
|
|
||||||
if (rank == 0) printf("Domain set.\n");
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
int SBC;
|
|
||||||
int outlet = N-Nx*Ny;
|
|
||||||
if (pBC){
|
|
||||||
SBC = Nx*Ny/nthreads/nBlocks+1;
|
|
||||||
printf("Number of sweeps for inlet / outlet: %i \n", SBC);
|
|
||||||
}
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//...........................................................................
|
|
||||||
//...........device phase ID.................................................
|
|
||||||
char *ID;
|
|
||||||
cudaMalloc((void **) &ID, N); // Allocate device memory
|
|
||||||
// Copy to the device
|
|
||||||
cudaMemcpy(ID, id, N, cudaMemcpyHostToDevice);
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//......................device distributions.................................
|
|
||||||
double *f_even,*f_odd;
|
|
||||||
//...........................................................................
|
|
||||||
cudaMalloc((void **) &f_even, 10*dist_mem_size); // Allocate device memory
|
|
||||||
cudaMalloc((void **) &f_odd, 9*dist_mem_size); // Allocate device memory
|
|
||||||
// f_even = new double[10*N];
|
|
||||||
// f_odd = new double[9*N];
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//...........................................................................
|
|
||||||
// MAIN VARIABLES ALLOCATED HERE
|
|
||||||
//...........................................................................
|
|
||||||
double *Phi,*Den,*Copy;
|
|
||||||
double *ColorGrad, *Velocity;
|
|
||||||
//...........................................................................
|
|
||||||
cudaMalloc((void **) &Phi, dist_mem_size);
|
|
||||||
cudaMalloc((void **) &Den, 2*dist_mem_size);
|
|
||||||
cudaMalloc((void **) &Copy, 2*dist_mem_size);
|
|
||||||
cudaMalloc((void **) &Velocity, 3*dist_mem_size);
|
|
||||||
cudaMalloc((void **) &ColorGrad, 3*dist_mem_size);
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
//...........................................................................
|
|
||||||
if (rank==0) printf("Setting the distributions, size = : %i\n", N);
|
|
||||||
//...........................................................................
|
|
||||||
dvc_InitD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
|
|
||||||
dvc_InitDenColor(nBlocks, nthreads, S, ID, Den, Phi, das, dbs, N);
|
|
||||||
//...........................................................................
|
|
||||||
dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);
|
|
||||||
//...........................................................................
|
|
||||||
|
|
||||||
int timestep;
|
|
||||||
// double starttime,stoptime;
|
|
||||||
if (rank==0) printf("No. of timesteps: %i \n", timestepMax);
|
|
||||||
timestep = 0;
|
|
||||||
//.......create and start timer............
|
|
||||||
cudaEvent_t start, stop;
|
|
||||||
float time;
|
|
||||||
//.......create a stream for the LB calculation.......
|
|
||||||
cudaStream_t stream;
|
|
||||||
cudaStreamCreate(&stream);
|
|
||||||
|
|
||||||
cudaEventCreate(&start);
|
|
||||||
cudaEventCreate(&stop);
|
|
||||||
cudaEventRecord( start, 0 );
|
|
||||||
//.........................................
|
|
||||||
//************ MAIN TIMESTEP LOOP ***************************************/
|
|
||||||
while (timestep < timestepMax){
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Compute the color gradient
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_ComputeColorGradient(nBlocks, nthreads, S,
|
|
||||||
ID, Phi, ColorGrad, Nx, Ny, Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Perform collision step for the momentum transport
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_ColorCollide(nBlocks, nthreads, S,
|
|
||||||
ID, f_even, f_odd, ColorGrad, Velocity,
|
|
||||||
rlxA, rlxB,alpha, beta, Fx, Fy, Fz, Nx, Ny, Nz, pBC);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Carry out the density streaming step for mass transport
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_DensityStreamD3Q7(nBlocks, nthreads, S,
|
|
||||||
ID, Den, Copy, Phi, ColorGrad, Velocity,beta, Nx, Ny, Nz, pBC);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Swap the distributions for momentum transport
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_SwapD3Q19(nBlocks, nthreads, S, ID, f_even, f_odd, Nx, Ny, Nz);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
//*************************************************************************
|
|
||||||
// Compute the phase indicator field and reset Copy, Den
|
|
||||||
//*************************************************************************
|
|
||||||
dvc_ComputePhi(nBlocks, nthreads, S,ID, Phi, Copy, Den, N);
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
dvc_Barrier();
|
|
||||||
timestep++;
|
|
||||||
//.............................................................................
|
|
||||||
}
|
|
||||||
//************************************************************************/
|
|
||||||
dvc_Barrier();
|
|
||||||
//.......... stop and destroy timer.............................
|
|
||||||
cudaEventRecord( stop, stream);
|
|
||||||
cudaEventSynchronize( stop );
|
|
||||||
|
|
||||||
cudaEventElapsedTime( &time, start, stop );
|
|
||||||
printf("CPU time = %f \n", time);
|
|
||||||
|
|
||||||
float MLUPS = 0.001*float(Nx*Ny*Nz)*timestep/time;
|
|
||||||
printf("MLUPS = %f \n", MLUPS);
|
|
||||||
|
|
||||||
cudaEventDestroy( start );
|
|
||||||
cudaEventDestroy( stop );
|
|
||||||
|
|
||||||
double *Data;
|
|
||||||
Data = new double[3*N];
|
|
||||||
|
|
||||||
cudaMemcpy(Data, Phi, dist_mem_size, cudaMemcpyDeviceToHost);
|
|
||||||
|
|
||||||
// Write out the Phase Indicator Field
|
|
||||||
FILE *phase;
|
|
||||||
phase = fopen("Phase.out","wb");
|
|
||||||
fwrite(Data,8,N,phase);
|
|
||||||
fclose(phase);
|
|
||||||
|
|
||||||
//....................................................
|
|
||||||
// Write out the pressure - (reuse Phi arrays since we're done with those)
|
|
||||||
// ComputeDensity<<< grid, nthreads>>> (ID, f_even, f_odd, Phi, Nx, Ny, Nz, S);
|
|
||||||
// cudaMemcpy(Data, Phi, dist_mem_size, cudaMemcpyDeviceToHost);
|
|
||||||
// FILE *PRESSURE;
|
|
||||||
// PRESSURE = fopen("Pressure.out","wb");
|
|
||||||
// fwrite(Phi,8,N,PRESSURE);
|
|
||||||
// fclose(PRESSURE);
|
|
||||||
//....................................................
|
|
||||||
|
|
||||||
// Write out the Color Gradient
|
|
||||||
|
|
||||||
cudaMemcpy(Data, ColorGrad, 3*dist_mem_size, cudaMemcpyDeviceToHost);
|
|
||||||
|
|
||||||
FILE *CG;
|
|
||||||
CG = fopen("ColorGrad.out","wb");
|
|
||||||
fwrite(Data,8,3*N,CG);
|
|
||||||
fclose(CG);
|
|
||||||
|
|
||||||
// Write out the Velocity
|
|
||||||
// FILE *VEL;
|
|
||||||
// VEL = fopen("Velocity.out","wb");
|
|
||||||
// fwrite(Velocity,8,3*N,VEL);
|
|
||||||
// fclose(VEL);
|
|
||||||
|
|
||||||
// cleanup
|
|
||||||
cudaFree(ID);
|
|
||||||
cudaFree(f_even); cudaFree(f_odd);
|
|
||||||
cudaFree(Velocity);
|
|
||||||
cudaFree(Phi);
|
|
||||||
|
|
||||||
cudaFree (ColorGrad);
|
|
||||||
cudaFree (Den); cudaFree(Copy);
|
|
||||||
cudaFree (Phi);
|
|
||||||
free(id);
|
|
||||||
|
|
||||||
//***********Finish up!*********************************
|
|
||||||
#ifdef useMPI
|
|
||||||
MPI_Finalize();
|
|
||||||
#endif
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user