Fixed issues with broken build

This commit is contained in:
James E McClure 2017-10-01 11:46:22 -04:00
parent 20cdfcd49c
commit 43112e299a
5 changed files with 1068 additions and 1005 deletions

View File

@ -11,7 +11,7 @@
extern "C" void ScaLBL_AllocateDeviceMemory(void** address, size_t size); extern "C" void ScaLBL_AllocateDeviceMemory(void** address, size_t size);
//extern "C" void FreeDeviceMemory(void** address); extern "C" void ScaLBL_FreeDeviceMemory(void* pointer);
extern "C" void ScaLBL_CopyToDevice(void* dest, const void* source, size_t size); extern "C" void ScaLBL_CopyToDevice(void* dest, const void* source, size_t size);
@ -21,7 +21,7 @@ extern "C" void ScaLBL_DeviceBarrier();
extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N); extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N);
extern "C" void ScaLBL_D3Q19_Unpack(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,double *recvbuf, double *dist, int Nx, int Ny, int Nz); extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N);
extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, double *Data, int N); extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, double *Data, int N);
@ -41,6 +41,8 @@ extern "C" void ScaLBL_D3Q19_Init(char *ID, double *f_even, double *f_odd, int N
extern "C" void ScaLBL_D3Q19_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz); extern "C" void ScaLBL_D3Q19_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz);
extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, double *distodd, int Np);
extern "C" void ScaLBL_D3Q19_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, extern "C" void ScaLBL_D3Q19_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB,
double Fx, double Fy, double Fz,int Nx, int Ny, int Nz); double Fx, double Fy, double Fz,int Nx, int Ny, int Nz);
@ -56,6 +58,13 @@ extern "C" void ScaLBL_D3Q19_Pressure_BC_z(double *disteven, double *distodd, do
extern "C" void ScaLBL_D3Q19_Pressure_BC_Z(double *disteven, double *distodd, double dout, extern "C" void ScaLBL_D3Q19_Pressure_BC_Z(double *disteven, double *distodd, double dout,
int Nx, int Ny, int Nz, int outlet); int Nx, int Ny, int Nz, int outlet);
extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, double flux,
int Nx, int Ny, int Nz);
extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, double flux,
int Nx, int Ny, int Nz, int outlet);
extern "C" void ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz); extern "C" void ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz);
extern "C" void ScaLBL_ColorDistance_Init(char *ID, double *Den, double *Phi, double *Distance, extern "C" void ScaLBL_ColorDistance_Init(char *ID, double *Den, double *Phi, double *Distance,
@ -91,17 +100,11 @@ extern "C" void ScaLBL_D3Q19_Velocity_BC_Z(double *disteven, double *distodd, do
extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice); extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice);
extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, double flux,
int Nx, int Ny, int Nz);
extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, double flux,
int Nx, int Ny, int Nz, int outlet);
class ScaLBL_Communicator{ class ScaLBL_Communicator{
public: public:
//...................................................................................... //......................................................................................
ScaLBL_Communicator(Domain &Dm); ScaLBL_Communicator(Domain &Dm);
//ScaLBL_Communicator(Domain &Dm, IntArray &Map);
~ScaLBL_Communicator(); ~ScaLBL_Communicator();
//...................................................................................... //......................................................................................
unsigned long int CommunicationCount,SendCount,RecvCount; unsigned long int CommunicationCount,SendCount,RecvCount;
@ -119,6 +122,7 @@ public:
double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ; double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ;
//...................................................................................... //......................................................................................
void MemoryOptimizedLayout(IntArray &Map, int *neighborList, char *id, int Np);
void SendD3Q19(double *f_even, double *f_odd); void SendD3Q19(double *f_even, double *f_odd);
void RecvD3Q19(double *f_even, double *f_odd); void RecvD3Q19(double *f_even, double *f_odd);
void BiSendD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd); void BiSendD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd);
@ -126,7 +130,12 @@ public:
void SendHalo(double *data); void SendHalo(double *data);
void RecvHalo(double *data); void RecvHalo(double *data);
// Debugging and unit testing functions
void PrintD3Q19();
private: private:
void D3Q19_MapRecv(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist);
bool Lock; // use Lock to make sure only one call at a time to protect data in transit bool Lock; // use Lock to make sure only one call at a time to protect data in transit
// only one set of Send requests can be active at any time (per instance) // only one set of Send requests can be active at any time (per instance)
int i,j,k,n; int i,j,k,n;
@ -166,10 +175,16 @@ private:
int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z; int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z;
int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ; int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ;
int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ; int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ;
// Recieve buffers for the distributions
int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z;
int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
//...................................................................................... //......................................................................................
}; };
ScaLBL_Communicator::ScaLBL_Communicator(Domain &Dm){ ScaLBL_Communicator::ScaLBL_Communicator(Domain &Dm){
//...................................................................................... //......................................................................................
Lock=false; // unlock the communicator Lock=false; // unlock the communicator
@ -315,6 +330,25 @@ ScaLBL_Communicator::ScaLBL_Communicator(Domain &Dm){
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory
//...................................................................................... //......................................................................................
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_x, 5*recvCount_x*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_X, 5*recvCount_X*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_y, 5*recvCount_y*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Y, 5*recvCount_Y*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_z, 5*recvCount_z*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Z, 5*recvCount_Z*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xy, recvCount_xy*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xY, recvCount_xY*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_XY, recvCount_XY*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xz, recvCount_xz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_yz, recvCount_yz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory
ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory
//......................................................................................
ScaLBL_CopyToDevice(dvcSendList_x,Dm.sendList_x,sendCount_x*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_x,Dm.sendList_x,sendCount_x*sizeof(int));
ScaLBL_CopyToDevice(dvcSendList_X,Dm.sendList_X,sendCount_X*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_X,Dm.sendList_X,sendCount_X*sizeof(int));
ScaLBL_CopyToDevice(dvcSendList_y,Dm.sendList_y,sendCount_y*sizeof(int)); ScaLBL_CopyToDevice(dvcSendList_y,Dm.sendList_y,sendCount_y*sizeof(int));
@ -352,6 +386,81 @@ ScaLBL_Communicator::ScaLBL_Communicator(Domain &Dm){
ScaLBL_CopyToDevice(dvcRecvList_YZ,Dm.recvList_YZ,recvCount_YZ*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_YZ,Dm.recvList_YZ,recvCount_YZ*sizeof(int));
ScaLBL_CopyToDevice(dvcRecvList_yZ,Dm.recvList_yZ,recvCount_yZ*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_yZ,Dm.recvList_yZ,recvCount_yZ*sizeof(int));
ScaLBL_CopyToDevice(dvcRecvList_Yz,Dm.recvList_Yz,recvCount_Yz*sizeof(int)); ScaLBL_CopyToDevice(dvcRecvList_Yz,Dm.recvList_Yz,recvCount_Yz*sizeof(int));
//......................................................................................
MPI_Barrier(MPI_COMM_SCALBL);
if (rank==0) printf("Mapping recieve distributions \n");
//...................................................................................
// Set up the recieve distribution lists
//...................................................................................
//...Map recieve list for the X face: q=2,8,10,12,13 .................................
D3Q19_MapRecv(0,-1,0,0,Dm.recvList_X,0,recvCount_X,dvcRecvDist_X);
D3Q19_MapRecv(3,-1,-1,0,Dm.recvList_X,recvCount_X,recvCount_X,dvcRecvDist_X);
D3Q19_MapRecv(4,-1,1,0,Dm.recvList_X,2*recvCount_X,recvCount_X,dvcRecvDist_X);
D3Q19_MapRecv(5,-1,0,-1,Dm.recvList_X,3*recvCount_X,recvCount_X,dvcRecvDist_X);
D3Q19_MapRecv(6,-1,0,1,Dm.recvList_X,4*recvCount_X,recvCount_X,dvcRecvDist_X);
//...................................................................................
//...Map recieve list for the x face: q=1,7,9,11,13..................................
D3Q19_MapRecv(1,1,0,0,Dm.recvList_x,0,recvCount_x,dvcRecvDist_x);
D3Q19_MapRecv(4,1,1,0,Dm.recvList_x,recvCount_x,recvCount_x,dvcRecvDist_x);
D3Q19_MapRecv(5,1,-1,0,Dm.recvList_x,2*recvCount_x,recvCount_x,dvcRecvDist_x);
D3Q19_MapRecv(6,1,0,1,Dm.recvList_x,3*recvCount_x,recvCount_x,dvcRecvDist_x);
D3Q19_MapRecv(7,1,0,-1,Dm.recvList_x,4*recvCount_x,recvCount_x,dvcRecvDist_x);
//...................................................................................
//...Map recieve list for the y face: q=4,8,9,16,18 ...................................
D3Q19_MapRecv(1,0,-1,0,Dm.recvList_Y,0,recvCount_Y,dvcRecvDist_Y);
D3Q19_MapRecv(3,-1,-1,0,Dm.recvList_Y,recvCount_Y,recvCount_Y,dvcRecvDist_Y);
D3Q19_MapRecv(5,1,-1,0,Dm.recvList_Y,2*recvCount_Y,recvCount_Y,dvcRecvDist_Y);
D3Q19_MapRecv(7,0,-1,-1,Dm.recvList_Y,3*recvCount_Y,recvCount_Y,dvcRecvDist_Y);
D3Q19_MapRecv(8,0,-1,1,Dm.recvList_Y,4*recvCount_Y,recvCount_Y,dvcRecvDist_Y);
//...................................................................................
//...Map recieve list for the Y face: q=3,7,10,15,17 ..................................
D3Q19_MapRecv(2,0,1,0,Dm.recvList_y,0,recvCount_y,dvcRecvDist_y);
D3Q19_MapRecv(4,1,1,0,Dm.recvList_y,recvCount_y,recvCount_y,dvcRecvDist_y);
D3Q19_MapRecv(4,-1,1,0,Dm.recvList_y,2*recvCount_y,recvCount_y,dvcRecvDist_y);
D3Q19_MapRecv(8,0,1,1,Dm.recvList_y,3*recvCount_y,recvCount_y,dvcRecvDist_y);
D3Q19_MapRecv(9,0,1,-1,Dm.recvList_y,4*recvCount_y,recvCount_y,dvcRecvDist_y);
//...................................................................................
//...Map recieve list for the z face<<<6,12,13,16,17)..............................................
D3Q19_MapRecv(2,0,0,-1,Dm.recvList_Z,0,recvCount_Z,dvcRecvDist_Z);
D3Q19_MapRecv(5,-1,0,-1,Dm.recvList_Z,recvCount_Z,recvCount_Z,dvcRecvDist_Z);
D3Q19_MapRecv(7,1,0,-1,Dm.recvList_Z,2*recvCount_Z,recvCount_Z,dvcRecvDist_Z);
D3Q19_MapRecv(7,0,-1,-1,Dm.recvList_Z,3*recvCount_Z,recvCount_Z,dvcRecvDist_Z);
D3Q19_MapRecv(9,0,1,-1,Dm.recvList_Z,4*recvCount_Z,recvCount_Z,dvcRecvDist_Z);
//...Map recieve list for the Z face<<<5,11,14,15,18)..............................................
D3Q19_MapRecv(3,0,0,1,Dm.recvList_z,0,recvCount_z,dvcRecvDist_z);
D3Q19_MapRecv(6,1,0,1,Dm.recvList_z,recvCount_z,recvCount_z,dvcRecvDist_z);
D3Q19_MapRecv(6,-1,0,1,Dm.recvList_z,2*recvCount_z,recvCount_z,dvcRecvDist_z);
D3Q19_MapRecv(8,0,1,1,Dm.recvList_z,3*recvCount_z,recvCount_z,dvcRecvDist_z);
D3Q19_MapRecv(8,0,-1,1,Dm.recvList_z,4*recvCount_z,recvCount_z,dvcRecvDist_z);
//..................................................................................
//...Map recieve list for the xy edge <<<8)................................
D3Q19_MapRecv(3,-1,-1,0,Dm.recvList_XY,0,recvCount_XY,dvcRecvDist_XY);
//...Map recieve list for the Xy edge <<<9)................................
D3Q19_MapRecv(5,1,-1,0,Dm.recvList_xY,0,recvCount_xY,dvcRecvDist_xY);
//...Map recieve list for the xY edge <<<10)................................
D3Q19_MapRecv(4,-1,1,0,Dm.recvList_Xy,0,recvCount_Xy,dvcRecvDist_Xy);
//...Map recieve list for the XY edge <<<7)................................
D3Q19_MapRecv(4,1,1,0,Dm.recvList_xy,0,recvCount_xy,dvcRecvDist_xy);
//...Map recieve list for the xz edge <<<12)................................
D3Q19_MapRecv(5,-1,0,-1,Dm.recvList_XZ,0,recvCount_XZ,dvcRecvDist_XZ);
//...Map recieve list for the xZ edge <<<14)................................
D3Q19_MapRecv(6,-1,0,1,Dm.recvList_Xz,0,recvCount_Xz,dvcRecvDist_Xz);
//...Map recieve list for the Xz edge <<<13)................................
D3Q19_MapRecv(7,1,0,-1,Dm.recvList_xZ,0,recvCount_xZ,dvcRecvDist_xZ);
//...Map recieve list for the XZ edge <<<11)................................
D3Q19_MapRecv(6,1,0,1,Dm.recvList_xz,0,recvCount_xz,dvcRecvDist_xz);
//...Map recieve list for the yz edge <<<16)................................
D3Q19_MapRecv(7,0,-1,-1,Dm.recvList_YZ,0,recvCount_YZ,dvcRecvDist_YZ);
//...Map recieve list for the yZ edge <<<18)................................
D3Q19_MapRecv(8,0,-1,1,Dm.recvList_Yz,0,recvCount_Yz,dvcRecvDist_Yz);
//...Map recieve list for the Yz edge <<<17)................................
D3Q19_MapRecv(9,0,1,-1,Dm.recvList_yZ,0,recvCount_yZ,dvcRecvDist_yZ);
//...Map recieve list for the YZ edge <<<15)................................
D3Q19_MapRecv(8,0,1,1,Dm.recvList_yz,0,recvCount_yz,dvcRecvDist_yz);
//...................................................................................
//...................................................................................... //......................................................................................
MPI_Barrier(MPI_COMM_SCALBL); MPI_Barrier(MPI_COMM_SCALBL);
ScaLBL_DeviceBarrier(); ScaLBL_DeviceBarrier();
@ -376,6 +485,527 @@ ScaLBL_Communicator::~ScaLBL_Communicator(){
// -- note that there needs to be a way to free memory allocated on the device!!! // -- note that there needs to be a way to free memory allocated on the device!!!
} }
void ScaLBL_Communicator::D3Q19_MapRecv(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count,
int *d3q19_recvlist){
//....................................................................................
// Map the recieve distributions to
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int i,j,k,n,nn,idx;
//int N = Nx*Ny*Nz;
// Create work arrays to store the values from the device (so that host can do this work)
int * ReturnDist;
ReturnDist=new int [count];
// Copy the list from the device
//ScaLBL_CopyToHost(List, list, count*sizeof(int));
for (idx=0; idx<count; idx++){
// Get the value from the list -- note that n is the index is from the send (non-local) process
n = list[idx];
// Get the 3-D indices
k = n/(Nx*Ny);
j = (n-Nx*Ny*k)/Nx;
i = n-Nx*Ny*k-Nx*j;
// Streaming for the non-local distribution
i += Cqx;
j += Cqy;
k += Cqz;
// compute 1D index for the neighbor and save
nn = k*Nx*Ny+j*Nx+i;
//d3q19_recvlist[start+idx] = nn;
ReturnDist[idx] = nn;
}
// Return updated version to the device
ScaLBL_CopyToDevice(&d3q19_recvlist[start], ReturnDist, count*sizeof(int));
// clean up the work arrays
delete [] ReturnDist;
}
void ScaLBL_Communicator::MemoryOptimizedLayout(IntArray &Map, int *neighborList, char *id, int Np){
/*
* Generate a memory optimized layout
* id[n] == 0 implies that site n should be ignored (treat as a mask)
* Map(i,j,k) = idx <- this is the index for the memory optimized layout
* neighborList(idx) <-stores the neighbors for the D3Q19 model
* note that the number of communications remains the same
* the index in the Send and Recv lists is also updated
* this means that the commuincations are no longer valid for regular data structures
*/
int idx,i,j,k,n;
// Check that Map has size matching sub-domain
if (Map.size(0) != Nx)
ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Map array dimensions do not match! \n");
// Initialize Map
for (k=0;k<Nz;k++){
for (j=0;j<Ny;j++){
for (i=0;i<Nx;i++){
Map(i,j,k) = -2;
}
}
}
idx=0;
for (k=1; k<Nz-1; k++){
for (j=1; j<Ny-1; j++){
for (i=1; i<Nx-1; i++){
// domain interior
Map(i,j,k) = -1;
// Local index
n = k*Nx*Ny+j*Nx+i;
if (id[n] != 0){
// Counts for the six faces
if (i==1) Map(n)=idx++;
else if (j==1) Map(n)=idx++;
else if (k==1) Map(n)=idx++;
else if (i==Nx-2) Map(n)=idx++;
else if (j==Ny-2) Map(n)=idx++;
else if (k==Nz-2) Map(n)=idx++;
}
}
}
}
// Next loop over the domain interior in block-cyclic fashion
int MemBlockSize=4;
k=2;
while (k<Nz-2){
int kmax = min(k+MemBlockSize,Nz-2);
j=2;
while (j<Ny-2){
int jmax = min(j+MemBlockSize,Ny-2);
i=2;
while (i<Nx-2){
int imax = min(i+MemBlockSize,Nx-2);
// Loop over the size of the local block
//printf("Executing block [%i,%i]x[%i,%i] (idx=%i) \n",i,imax,k,kmax,idx);
for (int kb=k; kb<kmax; kb++){
for (int jb=j; jb<jmax; jb++){
for (int ib=i; ib<imax; ib++){
// Local index (regular layout)
n = kb*Nx*Ny + jb*Nx + ib;
if (id[n] != 0 ){
Map(n) = idx;
neighborList[idx++] = n; // index of self in regular layout
}
}
}
}
i=imax;
}
j=jmax;
}
k=kmax;
}
if (idx > Np ){
ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n");
}
/*
for (k=1;k<Nz-1;k++){
printf("....k=%i .....\n",k);
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
n=k*Nx*Ny+j*Nx+i;
idx=Map(i,j,k);
printf("%i ",idx);
}
printf("\n");
}
}
*/
// Now use Map to determine the neighbors for each lattice direction
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
n=k*Nx*Ny+j*Nx+i;
idx=Map(i,j,k);
if (idx > Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np);
else if (!(idx<0)){
// store the idx associated with each neighbor
// store idx for self if neighbor is in solid or out of domain
//D3Q19 = {{1,0,0},{-1,0,0}
// {0,1,0},{0,-1,0}
// {0,0,1},{0,0,-1},
// {1,1,0},{-1,-1,0},
// {1,-1,0},{-1,1,0},
// {1,0,1},{-1,0,-1},
// {1,0,-1},{-1,0,1},
// {0,1,1},{0,-1,-1},
// {0,1,-1},{0,-1,1}};
// note that only odd distributions need to be stored to execute the swap algorithm
int neighbor; // cycle through the neighbors of lattice site idx
neighbor=Map(i+1,j,k);
if (neighbor==-2) neighborList[idx]=-1;
else if (neighbor<0) neighborList[idx]=idx;
else neighborList[idx]=neighbor;
neighbor=Map(i,j+1,k);
if (neighbor==-2) neighborList[Np+idx]=-1;
else if (neighbor<0) neighborList[Np+idx]=idx;
else neighborList[Np+idx]=neighbor;
neighbor=Map(i,j,k+1);
if (neighbor==-2) neighborList[2*Np+idx]=-1;
else if (neighbor<0) neighborList[2*Np+idx]=idx;
else neighborList[2*Np+idx]=neighbor;
neighbor=Map(i+1,j+1,k);
if (neighbor==-2) neighborList[3*Np+idx]=-1;
else if (neighbor<0) neighborList[3*Np+idx]=idx;
else neighborList[3*Np+idx]=neighbor;
neighbor=Map(i+1,j-1,k);
if (neighbor==-2) neighborList[4*Np+idx]=-1;
else if (neighbor<0) neighborList[4*Np+idx]=idx;
else neighborList[4*Np+idx]=neighbor;
neighbor=Map(i+1,j,k+1);
if (neighbor==-2) neighborList[5*Np+idx]=-1;
else if (neighbor<0) neighborList[5*Np+idx]=idx;
else neighborList[5*Np+idx]=neighbor;
neighbor=Map(i+1,j,k-1);
if (neighbor==-2) neighborList[6*Np+idx]=-1;
else if (neighbor<0) neighborList[6*Np+idx]=idx;
else neighborList[6*Np+idx]=neighbor;
neighbor=Map(i,j+1,k+1);
if (neighbor==-2) neighborList[7*Np+idx]=-1;
else if (neighbor<0) neighborList[7*Np+idx]=idx;
else neighborList[7*Np+idx]=neighbor;
neighbor=Map(i,j+1,k-1);
if (neighbor==-2) neighborList[8*Np+idx]=-1;
else if (neighbor<0) neighborList[8*Np+idx]=idx;
else neighborList[8*Np+idx]=neighbor;
}
}
}
}
//for (idx=0; idx<Np; idx++) printf("%i: %i %i\n", idx, neighborList[3*Np+idx], neighborList[4*Np+idx]);
//.......................................................................
// Now map through SendList and RecvList to update indices
// First loop over the send lists
int *TempBuffer;
TempBuffer = new int [5*RecvCount];
//.......................................................................
// Re-index the send lists
ScaLBL_CopyToHost(TempBuffer,dvcSendList_x,sendCount_x*sizeof(int));
for (i=0; i<sendCount_x; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_x,TempBuffer,sendCount_x*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_y,sendCount_y*sizeof(int));
for (i=0; i<sendCount_y; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_y,TempBuffer,sendCount_y*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_z,sendCount_z*sizeof(int));
for (i=0; i<sendCount_z; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_z,TempBuffer,sendCount_z*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_X,sendCount_X*sizeof(int));
for (i=0; i<sendCount_X; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_X,TempBuffer,sendCount_X*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_Y,sendCount_Y*sizeof(int));
for (i=0; i<sendCount_Y; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_Y,TempBuffer,sendCount_Y*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_Z,sendCount_Z*sizeof(int));
for (i=0; i<sendCount_Z; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_Z,TempBuffer,sendCount_Z*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_xy,sendCount_xy*sizeof(int));
for (i=0; i<sendCount_xy; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_xy,TempBuffer,sendCount_xy*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_xY,sendCount_xY*sizeof(int));
for (i=0; i<sendCount_xY; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_xY,TempBuffer,sendCount_xY*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_Xy,sendCount_Xy*sizeof(int));
for (i=0; i<sendCount_Xy; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_Xy,TempBuffer,sendCount_Xy*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_XY,sendCount_XY*sizeof(int));
for (i=0; i<sendCount_XY; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_XY,TempBuffer,sendCount_XY*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_xz,sendCount_xz*sizeof(int));
for (i=0; i<sendCount_xz; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_xz,TempBuffer,sendCount_xz*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_xZ,sendCount_xZ*sizeof(int));
for (i=0; i<sendCount_xZ; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_xZ,TempBuffer,sendCount_xZ*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_Xz,sendCount_Xz*sizeof(int));
for (i=0; i<sendCount_Xz; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_Xz,TempBuffer,sendCount_Xz*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_XZ,sendCount_XZ*sizeof(int));
for (i=0; i<sendCount_XZ; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_XZ,TempBuffer,sendCount_XZ*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_yz,sendCount_yz*sizeof(int));
for (i=0; i<sendCount_yz; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_yz,TempBuffer,sendCount_yz*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_Yz,sendCount_Yz*sizeof(int));
for (i=0; i<sendCount_Yz; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_Yz,TempBuffer,sendCount_Yz*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_yZ,sendCount_yZ*sizeof(int));
for (i=0; i<sendCount_yZ; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_yZ,TempBuffer,sendCount_yZ*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcSendList_YZ,sendCount_YZ*sizeof(int));
for (i=0; i<sendCount_YZ; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcSendList_YZ,TempBuffer,sendCount_YZ*sizeof(int));
//.......................................................................
// Re-index the recieve lists for the D3Q19 distributions
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_x,5*recvCount_x*sizeof(int));
for (i=0; i<5*recvCount_x; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
// printf("dvcRecvList_x: Map(%i)=%i \n",n,idx);
}
ScaLBL_CopyToDevice(dvcRecvDist_x,TempBuffer,5*recvCount_x*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_y,5*recvCount_y*sizeof(int));
for (i=0; i<5*recvCount_y; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_y,TempBuffer,5*recvCount_y*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_z,5*recvCount_z*sizeof(int));
for (i=0; i<5*recvCount_z; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_z,TempBuffer,5*recvCount_z*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_X,5*recvCount_X*sizeof(int));
for (i=0; i<5*recvCount_X; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_X,TempBuffer,5*recvCount_X*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_Y,5*recvCount_Y*sizeof(int));
for (i=0; i<5*recvCount_Y; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_Y,TempBuffer,5*recvCount_Y*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_Z,5*recvCount_Z*sizeof(int));
for (i=0; i<5*recvCount_Z; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_Z,TempBuffer,5*recvCount_Z*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_xy,recvCount_xy*sizeof(int));
for (i=0; i<recvCount_xy; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_xy,TempBuffer,recvCount_xy*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_xY,recvCount_xY*sizeof(int));
for (i=0; i<recvCount_xY; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_xY,TempBuffer,recvCount_xY*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_Xy,recvCount_Xy*sizeof(int));
for (i=0; i<recvCount_Xy; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_Xy,TempBuffer,recvCount_Xy*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_XY,recvCount_XY*sizeof(int));
for (i=0; i<recvCount_XY; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_XY,TempBuffer,recvCount_XY*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_xz,recvCount_xz*sizeof(int));
for (i=0; i<recvCount_xz; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_xz,TempBuffer,recvCount_xz*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_xZ,recvCount_xZ*sizeof(int));
for (i=0; i<recvCount_xZ; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_xZ,TempBuffer,recvCount_xZ*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_Xz,recvCount_Xz*sizeof(int));
for (i=0; i<recvCount_Xz; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_Xz,TempBuffer,recvCount_Xz*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_XZ,recvCount_XZ*sizeof(int));
for (i=0; i<recvCount_XZ; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_XZ,TempBuffer,recvCount_XZ*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_yz,recvCount_yz*sizeof(int));
for (i=0; i<recvCount_yz; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_yz,TempBuffer,recvCount_yz*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_Yz,recvCount_Yz*sizeof(int));
for (i=0; i<recvCount_Yz; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_Yz,TempBuffer,recvCount_Yz*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_yZ,recvCount_yZ*sizeof(int));
for (i=0; i<recvCount_yZ; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_yZ,TempBuffer,recvCount_yZ*sizeof(int));
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_YZ,recvCount_YZ*sizeof(int));
for (i=0; i<recvCount_YZ; i++){
n = TempBuffer[i];
idx=Map(n);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_YZ,TempBuffer,recvCount_YZ*sizeof(int));
//.......................................................................
// Reset the value of N to match the dense structure
N = Np;
// Clean up
delete [] TempBuffer;
}
void ScaLBL_Communicator::SendD3Q19(double *f_even, double *f_odd){ void ScaLBL_Communicator::SendD3Q19(double *f_even, double *f_odd){
if (Lock==true){ if (Lock==true){
@ -501,70 +1131,70 @@ void ScaLBL_Communicator::RecvD3Q19(double *f_even, double *f_odd){
// Unpack the distributions on the device // Unpack the distributions on the device
//................................................................................... //...................................................................................
//...Map recieve list for the X face: q=2,8,10,12,13 ................................. //...Map recieve list for the X face: q=2,8,10,12,13 .................................
ScaLBL_D3Q19_Unpack(0,-1,0,0,dvcRecvList_X,0,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(0,dvcRecvDist_X,0,recvCount_X,recvbuf_X,f_odd,N);
ScaLBL_D3Q19_Unpack(3,-1,-1,0,dvcRecvList_X,recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(3,dvcRecvDist_X,recvCount_X,recvCount_X,recvbuf_X,f_odd,N);
ScaLBL_D3Q19_Unpack(4,-1,1,0,dvcRecvList_X,2*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(4,dvcRecvDist_X,2*recvCount_X,recvCount_X,recvbuf_X,f_odd,N);
ScaLBL_D3Q19_Unpack(5,-1,0,-1,dvcRecvList_X,3*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(5,dvcRecvDist_X,3*recvCount_X,recvCount_X,recvbuf_X,f_odd,N);
ScaLBL_D3Q19_Unpack(6,-1,0,1,dvcRecvList_X,4*recvCount_X,recvCount_X,recvbuf_X,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(6,dvcRecvDist_X,4*recvCount_X,recvCount_X,recvbuf_X,f_odd,N);
//................................................................................... //...................................................................................
//...Map recieve list for the x face: q=1,7,9,11,13.................................. //...Map recieve list for the x face: q=1,7,9,11,13..................................
ScaLBL_D3Q19_Unpack(1,1,0,0,dvcRecvList_x,0,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(1,dvcRecvDist_x,0,recvCount_x,recvbuf_x,f_even,N);
ScaLBL_D3Q19_Unpack(4,1,1,0,dvcRecvList_x,recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(4,dvcRecvDist_x,recvCount_x,recvCount_x,recvbuf_x,f_even,N);
ScaLBL_D3Q19_Unpack(5,1,-1,0,dvcRecvList_x,2*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(5,dvcRecvDist_x,2*recvCount_x,recvCount_x,recvbuf_x,f_even,N);
ScaLBL_D3Q19_Unpack(6,1,0,1,dvcRecvList_x,3*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(6,dvcRecvDist_x,3*recvCount_x,recvCount_x,recvbuf_x,f_even,N);
ScaLBL_D3Q19_Unpack(7,1,0,-1,dvcRecvList_x,4*recvCount_x,recvCount_x,recvbuf_x,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(7,dvcRecvDist_x,4*recvCount_x,recvCount_x,recvbuf_x,f_even,N);
//................................................................................... //...................................................................................
//...Map recieve list for the y face: q=4,8,9,16,18 ................................... //...Map recieve list for the y face: q=4,8,9,16,18 ...................................
ScaLBL_D3Q19_Unpack(1,0,-1,0,dvcRecvList_Y,0,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(1,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,f_odd,N);
ScaLBL_D3Q19_Unpack(3,-1,-1,0,dvcRecvList_Y,recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(3,dvcRecvDist_Y,recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,N);
ScaLBL_D3Q19_Unpack(5,1,-1,0,dvcRecvList_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(5,dvcRecvDist_Y,2*recvCount_Y,recvCount_Y,recvbuf_Y,f_even,N);
ScaLBL_D3Q19_Unpack(7,0,-1,-1,dvcRecvList_Y,3*recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(7,dvcRecvDist_Y,3*recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,N);
ScaLBL_D3Q19_Unpack(8,0,-1,1,dvcRecvList_Y,4*recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(8,dvcRecvDist_Y,4*recvCount_Y,recvCount_Y,recvbuf_Y,f_odd,N);
//................................................................................... //...................................................................................
//...Map recieve list for the Y face: q=3,7,10,15,17 .................................. //...Map recieve list for the Y face: q=3,7,10,15,17 ..................................
ScaLBL_D3Q19_Unpack(2,0,1,0,dvcRecvList_y,0,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(2,dvcRecvDist_y,0,recvCount_y,recvbuf_y,f_even,N);
ScaLBL_D3Q19_Unpack(4,1,1,0,dvcRecvList_y,recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(4,dvcRecvDist_y,recvCount_y,recvCount_y,recvbuf_y,f_even,N);
ScaLBL_D3Q19_Unpack(4,-1,1,0,dvcRecvList_y,2*recvCount_y,recvCount_y,recvbuf_y,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(4,dvcRecvDist_y,2*recvCount_y,recvCount_y,recvbuf_y,f_odd,N);
ScaLBL_D3Q19_Unpack(8,0,1,1,dvcRecvList_y,3*recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(8,dvcRecvDist_y,3*recvCount_y,recvCount_y,recvbuf_y,f_even,N);
ScaLBL_D3Q19_Unpack(9,0,1,-1,dvcRecvList_y,4*recvCount_y,recvCount_y,recvbuf_y,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(9,dvcRecvDist_y,4*recvCount_y,recvCount_y,recvbuf_y,f_even,N);
//................................................................................... //...................................................................................
//...Map recieve list for the z face<<<6,12,13,16,17).............................................. //...Map recieve list for the z face<<<6,12,13,16,17)..............................................
ScaLBL_D3Q19_Unpack(2,0,0,-1,dvcRecvList_Z,0,recvCount_Z,recvbuf_Z,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(2,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,f_odd,N);
ScaLBL_D3Q19_Unpack(5,-1,0,-1,dvcRecvList_Z,recvCount_Z,recvCount_Z,recvbuf_Z,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(5,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,f_odd,N);
ScaLBL_D3Q19_Unpack(7,1,0,-1,dvcRecvList_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(7,dvcRecvDist_Z,2*recvCount_Z,recvCount_Z,recvbuf_Z,f_even,N);
ScaLBL_D3Q19_Unpack(7,0,-1,-1,dvcRecvList_Z,3*recvCount_Z,recvCount_Z,recvbuf_Z,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(7,dvcRecvDist_Z,3*recvCount_Z,recvCount_Z,recvbuf_Z,f_odd,N);
ScaLBL_D3Q19_Unpack(9,0,1,-1,dvcRecvList_Z,4*recvCount_Z,recvCount_Z,recvbuf_Z,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(9,dvcRecvDist_Z,4*recvCount_Z,recvCount_Z,recvbuf_Z,f_even,N);
//...Map recieve list for the Z face<<<5,11,14,15,18).............................................. //...Map recieve list for the Z face<<<5,11,14,15,18)..............................................
ScaLBL_D3Q19_Unpack(3,0,0,1,dvcRecvList_z,0,recvCount_z,recvbuf_z,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(3,dvcRecvDist_z,0,recvCount_z,recvbuf_z,f_even,N);
ScaLBL_D3Q19_Unpack(6,1,0,1,dvcRecvList_z,recvCount_z,recvCount_z,recvbuf_z,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(6,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,f_even,N);
ScaLBL_D3Q19_Unpack(6,-1,0,1,dvcRecvList_z,2*recvCount_z,recvCount_z,recvbuf_z,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(6,dvcRecvDist_z,2*recvCount_z,recvCount_z,recvbuf_z,f_odd,N);
ScaLBL_D3Q19_Unpack(8,0,1,1,dvcRecvList_z,3*recvCount_z,recvCount_z,recvbuf_z,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(8,dvcRecvDist_z,3*recvCount_z,recvCount_z,recvbuf_z,f_even,N);
ScaLBL_D3Q19_Unpack(8,0,-1,1,dvcRecvList_z,4*recvCount_z,recvCount_z,recvbuf_z,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(8,dvcRecvDist_z,4*recvCount_z,recvCount_z,recvbuf_z,f_odd,N);
//.................................................................................. //..................................................................................
//...Map recieve list for the xy edge <<<8)................................ //...Map recieve list for the xy edge <<<8)................................
ScaLBL_D3Q19_Unpack(3,-1,-1,0,dvcRecvList_XY,0,recvCount_XY,recvbuf_XY,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(3,dvcRecvDist_XY,0,recvCount_XY,recvbuf_XY,f_odd,N);
//...Map recieve list for the Xy edge <<<9)................................ //...Map recieve list for the Xy edge <<<9)................................
ScaLBL_D3Q19_Unpack(5,1,-1,0,dvcRecvList_xY,0,recvCount_xY,recvbuf_xY,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(5,dvcRecvDist_xY,0,recvCount_xY,recvbuf_xY,f_even,N);
//...Map recieve list for the xY edge <<<10)................................ //...Map recieve list for the xY edge <<<10)................................
ScaLBL_D3Q19_Unpack(4,-1,1,0,dvcRecvList_Xy,0,recvCount_Xy,recvbuf_Xy,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(4,dvcRecvDist_Xy,0,recvCount_Xy,recvbuf_Xy,f_odd,N);
//...Map recieve list for the XY edge <<<7)................................ //...Map recieve list for the XY edge <<<7)................................
ScaLBL_D3Q19_Unpack(4,1,1,0,dvcRecvList_xy,0,recvCount_xy,recvbuf_xy,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(4,dvcRecvDist_xy,0,recvCount_xy,recvbuf_xy,f_even,N);
//...Map recieve list for the xz edge <<<12)................................ //...Map recieve list for the xz edge <<<12)................................
ScaLBL_D3Q19_Unpack(5,-1,0,-1,dvcRecvList_XZ,0,recvCount_XZ,recvbuf_XZ,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(5,dvcRecvDist_XZ,0,recvCount_XZ,recvbuf_XZ,f_odd,N);
//...Map recieve list for the xZ edge <<<14)................................ //...Map recieve list for the xZ edge <<<14)................................
ScaLBL_D3Q19_Unpack(6,-1,0,1,dvcRecvList_Xz,0,recvCount_Xz,recvbuf_Xz,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(6,dvcRecvDist_Xz,0,recvCount_Xz,recvbuf_Xz,f_odd,N);
//...Map recieve list for the Xz edge <<<13)................................ //...Map recieve list for the Xz edge <<<13)................................
ScaLBL_D3Q19_Unpack(7,1,0,-1,dvcRecvList_xZ,0,recvCount_xZ,recvbuf_xZ,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(7,dvcRecvDist_xZ,0,recvCount_xZ,recvbuf_xZ,f_even,N);
//...Map recieve list for the XZ edge <<<11)................................ //...Map recieve list for the XZ edge <<<11)................................
ScaLBL_D3Q19_Unpack(6,1,0,1,dvcRecvList_xz,0,recvCount_xz,recvbuf_xz,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(6,dvcRecvDist_xz,0,recvCount_xz,recvbuf_xz,f_even,N);
//...Map recieve list for the yz edge <<<16)................................ //...Map recieve list for the yz edge <<<16)................................
ScaLBL_D3Q19_Unpack(7,0,-1,-1,dvcRecvList_YZ,0,recvCount_YZ,recvbuf_YZ,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(7,dvcRecvDist_YZ,0,recvCount_YZ,recvbuf_YZ,f_odd,N);
//...Map recieve list for the yZ edge <<<18)................................ //...Map recieve list for the yZ edge <<<18)................................
ScaLBL_D3Q19_Unpack(8,0,-1,1,dvcRecvList_Yz,0,recvCount_Yz,recvbuf_Yz,f_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(8,dvcRecvDist_Yz,0,recvCount_Yz,recvbuf_Yz,f_odd,N);
//...Map recieve list for the Yz edge <<<17)................................ //...Map recieve list for the Yz edge <<<17)................................
ScaLBL_D3Q19_Unpack(9,0,1,-1,dvcRecvList_yZ,0,recvCount_yZ,recvbuf_yZ,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(9,dvcRecvDist_yZ,0,recvCount_yZ,recvbuf_yZ,f_even,N);
//...Map recieve list for the YZ edge <<<15)................................ //...Map recieve list for the YZ edge <<<15)................................
ScaLBL_D3Q19_Unpack(8,0,1,1,dvcRecvList_yz,0,recvCount_yz,recvbuf_yz,f_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(8,dvcRecvDist_yz,0,recvCount_yz,recvbuf_yz,f_even,N);
//................................................................................... //...................................................................................
Lock=false; // unlock the communicator after communications complete Lock=false; // unlock the communicator after communications complete
//................................................................................... //...................................................................................
@ -625,7 +1255,7 @@ void ScaLBL_Communicator::BiRecvD3Q7(double *A_even, double *A_odd, double *B_ev
ScaLBL_DeviceBarrier(); ScaLBL_DeviceBarrier();
//................................................................................... //...................................................................................
// Unpack the distributions on the device // Unpack the distributions on the device
//................................................................................... /* //...................................................................................
//...Map recieve list for the X face: q=2,8,10,12,13 ................................ //...Map recieve list for the X face: q=2,8,10,12,13 ................................
ScaLBL_D3Q19_Unpack(0,-1,0,0,dvcRecvList_X,0,recvCount_X,recvbuf_X,A_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(0,-1,0,0,dvcRecvList_X,0,recvCount_X,recvbuf_X,A_odd,Nx,Ny,Nz);
ScaLBL_D3Q19_Unpack(0,-1,0,0,dvcRecvList_X,recvCount_X,recvCount_X,recvbuf_X,B_odd,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(0,-1,0,0,dvcRecvList_X,recvCount_X,recvCount_X,recvbuf_X,B_odd,Nx,Ny,Nz);
@ -649,6 +1279,30 @@ void ScaLBL_Communicator::BiRecvD3Q7(double *A_even, double *A_odd, double *B_ev
ScaLBL_D3Q19_Unpack(3,0,0,1,dvcRecvList_z,0,recvCount_z,recvbuf_z,A_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(3,0,0,1,dvcRecvList_z,0,recvCount_z,recvbuf_z,A_even,Nx,Ny,Nz);
ScaLBL_D3Q19_Unpack(3,0,0,1,dvcRecvList_z,recvCount_z,recvCount_z,recvbuf_z,B_even,Nx,Ny,Nz); ScaLBL_D3Q19_Unpack(3,0,0,1,dvcRecvList_z,recvCount_z,recvCount_z,recvbuf_z,B_even,Nx,Ny,Nz);
//.................................................................................. //..................................................................................
*/
//...Map recieve list for the X face: q=2,8,10,12,13 ................................
ScaLBL_D3Q19_Unpack(0,dvcRecvDist_X,0,recvCount_X,recvbuf_X,A_odd,N);
ScaLBL_D3Q19_Unpack(0,dvcRecvDist_X,recvCount_X,recvCount_X,recvbuf_X,B_odd,N);
//...................................................................................
//...Map recieve list for the x face: q=1,7,9,11,13..................................
ScaLBL_D3Q19_Unpack(1,dvcRecvDist_x,0,recvCount_x,recvbuf_x,A_even,N);
ScaLBL_D3Q19_Unpack(1,dvcRecvDist_x,recvCount_x,recvCount_x,recvbuf_x,B_even,N);
//...................................................................................
//...Map recieve list for the y face: q=4,8,9,16,18 .................................
ScaLBL_D3Q19_Unpack(1,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,A_odd,N);
ScaLBL_D3Q19_Unpack(1,dvcRecvDist_Y,recvCount_Y,recvCount_Y,recvbuf_Y,B_odd,N);
//...................................................................................
//...Map recieve list for the Y face: q=3,7,10,15,17 ................................
ScaLBL_D3Q19_Unpack(2,dvcRecvDist_y,0,recvCount_y,recvbuf_y,A_even,N);
ScaLBL_D3Q19_Unpack(2,dvcRecvDist_y,recvCount_y,recvCount_y,recvbuf_y,B_even,N);
//...................................................................................
//...Map recieve list for the z face<<<6,12,13,16,17)................................
ScaLBL_D3Q19_Unpack(2,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,A_odd,N);
ScaLBL_D3Q19_Unpack(2,dvcRecvDist_Z,recvCount_Z,recvCount_Z,recvbuf_Z,B_odd,N);
//...Map recieve list for the Z face<<<5,11,14,15,18)................................
ScaLBL_D3Q19_Unpack(3,dvcRecvDist_z,0,recvCount_z,recvbuf_z,A_even,N);
ScaLBL_D3Q19_Unpack(3,dvcRecvDist_z,recvCount_z,recvCount_z,recvbuf_z,B_even,N);
//..................................................................................
Lock=false; // unlock the communicator after communications complete Lock=false; // unlock the communicator after communications complete
//................................................................................... //...................................................................................
} }
@ -754,3 +1408,22 @@ void ScaLBL_Communicator::RecvHalo(double *data){
//................................................................................... //...................................................................................
} }
void ScaLBL_Communicator::PrintD3Q19(){
printf("Printing D3Q19 communication buffer contents \n");
int i,n;
double f;
int *TempBuffer;
TempBuffer = new int [5*recvCount_x];
//.......................................................................
// Re-index the send lists
ScaLBL_CopyToHost(TempBuffer,dvcRecvDist_x,5*recvCount_x*sizeof(int));
for (i=0; i<5*recvCount_x; i++){
n = TempBuffer[i];
f = recvbuf_x[i];
printf("Receive %f to %i from buffer index %i \n", f, n, i);
}
delete [] TempBuffer;
}

View File

@ -197,7 +197,7 @@ TwoPhase::~TwoPhase()
void TwoPhase::ColorToSignedDistance(double Beta, DoubleArray &ColorData, DoubleArray &DistData) void TwoPhase::ColorToSignedDistance(double Beta, DoubleArray &ColorData, DoubleArray &DistData)
{ {
double factor,temp,value; double factor,temp,value;
factor=0.5/Beta; /* factor=0.5/Beta;
// Initialize to -1,1 (segmentation) // Initialize to -1,1 (segmentation)
for (int k=0; k<Nz; k++){ for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){ for (int j=0; j<Ny; j++){
@ -241,15 +241,15 @@ void TwoPhase::ColorToSignedDistance(double Beta, DoubleArray &ColorData, Double
} }
} }
} }
*/
/* for (int k=0; k<Nz; k++){ for (int k=0; k<Nz; k++){
for (int j=0; j<Ny; j++){ for (int j=0; j<Ny; j++){
for (int i=0; i<Nx; i++){ for (int i=0; i<Nx; i++){
DistData(i,j,k) = ColorData(i,j,k); DistData(i,j,k) = ColorData(i,j,k);
} }
} }
} }
*/
} }

View File

@ -7,20 +7,6 @@
// functionality for parallel reduction in Flux BC routines -- probably should be re-factored to another location // functionality for parallel reduction in Flux BC routines -- probably should be re-factored to another location
// functions copied from https://devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/ // functions copied from https://devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/
__device__ double atomicAdd(double* address, double val)
{
unsigned long long int* address_as_ull =
(unsigned long long int*)address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val +
__longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
__inline__ __device__ __inline__ __device__
double warpReduceSum(double val) { double warpReduceSum(double val) {
for (int offset = warpSize/2; offset > 0; offset /= 2) for (int offset = warpSize/2; offset > 0; offset /= 2)
@ -78,7 +64,7 @@ __global__ void dvc_ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, d
__global__ void dvc_ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, __global__ void dvc_ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count,
double *recvbuf, double *dist, int N){ double *recvbuf, double *dist, int N){
//.................................................................................... //....................................................................................
// Unpack distribution from the recv buffer // Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz // Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q // swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout // dist may be even or odd distributions stored by stream layout
@ -185,451 +171,6 @@ __global__ void dvc_ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *distev
} }
} }
} }
__global__ void dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
int n, nn, nnn;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
char id;
int nread,nwrite,naccess;
int S = Np/NBLOCKS/NTHREADS+1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
id = ID[n];
if (n<Np) {
if (id > 0) {
f0 = dist[n];
nread = d_neighborList[n]; naccess = Np;
if (nread<0) { nread=n; naccess = 10*Np; }
f2 = dist[nread + naccess];
nread = d_neighborList[n+2*Np]; naccess = 2*Np;
if (nread<0) { nread=n; naccess = 11*Np; }
f4 = dist[nread + naccess];
nread = d_neighborList[n+4*Np]; naccess = 3*Np;
if (nread<0) { nread=n; naccess = 12*Np; }
f6 = dist[nread + naccess];
nread = d_neighborList[n+6*Np]; naccess = 4*Np;
if (nread<0) { nread=n; naccess = 13*Np; }
f8 = dist[nread + naccess];
nread = d_neighborList[n+8*Np]; naccess = 5*Np;
if (nread<0) { nread=n; naccess = 14*Np; }
f10 = dist[nread + naccess];
nread = d_neighborList[n+10*Np]; naccess = 6*Np;
if (nread<0) { nread=n; naccess = 15*Np; }
f12 = dist[nread + naccess];
nread = d_neighborList[n+12*Np]; naccess = 7*Np;
if (nread<0) { nread=n; naccess = 16*Np; }
f14 = dist[nread + naccess];
nread = d_neighborList[n+14*Np]; naccess = 8*Np;
if (nread<0) { nread=n; naccess = 17*Np; }
f16 = dist[nread + naccess];
nread = d_neighborList[n+16*Np]; naccess = 9*Np;
if (nread<0) { nread=n; naccess = 18*Np; }
f18 = dist[nread + naccess];
nread = d_neighborList[n+Np]; naccess = 10*Np;
if (nread<0) { nread=n; naccess = Np; }
f1 = dist[nread + naccess];
nread = d_neighborList[n+3*Np]; naccess = 11*Np;
if (nread<0) { nread=n; naccess = 2*Np; }
f3 = dist[nread + naccess];
nread = d_neighborList[n+5*Np]; naccess = 12*Np;
if (nread<0) { nread=n; naccess = 3*Np; }
f5 = dist[nread + naccess];
nread = d_neighborList[n+7*Np]; naccess = 13*Np;
if (nread<0) { nread=n; naccess = 4*Np; }
f7 = dist[nread + naccess];
nread = d_neighborList[n+9*Np]; naccess = 14*Np;
if (nread<0) { nread=n; naccess = 5*Np; }
f9 = dist[nread + naccess];
nread = d_neighborList[n+11*Np]; naccess = 15*Np;
if (nread<0) { nread=n; naccess = 6*Np; }
f11 = dist[nread + naccess];
nread = d_neighborList[n+13*Np]; naccess = 16*Np;
if (nread<0) { nread=n; naccess = 7*Np; }
f13 = dist[nread + naccess];
nread = d_neighborList[n+15*Np]; naccess = 17*Np;
if (nread<0) { nread=n; naccess = 8*Np; }
f15 = dist[nread + naccess];
nread = d_neighborList[n+17*Np]; naccess = 18*Np;
if (nread<0) { nread=n; naccess = 9*Np; }
f17 = dist[nread + naccess];
// ORIGINAL CORRECT WRITES
// nwrite = d_neighborList[n]; naccess = 10*Np;
// if (nwrite<0) { nwrite=n; naccess = Np; }
// dist[nwrite + naccess] = f1;
// nwrite = d_neighborList[n+Np]; naccess = Np;
// if (nwrite<0) { nwrite=n; naccess = 10*Np; }
// dist[nwrite + naccess] = f2;
nread = d_neighborList[n]; naccess = Np;
if (nread<0) { nread=n; naccess = 10*Np; }
dist[nread + naccess] = f1;
nread = d_neighborList[n+2*Np]; naccess = 2*Np;
if (nread<0) { nread=n; naccess = 11*Np; }
dist[nread + naccess] = f3;
nread = d_neighborList[n+4*Np]; naccess = 3*Np;
if (nread<0) { nread=n; naccess = 12*Np; }
dist[nread + naccess] = f5;
nread = d_neighborList[n+6*Np]; naccess = 4*Np;
if (nread<0) { nread=n; naccess = 13*Np; }
dist[nread + naccess] = f7;
nread = d_neighborList[n+8*Np]; naccess = 5*Np;
if (nread<0) { nread=n; naccess = 14*Np; }
dist[nread + naccess] = f9;
nread = d_neighborList[n+10*Np]; naccess = 6*Np;
if (nread<0) { nread=n; naccess = 15*Np; }
dist[nread + naccess] = f11;
nread = d_neighborList[n+12*Np]; naccess = 7*Np;
if (nread<0) { nread=n; naccess = 16*Np; }
dist[nread + naccess] = f13;
nread = d_neighborList[n+14*Np]; naccess = 8*Np;
if (nread<0) { nread=n; naccess = 17*Np; }
dist[nread + naccess] = f15;
nread = d_neighborList[n+16*Np]; naccess = 9*Np;
if (nread<0) { nread=n; naccess = 18*Np; }
dist[nread + naccess] = f17;
nread = d_neighborList[n+Np]; naccess = 10*Np;
if (nread<0) { nread=n; naccess = Np; }
dist[nread + naccess] = f2;
nread = d_neighborList[n+3*Np]; naccess = 11*Np;
if (nread<0) { nread=n; naccess = 2*Np; }
dist[nread + naccess] = f4;
nread = d_neighborList[n+5*Np]; naccess = 12*Np;
if (nread<0) { nread=n; naccess = 3*Np; }
dist[nread + naccess] = f6;
nread = d_neighborList[n+7*Np]; naccess = 13*Np;
if (nread<0) { nread=n; naccess = 4*Np; }
dist[nread + naccess] = f8;
nread = d_neighborList[n+9*Np]; naccess = 14*Np;
if (nread<0) { nread=n; naccess = 5*Np; }
dist[nread + naccess] = f10;
nread = d_neighborList[n+11*Np]; naccess = 15*Np;
if (nread<0) { nread=n; naccess = 6*Np; }
dist[nread + naccess]= f12;
nread = d_neighborList[n+13*Np]; naccess = 16*Np;
if (nread<0) { nread=n; naccess = 7*Np; }
dist[nread + naccess] = f14;
nread = d_neighborList[n+15*Np]; naccess = 17*Np;
if (nread<0) { nread=n; naccess = 8*Np; }
dist[nread + naccess] = f16;
nread = d_neighborList[n+17*Np]; naccess = 18*Np;
if (nread<0) { nread=n; naccess = 9*Np; }
dist[nread + naccess] = f18;
}
}
}
}
__global__ void dvc_ScaLBL_AAeven_Compact(char * ID, double *dist, int Np) {
int q,n,nn;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
double f10,f11,f12,f13,f14,f15,f16,f17,f18;
char id;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
id = ID[n];
if ( n<Np ){
if (id > 0) {
//........................................................................
// READ THE DISTRIBUTIONS
// (read from opposite array due to previous swap operation)
//........................................................................
// even
f2 = dist[10*Np+n];
f4 = dist[11*Np+n];
f6 = dist[12*Np+n];
f8 = dist[13*Np+n];
f10 = dist[14*Np+n];
f12 = dist[15*Np+n];
f14 = dist[16*Np+n];
f16 = dist[17*Np+n];
f18 = dist[18*Np+n];
f0 = dist[n];
// odd
f1 = dist[Np+n];
f3 = dist[2*Np+n];
f5 = dist[3*Np+n];
f7 = dist[4*Np+n];
f9 = dist[5*Np+n];
f11 = dist[6*Np+n];
f13 = dist[7*Np+n];
f15 = dist[8*Np+n];
f17 = dist[9*Np+n];
//........................................................................
// WRITE THE DISTRIBUTIONS
// even
//disteven[n] = f0;
dist[Np+n] = f2;
dist[2*Np+n] = f4;
dist[3*Np+n] = f6;
dist[4*Np+n] = f8;
dist[5*Np+n] = f10;
dist[6*Np+n] = f12;
dist[7*Np+n] = f14;
dist[8*Np+n] = f16;
dist[9*Np+n] = f18;
// odd
dist[10*Np+n] = f1;
dist[11*Np+n] = f3;
dist[12*Np+n] = f5;
dist[13*Np+n] = f7;
dist[14*Np+n] = f9;
dist[15*Np+n] = f11;
dist[16*Np+n] = f13;
dist[17*Np+n] = f15;
dist[18*Np+n] = f17;
//........................................................................
}
}
}
}
//__global__ void dvc_ScaLBL_AAodd_Compact_OLD(char * ID, int *d_neighborList, double *d_disteven, double *d_distodd, int Np) {
//
// int n, nn, nnn;
// double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
// double f10,f11,f12,f13,f14,f15,f16,f17,f18;
// char id;
// int nread,nwrite,naccess;
// int S = Np/NBLOCKS/NTHREADS+1;
//
// for (int s=0; s<S; s++){
// //........Get 1-D index for this thread....................
// n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
// id = ID[n];
// if (n<Np) {
// if (id > 0) {
// // Convention
// // nread - read neighbor location for distribution q
// // nwrite - write neighbor location for distribution q
// // read f1
//
// // Compact:
// // nn = d_neighborList[n+2*0*Np]; // nread
// // nnn=d_neighborList[n+(2*0+1)*Np]; // nwrite
// // // if ((!(nn<0)) && (!(nnn<0))) {
// // //printf("n=%d: nn=%d, nnn=%d\n",n,nn,nnn);
// // f1 = d_distodd[nnn + 0*Np];
// // f2 = d_disteven[nn + (0+1)*Np];
// // d_distodd[nnn+0*Np] = f2;
// // d_disteven[nn+(0+1)*Np] = f1;
//
// // still need f0
// //f0 = d_disteven[n]; // ?
//
//
// nread = d_neighborList[n]; naccess = 10*Np; // 1 "neighbor 0"
// if (nread<0) { nread=n; naccess = Np; }
// f2 = dist[nread + naccess];
//
// nread = d_neighborList[n+Np]; naccess = Np; // 2
// if (nread<0) { nread=n; naccess = 10*Np; }
// f1 = dist[nread + nacess];
//
//// nread = d_neighborList[n+2*Np]; // 3
//// if (nread<0) nread=n;
//// f4 = d_disteven[2*Np+nread];
////
//// nread = d_neighborList[n+3*Np]; // 4
//// if (nread<0) nread=n;
//// f3 = d_distodd[Np+nread];
////
//// nread = d_neighborList[n+4*Np]; // 5
//// if (nread<0) nread=n;
//// f6 = d_disteven[3*Np+nread];
////
//// nread = d_neighborList[n+5*Np]; // 6
//// if (nread<0) nread=n;
//// f5 = d_distodd[2*Np+nread];
////
//// nread = d_neighborList[n+6*Np]; // 7
//// if (nread<0) nread=n;
//// f8 = d_disteven[4*Np+nread];
////
//// nread = d_neighborList[n+7*Np]; // 8
//// if (nread<0) nread=n;
//// f7 = d_distodd[3*Np+nread];
////
//// nread = d_neighborList[n+8*Np]; // 9
//// if (nread<0) nread=n;
//// f10 = d_disteven[5*Np+nread];
////
//// nread = d_neighborList[n+9*Np]; // 10
//// if (nread<0) nread=n;
//// f9 = d_distodd[4*Np+nread];
////
//// nread = d_neighborList[n+10*Np]; // 11
//// if (nread<0) nread=n;
//// f12 = d_disteven[6*Np+nread];
////
//// nread = d_neighborList[n+11*Np]; // 12
//// if (nread<0) nread=n;
//// f11 = d_distodd[5*Np+nread];
////
//// nread = d_neighborList[n+12*Np]; // 13
//// if (nread<0) nread=n;
//// f14 = d_disteven[7*Np+nread];
////
//// nread = d_neighborList[n+13*Np]; // 14
//// if (nread<0) nread=n;
//// f13 = d_distodd[6*Np+nread];
////
//// nread = d_neighborList[n+14*Np]; //15
//// if (nread<0) nread=n;
//// f16 = d_disteven[8*Np+nread];
////
//// nread = d_neighborList[n+15*Np]; //16
//// if (nread<0) nread=n;
//// f15 = d_distodd[7*Np+nread];
////
//// nread = d_neighborList[n+16*Np]; //17
//// if (nread<0) nread=n;
//// f18 = d_disteven[9*Np+nread];
////
//// nread = d_neighborList[n+17*Np]; // 18
//// if (nread<0) nread=n;
//// f17 = d_distodd[8*Np+nread];
//
// // At this point we need all f0, f1, f2, f3, f4 ..., f18
// // COLLISION
//
//
//
// // write
// nwrite = d_neighborList[n]; // 1
// if (nwrite<0) nwrite=n;
// d_disteven[Np+nwrite] = f1;
//
// nwrite = d_neighborList[Np+n]; // 2
// if (nwrite<0) nwrite=n;
// d_distodd[nwrite] = f2;
//
//// nwrite = d_neighborList[2*Np+n]; // 3
//// if (nwrite<0) nwrite=n;
//// d_disteven[2*Np+nwrite] = f3;
////
//// nwrite = d_neighborList[3*Np+n]; // 4
//// if (nwrite<0) nwrite=n;
//// d_distodd[Np+nwrite] = f4;
////
//// nwrite = d_neighborList[4*Np+n]; // 5
//// if (nwrite<0) nwrite=n;
//// d_disteven[3*Np+nwrite] = f5;
////
//// nwrite = d_neighborList[5*Np+n]; // 6
//// if (nwrite<0) nwrite=n;
//// d_distodd[2*Np+nwrite] = f6;
////
//// nwrite = d_neighborList[6*Np+n]; // 7
//// if (nwrite<0) nwrite=n;
//// d_disteven[4*Np+nwrite] = f7;
////
//// nwrite = d_neighborList[7*Np+n]; // 8
//// if (nwrite<0) nwrite=n;
//// d_distodd[3*Np+nwrite] = f8;
////
//// nwrite = d_neighborList[8*Np+n]; // 9
//// if (nwrite<0) nwrite=n;
//// d_disteven[5*Np+nwrite] = f9;
////
//// nwrite = d_neighborList[9*Np+n]; // 10
//// if (nwrite<0) nwrite=n;
//// d_distodd[4*Np+nwrite] = f10;
////
//// nwrite = d_neighborList[10*Np+n]; // 11
//// if (nwrite<0) nwrite=n;
//// d_disteven[6*Np+nwrite] = f11;
////
//// nwrite = d_neighborList[11*Np+n]; // 12
//// if (nwrite<0) nwrite=n;
//// d_distodd[5*Np+nwrite] = f12;
////
//// nwrite = d_neighborList[12*Np+n]; // 13
//// if (nwrite<0) nwrite=n;
//// d_disteven[7*Np+nwrite] = f13;
////
//// nwrite = d_neighborList[13*Np+n]; // 14
//// if (nwrite<0) nwrite=n;
//// d_distodd[6*Np+nwrite] = f14;
////
//// nwrite = d_neighborList[14*Np+n]; // 15
//// if (nwrite<0) nwrite=n;
//// d_disteven[8*Np+nwrite] = f15;
////
//// nwrite = d_neighborList[15*Np+n]; // 16
//// if (nwrite<0) nwrite=n;
//// d_distodd[7*Np+nwrite] = f16;
////
//// nwrite = d_neighborList[16*Np+n]; // 17
//// if (nwrite<0) nwrite=n;
//// d_disteven[9*Np+nwrite] = f17;
////
//// nwrite = d_neighborList[17*Np+n]; // 18
//// if (nwrite<0) nwrite=n;
//// d_distodd[8*Np+nwrite] = f18;
//
// }
// }
// }
//}
__global__ void dvc_ScaLBL_D3Q19_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz) __global__ void dvc_ScaLBL_D3Q19_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz)
{ {
int i,j,k,n,nn,N; int i,j,k,n,nn,N;
@ -1040,6 +581,9 @@ __global__ void dvc_D3Q19_Flux_BC_z(double *disteven, double *distodd, double fl
//localsum[n]=sum; //localsum[n]=sum;
} }
//atomicAdd(dvcsum, sum);
//sum = warpReduceSum(sum); //sum = warpReduceSum(sum);
//if (threadIdx.x & (warpSize-1) == 0 ){ //if (threadIdx.x & (warpSize-1) == 0 ){
// atomicAdd(dvcsum,sum); // atomicAdd(dvcsum,sum);
@ -1095,6 +639,11 @@ __global__ void dvc_D3Q19_Flux_BC_Z(double *disteven, double *distodd, double fl
//localsum[n]=sum; //localsum[n]=sum;
} }
//sum = warpReduceSum(sum);
// if (threadIdx.x & (warpSize-1) == 0 ){
// atomicAdd(dvcsum,sum);
// }
sum = blockReduceSum(sum); sum = blockReduceSum(sum);
if (threadIdx.x==0) if (threadIdx.x==0)
atomicAdd(dvcsum, sum); atomicAdd(dvcsum, sum);
@ -1173,123 +722,6 @@ __global__ void dvc_ScaLBL_D3Q19_Pressure_BC_z(double *disteven, double *distod
} }
} }
__global__ void dvc_ScaLBL_D3Q19_Init_Simple(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
int n,N;
N = Nx*Ny*Nz;
char id;
int S = N/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (n<N ){
id = ID[n];
if (id > 0 ){
f_even[n] = 0 + 0.01*0;
f_odd[n] = 0+ 0.01*1; //double(100*n)+1.f;
f_even[N+n] = 1+ 0.01*2; //double(100*n)+2.f;
f_odd[N+n] = 1+ 0.01*3; //double(100*n)+3.f;
f_even[2*N+n] = 2+ 0.01*4; //double(100*n)+4.f;
f_odd[2*N+n] = 2+ 0.01*5; //double(100*n)+5.f;
f_even[3*N+n] = 3+ 0.01*6; //double(100*n)+6.f;
f_odd[3*N+n] = 3+ 0.01*7; //double(100*n)+7.f;
f_even[4*N+n] = 4+ 0.01*8; //double(100*n)+8.f;
f_odd[4*N+n] = 4+ 0.01*9; //double(100*n)+9.f;
f_even[5*N+n] = 5+ 0.01*10; //double(100*n)+10.f;
f_odd[5*N+n] = 5+ 0.01*11; //double(100*n)+11.f;
f_even[6*N+n] = 6+ 0.01*12; //double(100*n)+12.f;
f_odd[6*N+n] = 6+ 0.01*13; //double(100*n)+13.f;
f_even[7*N+n] = 7+ 0.01*14; //double(100*n)+14.f;
f_odd[7*N+n] = 7+ 0.01*15; //double(100*n)+15.f;
f_even[8*N+n] = 8+ 0.01*16; //double(100*n)+16.f;
f_odd[8*N+n] = 8+ 0.01*17; //double(100*n)+17.f;
f_even[9*N+n] = 9+ 0.01*18; //double(100*n)+18.f;
}
else{
for(int q=0; q<9; q++){
f_even[q*N+n] = -1.0;
f_odd[q*N+n] = -1.0;
}
f_even[9*N+n] = -1.0;
}
}
}
}
//__global__ void dvc_ScaLBL_AAeven_Compact(char * ID, double *disteven, double *distodd, int Np) {
//
//
// int q,n,nn;
// double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
// double f10,f11,f12,f13,f14,f15,f16,f17,f18;
//
// char id;
// int S = Np/NBLOCKS/NTHREADS + 1;
// for (int s=0; s<S; s++){
// //........Get 1-D index for this thread....................
// n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
// id = ID[n];
//
// if ( n<Np ){
// if (id > 0) {
//
// //........................................................................
// // READ THE DISTRIBUTIONS
// // (read from opposite array due to previous swap operation)
// //........................................................................
// // even
// f2 = distodd[n];
// f4 = distodd[Np+n];
// f6 = distodd[2*Np+n];
// f8 = distodd[3*Np+n];
// f10 = distodd[4*Np+n];
// f12 = distodd[5*Np+n];
// f14 = distodd[6*Np+n];
// f16 = distodd[7*Np+n];
// f18 = distodd[8*Np+n];
// f0 = disteven[n];
// // odd
// f1 = disteven[Np+n];
// f3 = disteven[2*Np+n];
// f5 = disteven[3*Np+n];
// f7 = disteven[4*Np+n];
// f9 = disteven[5*Np+n];
// f11 = disteven[6*Np+n];
// f13 = disteven[7*Np+n];
// f15 = disteven[8*Np+n];
// f17 = disteven[9*Np+n];
//
// //........................................................................
// // WRITE THE DISTRIBUTIONS
// // even
// //disteven[n] = f0;
// disteven[Np+n] = f2;
// disteven[2*Np+n] = f4;
// disteven[3*Np+n] = f6;
// disteven[4*Np+n] = f8;
// disteven[5*Np+n] = f10;
// disteven[6*Np+n] = f12;
// disteven[7*Np+n] = f14;
// disteven[8*Np+n] = f16;
// disteven[9*Np+n] = f18;
// // odd
// distodd[n] = f1;
// distodd[Np+n] = f3;
// distodd[2*Np+n] = f5;
// distodd[3*Np+n] = f7;
// distodd[4*Np+n] = f9;
// distodd[5*Np+n] = f11;
// distodd[6*Np+n] = f13;
// distodd[7*Np+n] = f15;
// distodd[8*Np+n] = f17;
// //........................................................................
// }
// }
// }
//}
__global__ void dvc_ScaLBL_D3Q19_Pressure_BC_Z(double *disteven, double *distodd, double dout, __global__ void dvc_ScaLBL_D3Q19_Pressure_BC_Z(double *disteven, double *distodd, double dout,
int Nx, int Ny, int Nz, int outlet) int Nx, int Ny, int Nz, int outlet)
{ {
@ -1385,8 +817,7 @@ extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, dou
} }
//************************************************************************* //*************************************************************************
extern "C" void ScaLBL_D3Q19_Init(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz){ extern "C" void ScaLBL_D3Q19_Init(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz){
// dvc_ScaLBL_D3Q19_Init<<<NBLOCKS,NTHREADS >>>(ID, f_even, f_odd, Nx, Ny, Nz); dvc_ScaLBL_D3Q19_Init<<<NBLOCKS,NTHREADS >>>(ID, f_even, f_odd, Nx, Ny, Nz);
dvc_ScaLBL_D3Q19_Init_Simple<<<NBLOCKS,NTHREADS >>>(ID, f_even, f_odd, Nx, Ny, Nz);
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){ if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err)); printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));
@ -1418,31 +849,6 @@ extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, d
} }
} }
//extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_disteven, double *d_distodd, int Np) {
// dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(ID, d_disteven, d_distodd,Np);
// cudaError_t err = cudaGetLastError();
// if (cudaSuccess != err){
// printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));
// }
//}
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) {
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(ID, d_dist, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) {
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(ID,d_neighborList, d_dist,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_Velocity(char *ID, double *disteven, double *distodd, double *vel, int Nx, int \ extern "C" void ScaLBL_D3Q19_Velocity(char *ID, double *disteven, double *distodd, double *vel, int Nx, int \
Ny, int Nz){ Ny, int Nz){
@ -1463,9 +869,6 @@ extern "C" void ScaLBL_D3Q19_Velocity_BC_Z(double *disteven, double *distodd, do
dvc_D3Q19_Velocity_BC_Z<<<GRID,512>>>(disteven, distodd, uz, Nx, Ny, Nz, outlet); dvc_D3Q19_Velocity_BC_Z<<<GRID,512>>>(disteven, distodd, uz, Nx, Ny, Nz, outlet);
} }
extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, double flux,int Nx, int Ny, int Nz){ extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, double flux,int Nx, int Ny, int Nz){
int GRID = Nx*Ny / 512 + 1; int GRID = Nx*Ny / 512 + 1;

View File

@ -20,7 +20,7 @@ ADD_LBPM_EXECUTABLE( lbpm_inkbottle_pp )
ADD_LBPM_EXECUTABLE( lbpm_plates_pp ) ADD_LBPM_EXECUTABLE( lbpm_plates_pp )
ADD_LBPM_EXECUTABLE( lbpm_squaretube_pp ) ADD_LBPM_EXECUTABLE( lbpm_squaretube_pp )
ADD_LBPM_EXECUTABLE( lbpm_BlobAnalysis ) ADD_LBPM_EXECUTABLE( lbpm_BlobAnalysis )
ADD_LBPM_EXECUTABLE( TestBubble ) #ADD_LBPM_EXECUTABLE( TestBubble )
ADD_LBPM_EXECUTABLE( ComponentLabel ) ADD_LBPM_EXECUTABLE( ComponentLabel )
ADD_LBPM_EXECUTABLE( ColorToBinary ) ADD_LBPM_EXECUTABLE( ColorToBinary )
ADD_LBPM_EXECUTABLE( BlobAnalysis ) ADD_LBPM_EXECUTABLE( BlobAnalysis )
@ -33,7 +33,7 @@ CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/cylindertest ${CMAKE_CURRENT_BINARY_
# Add the tests # Add the tests
ADD_LBPM_TEST( pmmc_cylinder ) ADD_LBPM_TEST( pmmc_cylinder )
ADD_LBPM_TEST( TestBubble ) #ADD_LBPM_TEST( TestBubble )
ADD_LBPM_TEST( TestTorus ) ADD_LBPM_TEST( TestTorus )
ADD_LBPM_TEST( TestFluxBC ) ADD_LBPM_TEST( TestFluxBC )
ADD_LBPM_TEST( TestInterfaceSpeed ) ADD_LBPM_TEST( TestInterfaceSpeed )

View File

@ -662,7 +662,6 @@ int main(int argc, char **argv)
//ScaLBL_Color_BC_Z(Phi,Den,A_even,A_odd,B_even,B_odd,Nx,Ny,Nz); //ScaLBL_Color_BC_Z(Phi,Den,A_even,A_odd,B_even,B_odd,Nx,Ny,Nz);
ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1);
} }
// Set dynamic pressure boundary conditions // Set dynamic pressure boundary conditions
double dp, slope; double dp, slope;
dp = slope = 0.0; dp = slope = 0.0;
@ -687,18 +686,6 @@ int main(int argc, char **argv)
ScaLBL_D3Q19_Pressure(ID,f_even,f_odd,Pressure,Nx,Ny,Nz); ScaLBL_D3Q19_Pressure(ID,f_even,f_odd,Pressure,Nx,Ny,Nz);
ScaLBL_D3Q19_Velocity(ID,f_even,f_odd,Velocity,Nx,Ny,Nz); ScaLBL_D3Q19_Velocity(ID,f_even,f_odd,Velocity,Nx,Ny,Nz);
/* if (BoundaryCondition==1 && Mask.kproc == 0){
for (n=Nx*Ny; n<2*Nx*Ny; n++){
if (Mask.id[n]>0 && 3.0*Pressure[n] != din) printf("Inlet pBC error: %f != %f \n",3.0*Pressure[n],din);
}
}
if (BoundaryCondition==1 && Mask.kproc == nprocz-1){
for (n=Nx*Ny*(Nz-2); n<Nx*Ny*(Nz-1); n++){
if (Mask.id[n]>0 && 3.0*Pressure[n] != dout) printf("Outlet pBC error: %f != %f \n",3.0*Pressure[n],din);
}
}
*/
//........................................................................... //...........................................................................
// Copy the phase indicator field for the earlier timestep // Copy the phase indicator field for the earlier timestep
ScaLBL_DeviceBarrier(); ScaLBL_DeviceBarrier();