Trying to make nicer strides for the performance benchmark

This commit is contained in:
James E McClure
2018-04-05 14:35:03 -04:00
parent f40b0f7175
commit d1d24cc2ae
3 changed files with 20 additions and 14 deletions

View File

@@ -2373,7 +2373,7 @@ void ScaLBL_Communicator::MemoryOptimizedLayoutFull(IntArray &Map, int *neighbor
}
void ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, char *id, int Np){
int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, char *id, int Np){
/*
* Generate a memory optimized layout
* id[n] == 0 implies that site n should be ignored (treat as a mask)
@@ -2422,6 +2422,9 @@ void ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLi
next=idx;
// create a stride
first_interior=(next/16 + 1)*16;
idx = first_interior;
// Step 2/2: Next loop over the domain interior in block-cyclic fashion
for (k=2; k<Nz-2; k++){
for (j=2; j<Ny-2; j++){
@@ -2435,6 +2438,7 @@ void ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLi
}
}
}
last_interior=idx;
/*
int MemBlockSize=32;
@@ -2469,9 +2473,9 @@ void ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLi
}
*/
if (idx > Np ){
ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n");
}
// if (idx > Np ){
// ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n");
// }
/*
for (k=1;k<Nz-1;k++){
printf("....k=%i .....\n",k);
@@ -2486,14 +2490,16 @@ void ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLi
}
printf("\n\n");
*/
Np = (last_interior%16 + 1)*16
// Now use Map to determine the neighbors for each lattice direction
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
n=k*Nx*Ny+j*Nx+i;
idx=Map(i,j,k);
if (idx > Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np);
//if (idx > Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np);
else if (!(idx<0)){
// store the idx associated with each neighbor
// store idx for self if neighbor is in solid or out of domain
@@ -2960,9 +2966,10 @@ void ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLi
// Reset the value of N to match the dense structure
N = Np;
// Clean up
delete [] TempBuffer;
return(Np);
}

View File

@@ -240,6 +240,7 @@ public:
unsigned long int CommunicationCount,SendCount,RecvCount;
int Nx,Ny,Nz,N;
int next;
int first_interior,last_interior;
int BoundaryCondition;
//......................................................................................
// Set up for D3Q19 distributions

View File

@@ -276,17 +276,15 @@ int main(int argc, char **argv)
// LBM variables
if (rank==0) printf ("Allocating distributions \n");
int neighborSize=18*Np*sizeof(int);
int *neighborList;
IntArray Map(Nx,Ny,Nz);
neighborList= new int[18*Np];
ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Mask.id,Np);
neighborList= new int[18*(Np+32)];
Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Mask.id,Np);
MPI_Barrier(comm);
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
int neighborSize=18*(Np*sizeof(int));
int *NeighborList;
// double *f_even,*f_odd;
@@ -348,14 +346,14 @@ int main(int argc, char **argv)
timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, ScaLBL_Comm.next, Np, Np, rlx, Fx, Fy, Fz);
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, ScaLBL_Comm.first_exterior, ScaLBL_Comm.last_exterior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FROM NORMAL
ScaLBL_D3Q19_AAeven_BGK(dist, ScaLBL_Comm.next, Np, Np, rlx, Fx, Fy, Fz);
ScaLBL_D3Q19_AAeven_BGK(dist, ScaLBL_Comm.first_exterior, ScaLBL_Comm.last_exterior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_BGK(dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);