From 94156e066e4ff02ba2a8f38e15bee0617982bacc Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 12 Mar 2020 10:26:17 -0400 Subject: [PATCH 001/205] Trying to fix bug with MPI communication --- analysis/morphology.cpp | 382 +++++++++---------- analysis/runAnalysis.cpp | 41 ++- analysis/runAnalysis.h | 4 +- common/Communication.h | 4 +- common/Domain.cpp | 627 ++++++++++++++++---------------- common/Domain.h | 48 +-- common/MPI.cpp | 20 +- common/ScaLBL.cpp | 230 ++++++------ common/ScaLBL.h | 2 +- models/ColorModel.cpp | 4 +- models/DFHModel.cpp | 2 +- models/MRTModel.cpp | 2 +- tests/GenerateSphereTest.cpp | 184 +++++----- tests/TestBubbleDFH.cpp | 2 +- tests/TestColorGradDFH.cpp | 2 +- tests/TestCommD3Q19.cpp | 2 +- tests/TestFluxBC.cpp | 2 +- tests/TestForceMoments.cpp | 4 +- tests/TestMap.cpp | 2 +- tests/TestPressVel.cpp | 6 +- tests/lbpm_minkowski_scalar.cpp | 2 +- 21 files changed, 777 insertions(+), 795 deletions(-) diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index 84ed3652..0980a4f0 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -1,7 +1,7 @@ #include // Implementation of morphological opening routine -inline void PackID(int *list, int count, signed char *sendbuf, signed char *ID){ +inline void PackID(const int *list, int count, signed char *sendbuf, signed char *ID){ // Fill in the phase ID values from neighboring processors // This packs up the values that need to be sent from one processor to another int idx,n; @@ -13,7 +13,7 @@ inline void PackID(int *list, int count, signed char *sendbuf, signed char *ID){ } //*************************************************************************************** -inline void UnpackID(int *list, int count, signed char *recvbuf, signed char *ID){ +inline void UnpackID(const int *list, int count, signed char *recvbuf, signed char *ID){ // Fill in the phase ID values from neighboring processors // This unpacks the values once they have been recieved from neighbors int idx,n; 
@@ -77,44 +77,44 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr signed char *recvID_xy, *recvID_yz, *recvID_xz, *recvID_Xy, *recvID_Yz, *recvID_xZ; signed char *recvID_xY, *recvID_yZ, *recvID_Xz, *recvID_XY, *recvID_YZ, *recvID_XZ; // send buffers - sendID_x = new signed char [Dm->sendCount_x]; - sendID_y = new signed char [Dm->sendCount_y]; - sendID_z = new signed char [Dm->sendCount_z]; - sendID_X = new signed char [Dm->sendCount_X]; - sendID_Y = new signed char [Dm->sendCount_Y]; - sendID_Z = new signed char [Dm->sendCount_Z]; - sendID_xy = new signed char [Dm->sendCount_xy]; - sendID_yz = new signed char [Dm->sendCount_yz]; - sendID_xz = new signed char [Dm->sendCount_xz]; - sendID_Xy = new signed char [Dm->sendCount_Xy]; - sendID_Yz = new signed char [Dm->sendCount_Yz]; - sendID_xZ = new signed char [Dm->sendCount_xZ]; - sendID_xY = new signed char [Dm->sendCount_xY]; - sendID_yZ = new signed char [Dm->sendCount_yZ]; - sendID_Xz = new signed char [Dm->sendCount_Xz]; - sendID_XY = new signed char [Dm->sendCount_XY]; - sendID_YZ = new signed char [Dm->sendCount_YZ]; - sendID_XZ = new signed char [Dm->sendCount_XZ]; + sendID_x = new signed char [Dm->sendCount("x")]; + sendID_y = new signed char [Dm->sendCount("y")]; + sendID_z = new signed char [Dm->sendCount("z")]; + sendID_X = new signed char [Dm->sendCount("X")]; + sendID_Y = new signed char [Dm->sendCount("Y")]; + sendID_Z = new signed char [Dm->sendCount("Z")]; + sendID_xy = new signed char [Dm->sendCount("xy")]; + sendID_yz = new signed char [Dm->sendCount("yz")]; + sendID_xz = new signed char [Dm->sendCount("xz")]; + sendID_Xy = new signed char [Dm->sendCount("Xy")]; + sendID_Yz = new signed char [Dm->sendCount("Yz")]; + sendID_xZ = new signed char [Dm->sendCount("xZ")]; + sendID_xY = new signed char [Dm->sendCount("xY")]; + sendID_yZ = new signed char [Dm->sendCount("yZ")]; + sendID_Xz = new signed char [Dm->sendCount("Xz")]; + sendID_XY = new signed char 
[Dm->sendCount("XY")]; + sendID_YZ = new signed char [Dm->sendCount("YZ")]; + sendID_XZ = new signed char [Dm->sendCount("XZ")]; //...................................................................................... // recv buffers - recvID_x = new signed char [Dm->recvCount_x]; - recvID_y = new signed char [Dm->recvCount_y]; - recvID_z = new signed char [Dm->recvCount_z]; - recvID_X = new signed char [Dm->recvCount_X]; - recvID_Y = new signed char [Dm->recvCount_Y]; - recvID_Z = new signed char [Dm->recvCount_Z]; - recvID_xy = new signed char [Dm->recvCount_xy]; - recvID_yz = new signed char [Dm->recvCount_yz]; - recvID_xz = new signed char [Dm->recvCount_xz]; - recvID_Xy = new signed char [Dm->recvCount_Xy]; - recvID_xZ = new signed char [Dm->recvCount_xZ]; - recvID_xY = new signed char [Dm->recvCount_xY]; - recvID_yZ = new signed char [Dm->recvCount_yZ]; - recvID_Yz = new signed char [Dm->recvCount_Yz]; - recvID_Xz = new signed char [Dm->recvCount_Xz]; - recvID_XY = new signed char [Dm->recvCount_XY]; - recvID_YZ = new signed char [Dm->recvCount_YZ]; - recvID_XZ = new signed char [Dm->recvCount_XZ]; + recvID_x = new signed char [Dm->recvCount("x")]; + recvID_y = new signed char [Dm->recvCount("y")]; + recvID_z = new signed char [Dm->recvCount("z")]; + recvID_X = new signed char [Dm->recvCount("X")]; + recvID_Y = new signed char [Dm->recvCount("Y")]; + recvID_Z = new signed char [Dm->recvCount("Z")]; + recvID_xy = new signed char [Dm->recvCount("xy")]; + recvID_yz = new signed char [Dm->recvCount("yz")]; + recvID_xz = new signed char [Dm->recvCount("xz")]; + recvID_Xy = new signed char [Dm->recvCount("Xy")]; + recvID_xZ = new signed char [Dm->recvCount("xZ")]; + recvID_xY = new signed char [Dm->recvCount("xY")]; + recvID_yZ = new signed char [Dm->recvCount("yZ")]; + recvID_Yz = new signed char [Dm->recvCount("Yz")]; + recvID_Xz = new signed char [Dm->recvCount("Xz")]; + recvID_XY = new signed char [Dm->recvCount("XY")]; + recvID_YZ = new signed char 
[Dm->recvCount("YZ")]; + recvID_XZ = new signed char [Dm->recvCount("XZ")]; //...................................................................................... int sendtag,recvtag; sendtag = recvtag = 7; @@ -182,62 +182,62 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr } } // Pack and send the updated ID values - PackID(Dm->sendList_x, Dm->sendCount_x ,sendID_x, id); - PackID(Dm->sendList_X, Dm->sendCount_X ,sendID_X, id); - PackID(Dm->sendList_y, Dm->sendCount_y ,sendID_y, id); - PackID(Dm->sendList_Y, Dm->sendCount_Y ,sendID_Y, id); - PackID(Dm->sendList_z, Dm->sendCount_z ,sendID_z, id); - PackID(Dm->sendList_Z, Dm->sendCount_Z ,sendID_Z, id); - PackID(Dm->sendList_xy, Dm->sendCount_xy ,sendID_xy, id); - PackID(Dm->sendList_Xy, Dm->sendCount_Xy ,sendID_Xy, id); - PackID(Dm->sendList_xY, Dm->sendCount_xY ,sendID_xY, id); - PackID(Dm->sendList_XY, Dm->sendCount_XY ,sendID_XY, id); - PackID(Dm->sendList_xz, Dm->sendCount_xz ,sendID_xz, id); - PackID(Dm->sendList_Xz, Dm->sendCount_Xz ,sendID_Xz, id); - PackID(Dm->sendList_xZ, Dm->sendCount_xZ ,sendID_xZ, id); - PackID(Dm->sendList_XZ, Dm->sendCount_XZ ,sendID_XZ, id); - PackID(Dm->sendList_yz, Dm->sendCount_yz ,sendID_yz, id); - PackID(Dm->sendList_Yz, Dm->sendCount_Yz ,sendID_Yz, id); - PackID(Dm->sendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); - PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); + PackID(Dm->sendList("x"), Dm->sendCount("x") ,sendID_x, id); + PackID(Dm->sendList("X"), Dm->sendCount("X") ,sendID_X, id); + PackID(Dm->sendList("y"), Dm->sendCount("y") ,sendID_y, id); + PackID(Dm->sendList("Y"), Dm->sendCount("Y") ,sendID_Y, id); + PackID(Dm->sendList("z"), Dm->sendCount("z") ,sendID_z, id); + PackID(Dm->sendList("Z"), Dm->sendCount("Z") ,sendID_Z, id); + PackID(Dm->sendList("xy"), Dm->sendCount("xy") ,sendID_xy, id); + PackID(Dm->sendList("Xy"), Dm->sendCount("Xy") ,sendID_Xy, id); + PackID(Dm->sendList("xY"), Dm->sendCount("xY") ,sendID_xY, id); + 
PackID(Dm->sendList("XY"), Dm->sendCount("XY") ,sendID_XY, id); + PackID(Dm->sendList("xz"), Dm->sendCount("xz") ,sendID_xz, id); + PackID(Dm->sendList("Xz"), Dm->sendCount("Xz") ,sendID_Xz, id); + PackID(Dm->sendList("xZ"), Dm->sendCount("xZ") ,sendID_xZ, id); + PackID(Dm->sendList("XZ"), Dm->sendCount("XZ") ,sendID_XZ, id); + PackID(Dm->sendList("yz"), Dm->sendCount("yz") ,sendID_yz, id); + PackID(Dm->sendList("Yz"), Dm->sendCount("Yz") ,sendID_Yz, id); + PackID(Dm->sendList("yZ"), Dm->sendCount("yZ") ,sendID_yZ, id); + PackID(Dm->sendList("YZ"), Dm->sendCount("YZ") ,sendID_YZ, id); //...................................................................................... - Dm->Comm.sendrecv(sendID_x,Dm->sendCount_x,Dm->rank_x(),sendtag,recvID_X,Dm->recvCount_X,Dm->rank_X(),recvtag); - Dm->Comm.sendrecv(sendID_X,Dm->sendCount_X,Dm->rank_X(),sendtag,recvID_x,Dm->recvCount_x,Dm->rank_x(),recvtag); - Dm->Comm.sendrecv(sendID_y,Dm->sendCount_y,Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount_Y,Dm->rank_Y(),recvtag); - Dm->Comm.sendrecv(sendID_Y,Dm->sendCount_Y,Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount_y,Dm->rank_y(),recvtag); - Dm->Comm.sendrecv(sendID_z,Dm->sendCount_z,Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount_Z,Dm->rank_Z(),recvtag); - Dm->Comm.sendrecv(sendID_Z,Dm->sendCount_Z,Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount_z,Dm->rank_z(),recvtag); - Dm->Comm.sendrecv(sendID_xy,Dm->sendCount_xy,Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount_XY,Dm->rank_XY(),recvtag); - Dm->Comm.sendrecv(sendID_XY,Dm->sendCount_XY,Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount_xy,Dm->rank_xy(),recvtag); - Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount_Xy,Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount_xY,Dm->rank_xY(),recvtag); - Dm->Comm.sendrecv(sendID_xY,Dm->sendCount_xY,Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount_Xy,Dm->rank_Xy(),recvtag); - Dm->Comm.sendrecv(sendID_xz,Dm->sendCount_xz,Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount_XZ,Dm->rank_XZ(),recvtag); - 
Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount_XZ,Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount_xz,Dm->rank_xz(),recvtag); - Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount_Xz,Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount_xZ,Dm->rank_xZ(),recvtag); - Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount_xZ,Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount_Xz,Dm->rank_Xz(),recvtag); - Dm->Comm.sendrecv(sendID_yz,Dm->sendCount_yz,Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount_YZ,Dm->rank_YZ(),recvtag); - Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount_YZ,Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount_yz,Dm->rank_yz(),recvtag); - Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount_Yz,Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount_yZ,Dm->rank_yZ(),recvtag); - Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount_yZ,Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount_Yz,Dm->rank_Yz(),recvtag); + Dm->Comm.sendrecv(sendID_x,Dm->sendCount("x"),Dm->rank_x(),sendtag,recvID_X,Dm->recvCount("X"),Dm->rank_X(),recvtag); + Dm->Comm.sendrecv(sendID_X,Dm->sendCount("X"),Dm->rank_X(),sendtag,recvID_x,Dm->recvCount("x"),Dm->rank_x(),recvtag); + Dm->Comm.sendrecv(sendID_y,Dm->sendCount("y"),Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount("Y"),Dm->rank_Y(),recvtag); + Dm->Comm.sendrecv(sendID_Y,Dm->sendCount("Y"),Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount("y"),Dm->rank_y(),recvtag); + Dm->Comm.sendrecv(sendID_z,Dm->sendCount("z"),Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount("Z"),Dm->rank_Z(),recvtag); + Dm->Comm.sendrecv(sendID_Z,Dm->sendCount("Z"),Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount("z"),Dm->rank_z(),recvtag); + Dm->Comm.sendrecv(sendID_xy,Dm->sendCount("xy"),Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount("XY"),Dm->rank_XY(),recvtag); + Dm->Comm.sendrecv(sendID_XY,Dm->sendCount("XY"),Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount("xy"),Dm->rank_xy(),recvtag); + Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount("Xy"),Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount("xY"),Dm->rank_xY(),recvtag); + 
Dm->Comm.sendrecv(sendID_xY,Dm->sendCount("xY"),Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount("Xy"),Dm->rank_Xy(),recvtag); + Dm->Comm.sendrecv(sendID_xz,Dm->sendCount("xz"),Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount("XZ"),Dm->rank_XZ(),recvtag); + Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount("XZ"),Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount("xz"),Dm->rank_xz(),recvtag); + Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount("Xz"),Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount("xZ"),Dm->rank_xZ(),recvtag); + Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount("xZ"),Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount("Xz"),Dm->rank_Xz(),recvtag); + Dm->Comm.sendrecv(sendID_yz,Dm->sendCount("yz"),Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount("YZ"),Dm->rank_YZ(),recvtag); + Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount("YZ"),Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount("yz"),Dm->rank_yz(),recvtag); + Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount("Yz"),Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount("yZ"),Dm->rank_yZ(),recvtag); + Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount("yZ"),Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount("Yz"),Dm->rank_Yz(),recvtag); //...................................................................................... 
- UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); - UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); - UnpackID(Dm->recvList_y, Dm->recvCount_y ,recvID_y, id); - UnpackID(Dm->recvList_Y, Dm->recvCount_Y ,recvID_Y, id); - UnpackID(Dm->recvList_z, Dm->recvCount_z ,recvID_z, id); - UnpackID(Dm->recvList_Z, Dm->recvCount_Z ,recvID_Z, id); - UnpackID(Dm->recvList_xy, Dm->recvCount_xy ,recvID_xy, id); - UnpackID(Dm->recvList_Xy, Dm->recvCount_Xy ,recvID_Xy, id); - UnpackID(Dm->recvList_xY, Dm->recvCount_xY ,recvID_xY, id); - UnpackID(Dm->recvList_XY, Dm->recvCount_XY ,recvID_XY, id); - UnpackID(Dm->recvList_xz, Dm->recvCount_xz ,recvID_xz, id); - UnpackID(Dm->recvList_Xz, Dm->recvCount_Xz ,recvID_Xz, id); - UnpackID(Dm->recvList_xZ, Dm->recvCount_xZ ,recvID_xZ, id); - UnpackID(Dm->recvList_XZ, Dm->recvCount_XZ ,recvID_XZ, id); - UnpackID(Dm->recvList_yz, Dm->recvCount_yz ,recvID_yz, id); - UnpackID(Dm->recvList_Yz, Dm->recvCount_Yz ,recvID_Yz, id); - UnpackID(Dm->recvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id); - UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); + UnpackID(Dm->recvList("x"), Dm->recvCount("x") ,recvID_x, id); + UnpackID(Dm->recvList("X"), Dm->recvCount("X") ,recvID_X, id); + UnpackID(Dm->recvList("y"), Dm->recvCount("y") ,recvID_y, id); + UnpackID(Dm->recvList("Y"), Dm->recvCount("Y") ,recvID_Y, id); + UnpackID(Dm->recvList("z"), Dm->recvCount("z") ,recvID_z, id); + UnpackID(Dm->recvList("Z"), Dm->recvCount("Z") ,recvID_Z, id); + UnpackID(Dm->recvList("xy"), Dm->recvCount("xy") ,recvID_xy, id); + UnpackID(Dm->recvList("Xy"), Dm->recvCount("Xy") ,recvID_Xy, id); + UnpackID(Dm->recvList("xY"), Dm->recvCount("xY") ,recvID_xY, id); + UnpackID(Dm->recvList("XY"), Dm->recvCount("XY") ,recvID_XY, id); + UnpackID(Dm->recvList("xz"), Dm->recvCount("xz") ,recvID_xz, id); + UnpackID(Dm->recvList("Xz"), Dm->recvCount("Xz") ,recvID_Xz, id); + UnpackID(Dm->recvList("xZ"), Dm->recvCount("xZ") ,recvID_xZ, id); + UnpackID(Dm->recvList("XZ"), 
Dm->recvCount("XZ") ,recvID_XZ, id); + UnpackID(Dm->recvList("yz"), Dm->recvCount("yz") ,recvID_yz, id); + UnpackID(Dm->recvList("Yz"), Dm->recvCount("Yz") ,recvID_Yz, id); + UnpackID(Dm->recvList("yZ"), Dm->recvCount("yZ") ,recvID_yZ, id); + UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id); //...................................................................................... //double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); @@ -359,44 +359,44 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendCount_x]; - sendID_y = new signed char [Dm->sendCount_y]; - sendID_z = new signed char [Dm->sendCount_z]; - sendID_X = new signed char [Dm->sendCount_X]; - sendID_Y = new signed char [Dm->sendCount_Y]; - sendID_Z = new signed char [Dm->sendCount_Z]; - sendID_xy = new signed char [Dm->sendCount_xy]; - sendID_yz = new signed char [Dm->sendCount_yz]; - sendID_xz = new signed char [Dm->sendCount_xz]; - sendID_Xy = new signed char [Dm->sendCount_Xy]; - sendID_Yz = new signed char [Dm->sendCount_Yz]; - sendID_xZ = new signed char [Dm->sendCount_xZ]; - sendID_xY = new signed char [Dm->sendCount_xY]; - sendID_yZ = new signed char [Dm->sendCount_yZ]; - sendID_Xz = new signed char [Dm->sendCount_Xz]; - sendID_XY = new signed char [Dm->sendCount_XY]; - sendID_YZ = new signed char [Dm->sendCount_YZ]; - sendID_XZ = new signed char [Dm->sendCount_XZ]; + sendID_x = new signed char [Dm->sendCount("x")]; + sendID_y = new signed char [Dm->sendCount("y")]; + sendID_z = new signed char [Dm->sendCount("z")]; + sendID_X = new signed char [Dm->sendCount("X")]; + sendID_Y = new signed char [Dm->sendCount("Y")]; + sendID_Z = new signed char [Dm->sendCount("Z")]; + sendID_xy = new signed char [Dm->sendCount("xy")]; + sendID_yz = new signed char [Dm->sendCount("yz")]; + sendID_xz = new signed char [Dm->sendCount("xz")]; + sendID_Xy = new signed char [Dm->sendCount("Xy")]; + sendID_Yz = new signed char [Dm->sendCount("Yz")]; + sendID_xZ = new 
signed char [Dm->sendCount("xZ")]; + sendID_xY = new signed char [Dm->sendCount("xY")]; + sendID_yZ = new signed char [Dm->sendCount("yZ")]; + sendID_Xz = new signed char [Dm->sendCount("Xz")]; + sendID_XY = new signed char [Dm->sendCount("XY")]; + sendID_YZ = new signed char [Dm->sendCount("YZ")]; + sendID_XZ = new signed char [Dm->sendCount("XZ")]; //...................................................................................... // recv buffers - recvID_x = new signed char [Dm->recvCount_x]; - recvID_y = new signed char [Dm->recvCount_y]; - recvID_z = new signed char [Dm->recvCount_z]; - recvID_X = new signed char [Dm->recvCount_X]; - recvID_Y = new signed char [Dm->recvCount_Y]; - recvID_Z = new signed char [Dm->recvCount_Z]; - recvID_xy = new signed char [Dm->recvCount_xy]; - recvID_yz = new signed char [Dm->recvCount_yz]; - recvID_xz = new signed char [Dm->recvCount_xz]; - recvID_Xy = new signed char [Dm->recvCount_Xy]; - recvID_xZ = new signed char [Dm->recvCount_xZ]; - recvID_xY = new signed char [Dm->recvCount_xY]; - recvID_yZ = new signed char [Dm->recvCount_yZ]; - recvID_Yz = new signed char [Dm->recvCount_Yz]; - recvID_Xz = new signed char [Dm->recvCount_Xz]; - recvID_XY = new signed char [Dm->recvCount_XY]; - recvID_YZ = new signed char [Dm->recvCount_YZ]; - recvID_XZ = new signed char [Dm->recvCount_XZ]; + recvID_x = new signed char [Dm->recvCount("x")]; + recvID_y = new signed char [Dm->recvCount("y")]; + recvID_z = new signed char [Dm->recvCount("z")]; + recvID_X = new signed char [Dm->recvCount("X")]; + recvID_Y = new signed char [Dm->recvCount("Y")]; + recvID_Z = new signed char [Dm->recvCount("Z")]; + recvID_xy = new signed char [Dm->recvCount("xy")]; + recvID_yz = new signed char [Dm->recvCount("yz")]; + recvID_xz = new signed char [Dm->recvCount("xz")]; + recvID_Xy = new signed char [Dm->recvCount("Xy")]; + recvID_xZ = new signed char [Dm->recvCount("xZ")]; + recvID_xY = new signed char [Dm->recvCount("xY")]; + recvID_yZ = new signed char 
[Dm->recvCount("yZ")]; + recvID_Yz = new signed char [Dm->recvCount("Yz")]; + recvID_Xz = new signed char [Dm->recvCount("Xz")]; + recvID_XY = new signed char [Dm->recvCount("XY")]; + recvID_YZ = new signed char [Dm->recvCount("YZ")]; + recvID_XZ = new signed char [Dm->recvCount("XZ")]; //...................................................................................... int sendtag,recvtag; sendtag = recvtag = 7; @@ -469,80 +469,62 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendList_x, Dm->sendCount_x ,sendID_x, id); - PackID(Dm->sendList_X, Dm->sendCount_X ,sendID_X, id); - PackID(Dm->sendList_y, Dm->sendCount_y ,sendID_y, id); - PackID(Dm->sendList_Y, Dm->sendCount_Y ,sendID_Y, id); - PackID(Dm->sendList_z, Dm->sendCount_z ,sendID_z, id); - PackID(Dm->sendList_Z, Dm->sendCount_Z ,sendID_Z, id); - PackID(Dm->sendList_xy, Dm->sendCount_xy ,sendID_xy, id); - PackID(Dm->sendList_Xy, Dm->sendCount_Xy ,sendID_Xy, id); - PackID(Dm->sendList_xY, Dm->sendCount_xY ,sendID_xY, id); - PackID(Dm->sendList_XY, Dm->sendCount_XY ,sendID_XY, id); - PackID(Dm->sendList_xz, Dm->sendCount_xz ,sendID_xz, id); - PackID(Dm->sendList_Xz, Dm->sendCount_Xz ,sendID_Xz, id); - PackID(Dm->sendList_xZ, Dm->sendCount_xZ ,sendID_xZ, id); - PackID(Dm->sendList_XZ, Dm->sendCount_XZ ,sendID_XZ, id); - PackID(Dm->sendList_yz, Dm->sendCount_yz ,sendID_yz, id); - PackID(Dm->sendList_Yz, Dm->sendCount_Yz ,sendID_Yz, id); - PackID(Dm->sendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); - PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); + PackID(Dm->sendList("x"), Dm->sendCount("x") ,sendID_x, id); + PackID(Dm->sendList("X"), Dm->sendCount("X") ,sendID_X, id); + PackID(Dm->sendList("y"), Dm->sendCount("y") ,sendID_y, id); + PackID(Dm->sendList("Y"), Dm->sendCount("Y") ,sendID_Y, id); + PackID(Dm->sendList("z"), Dm->sendCount("z") ,sendID_z, id); + PackID(Dm->sendList("Z"), Dm->sendCount("Z") ,sendID_Z, id); + PackID(Dm->sendList("xy"), Dm->sendCount("xy") 
,sendID_xy, id); + PackID(Dm->sendList("Xy"), Dm->sendCount("Xy") ,sendID_Xy, id); + PackID(Dm->sendList("xY"), Dm->sendCount("xY") ,sendID_xY, id); + PackID(Dm->sendList("XY"), Dm->sendCount("XY") ,sendID_XY, id); + PackID(Dm->sendList("xz"), Dm->sendCount("xz") ,sendID_xz, id); + PackID(Dm->sendList("Xz"), Dm->sendCount("Xz") ,sendID_Xz, id); + PackID(Dm->sendList("xZ"), Dm->sendCount("xZ") ,sendID_xZ, id); + PackID(Dm->sendList("XZ"), Dm->sendCount("XZ") ,sendID_XZ, id); + PackID(Dm->sendList("yz"), Dm->sendCount("yz") ,sendID_yz, id); + PackID(Dm->sendList("Yz"), Dm->sendCount("Yz") ,sendID_Yz, id); + PackID(Dm->sendList("yZ"), Dm->sendCount("yZ") ,sendID_yZ, id); + PackID(Dm->sendList("YZ"), Dm->sendCount("YZ") ,sendID_YZ, id); //...................................................................................... - Dm->Comm.sendrecv(sendID_x,Dm->sendCount_x,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,Dm->rank_X(),recvtag); - Dm->Comm.sendrecv(sendID_X,Dm->sendCount_X,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,Dm->rank_x(),recvtag); - Dm->Comm.sendrecv(sendID_y,Dm->sendCount_y,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,Dm->rank_Y(),recvtag); - Dm->Comm.sendrecv(sendID_Y,Dm->sendCount_Y,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,Dm->rank_y(),recvtag); - Dm->Comm.sendrecv(sendID_z,Dm->sendCount_z,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,Dm->rank_Z(),recvtag); - Dm->Comm.sendrecv(sendID_Z,Dm->sendCount_Z,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,Dm->rank_z(),recvtag); - Dm->Comm.sendrecv(sendID_xy,Dm->sendCount_xy,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,Dm->rank_XY(),recvtag); - Dm->Comm.sendrecv(sendID_XY,Dm->sendCount_XY,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,Dm->rank_xy(),recvtag); - Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount_Xy,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,Dm->rank_xY(),recvtag); - Dm->Comm.sendrecv(sendID_xY,Dm->sendCount_xY,Dm->rank_xY(),sendtag, - 
recvID_Xy,Dm->recvCount_Xy,Dm->rank_Xy(),recvtag); - Dm->Comm.sendrecv(sendID_xz,Dm->sendCount_xz,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,Dm->rank_XZ(),recvtag); - Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount_XZ,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,Dm->rank_xz(),recvtag); - Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount_Xz,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,Dm->rank_xZ(),recvtag); - Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount_xZ,Dm->rank_xZ(),sendtag, - recvID_Xz,Dm->recvCount_Xz,Dm->rank_Xz(),recvtag); - Dm->Comm.sendrecv(sendID_yz,Dm->sendCount_yz,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,Dm->rank_YZ(),recvtag); - Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount_YZ,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,Dm->rank_yz(),recvtag); - Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount_Yz,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,Dm->rank_yZ(),recvtag); - Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount_yZ,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,Dm->rank_Yz(),recvtag); + Dm->Comm.sendrecv(sendID_x,Dm->sendCount("x"),Dm->rank_x(),sendtag,recvID_X,Dm->recvCount("X"),Dm->rank_X(),recvtag); + Dm->Comm.sendrecv(sendID_X,Dm->sendCount("X"),Dm->rank_X(),sendtag,recvID_x,Dm->recvCount("x"),Dm->rank_x(),recvtag); + Dm->Comm.sendrecv(sendID_y,Dm->sendCount("y"),Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount("Y"),Dm->rank_Y(),recvtag); + Dm->Comm.sendrecv(sendID_Y,Dm->sendCount("Y"),Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount("y"),Dm->rank_y(),recvtag); + Dm->Comm.sendrecv(sendID_z,Dm->sendCount("z"),Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount("Z"),Dm->rank_Z(),recvtag); + Dm->Comm.sendrecv(sendID_Z,Dm->sendCount("Z"),Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount("z"),Dm->rank_z(),recvtag); + Dm->Comm.sendrecv(sendID_xy,Dm->sendCount("xy"),Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount("XY"),Dm->rank_XY(),recvtag); + 
Dm->Comm.sendrecv(sendID_XY,Dm->sendCount("XY"),Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount("xy"),Dm->rank_xy(),recvtag); + Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount("Xy"),Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount("xY"),Dm->rank_xY(),recvtag); + Dm->Comm.sendrecv(sendID_xY,Dm->sendCount("xY"),Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount("Xy"),Dm->rank_Xy(),recvtag); + Dm->Comm.sendrecv(sendID_xz,Dm->sendCount("xz"),Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount("XZ"),Dm->rank_XZ(),recvtag); + Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount("XZ"),Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount("xz"),Dm->rank_xz(),recvtag); + Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount("Xz"),Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount("xZ"),Dm->rank_xZ(),recvtag); + Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount("xZ"),Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount("Xz"),Dm->rank_Xz(),recvtag); + Dm->Comm.sendrecv(sendID_yz,Dm->sendCount("yz"),Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount("YZ"),Dm->rank_YZ(),recvtag); + Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount("YZ"),Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount("yz"),Dm->rank_yz(),recvtag); + Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount("Yz"),Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount("yZ"),Dm->rank_yZ(),recvtag); + Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount("yZ"),Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount("Yz"),Dm->rank_Yz(),recvtag); //...................................................................................... 
- UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); - UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); - UnpackID(Dm->recvList_y, Dm->recvCount_y ,recvID_y, id); - UnpackID(Dm->recvList_Y, Dm->recvCount_Y ,recvID_Y, id); - UnpackID(Dm->recvList_z, Dm->recvCount_z ,recvID_z, id); - UnpackID(Dm->recvList_Z, Dm->recvCount_Z ,recvID_Z, id); - UnpackID(Dm->recvList_xy, Dm->recvCount_xy ,recvID_xy, id); - UnpackID(Dm->recvList_Xy, Dm->recvCount_Xy ,recvID_Xy, id); - UnpackID(Dm->recvList_xY, Dm->recvCount_xY ,recvID_xY, id); - UnpackID(Dm->recvList_XY, Dm->recvCount_XY ,recvID_XY, id); - UnpackID(Dm->recvList_xz, Dm->recvCount_xz ,recvID_xz, id); - UnpackID(Dm->recvList_Xz, Dm->recvCount_Xz ,recvID_Xz, id); - UnpackID(Dm->recvList_xZ, Dm->recvCount_xZ ,recvID_xZ, id); - UnpackID(Dm->recvList_XZ, Dm->recvCount_XZ ,recvID_XZ, id); - UnpackID(Dm->recvList_yz, Dm->recvCount_yz ,recvID_yz, id); - UnpackID(Dm->recvList_Yz, Dm->recvCount_Yz ,recvID_Yz, id); - UnpackID(Dm->recvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id); - UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); + UnpackID(Dm->recvList("x"), Dm->recvCount("x") ,recvID_x, id); + UnpackID(Dm->recvList("X"), Dm->recvCount("X") ,recvID_X, id); + UnpackID(Dm->recvList("y"), Dm->recvCount("y") ,recvID_y, id); + UnpackID(Dm->recvList("Y"), Dm->recvCount("Y") ,recvID_Y, id); + UnpackID(Dm->recvList("z"), Dm->recvCount("z") ,recvID_z, id); + UnpackID(Dm->recvList("Z"), Dm->recvCount("Z") ,recvID_Z, id); + UnpackID(Dm->recvList("xy"), Dm->recvCount("xy") ,recvID_xy, id); + UnpackID(Dm->recvList("Xy"), Dm->recvCount("Xy") ,recvID_Xy, id); + UnpackID(Dm->recvList("xY"), Dm->recvCount("xY") ,recvID_xY, id); + UnpackID(Dm->recvList("XY"), Dm->recvCount("XY") ,recvID_XY, id); + UnpackID(Dm->recvList("xz"), Dm->recvCount("xz") ,recvID_xz, id); + UnpackID(Dm->recvList("Xz"), Dm->recvCount("Xz") ,recvID_Xz, id); + UnpackID(Dm->recvList("xZ"), Dm->recvCount("xZ") ,recvID_xZ, id); + UnpackID(Dm->recvList("XZ"), 
Dm->recvCount("XZ") ,recvID_XZ, id); + UnpackID(Dm->recvList("yz"), Dm->recvCount("yz") ,recvID_yz, id); + UnpackID(Dm->recvList("Yz"), Dm->recvCount("Yz") ,recvID_Yz, id); + UnpackID(Dm->recvList("yZ"), Dm->recvCount("yZ") ,recvID_yZ, id); + UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id); //...................................................................................... // double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 89451c7b..83153f6c 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -510,7 +510,7 @@ runAnalysis::commWrapper runAnalysis::getComm( ) runAnalysis::runAnalysis( std::shared_ptr input_db, const RankInfoStruct& rank_info, std::shared_ptr ScaLBL_Comm, - std::shared_ptr Dm, + std::shared_ptr Dm, int Np, bool Regular, IntArray Map ): @@ -518,8 +518,7 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, d_regular ( Regular), d_rank_info( rank_info ), d_Map( Map ), - d_fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1), - d_comm( Utilities::MPI( MPI_COMM_WORLD ).dup() ), + d_comm( Dm->Comm.dup() ), d_ScaLBL_Comm( ScaLBL_Comm) { @@ -535,6 +534,9 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, char rankString[20]; sprintf(rankString,"%05d",Dm->rank()); + d_n[0] = Dm->Nx-2; + d_n[1] = Dm->Ny-2; + d_n[2] = Dm->Nz-2; d_N[0] = Dm->Nx; d_N[1] = Dm->Ny; d_N[2] = Dm->Nz; @@ -566,7 +568,7 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, d_meshData.resize(1); d_meshData[0].meshName = "domain"; - d_meshData[0].mesh = std::make_shared( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz ); + d_meshData[0].mesh = std::make_shared( d_rank_info,d_n[0],d_n[1],d_n[2],Dm->Lx,Dm->Ly,Dm->Lz ); auto PhaseVar = std::make_shared(); auto PressVar = std::make_shared(); auto VxVar = std::make_shared(); @@ -579,7 +581,7 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, PhaseVar->name = "phase"; 
PhaseVar->type = IO::VariableType::VolumeVariable; PhaseVar->dim = 1; - PhaseVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + PhaseVar->data.resize(d_n[0],d_n[1],d_n[2]); d_meshData[0].vars.push_back(PhaseVar); } @@ -587,7 +589,7 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, PressVar->name = "Pressure"; PressVar->type = IO::VariableType::VolumeVariable; PressVar->dim = 1; - PressVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + PressVar->data.resize(d_n[0],d_n[1],d_n[2]); d_meshData[0].vars.push_back(PressVar); } @@ -595,17 +597,17 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, VxVar->name = "Velocity_x"; VxVar->type = IO::VariableType::VolumeVariable; VxVar->dim = 1; - VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + VxVar->data.resize(d_n[0],d_n[1],d_n[2]); d_meshData[0].vars.push_back(VxVar); VyVar->name = "Velocity_y"; VyVar->type = IO::VariableType::VolumeVariable; VyVar->dim = 1; - VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + VyVar->data.resize(d_n[0],d_n[1],d_n[2]); d_meshData[0].vars.push_back(VyVar); VzVar->name = "Velocity_z"; VzVar->type = IO::VariableType::VolumeVariable; VzVar->dim = 1; - VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + VzVar->data.resize(d_n[0],d_n[1],d_n[2]); d_meshData[0].vars.push_back(VzVar); } @@ -613,7 +615,7 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, SignDistVar->name = "SignDist"; SignDistVar->type = IO::VariableType::VolumeVariable; SignDistVar->dim = 1; - SignDistVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + SignDistVar->data.resize(d_n[0],d_n[1],d_n[2]); d_meshData[0].vars.push_back(SignDistVar); } @@ -621,7 +623,7 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, BlobIDVar->name = "BlobID"; BlobIDVar->type = IO::VariableType::VolumeVariable; BlobIDVar->dim = 1; - BlobIDVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + BlobIDVar->data.resize(d_n[0],d_n[1],d_n[2]); d_meshData[0].vars.push_back(BlobIDVar); } @@ -682,7 +684,10 @@ void runAnalysis::createThreads( const std::string& method, 
int N_threads ) } // Create the threads const auto cores = d_tpool.getProcessAffinity(); - if ( cores.empty() ) { + if ( N_threads == 0 ) { + // Special case to serials the analysis for debugging + d_tpool.setNumThreads( 0 ); + } else if ( cores.empty() ) { // We were not able to get the cores for the process d_tpool.setNumThreads( N_threads ); } else if ( method == "default" ) { @@ -921,7 +926,9 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase // if ( matches(type,AnalysisType::CreateRestart) ) { if (timestep%d_restart_interval==0){ // Write the vis files - auto work = new WriteVisWorkItem( timestep, d_meshData, Averages, d_fillData, getComm() ); + commWrapper comm = getComm(); + fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); + auto work = new WriteVisWorkItem( timestep, d_meshData, Averages, fillData, std::move( comm ) ); work->add_dependency(d_wait_blobID); work->add_dependency(d_wait_analysis); work->add_dependency(d_wait_vis); @@ -1025,7 +1032,9 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha if (timestep%d_visualization_interval==0){ // Write the vis files - auto work = new IOWorkItem( timestep, input_db, d_meshData, Averages, d_fillData, getComm() ); + commWrapper comm = getComm(); + fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); + auto work = new IOWorkItem( timestep, input_db, d_meshData, Averages, fillData, std::move( comm ) ); work->add_dependency(d_wait_analysis); work->add_dependency(d_wait_subphase); work->add_dependency(d_wait_vis); @@ -1058,7 +1067,9 @@ void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, PROFILE_START("write vis",1); // if (Averages.WriteVis == true){ - auto work2 = new IOWorkItem(timestep, input_db, d_meshData, Averages, d_fillData, getComm() ); + commWrapper comm = getComm(); + fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); + auto work2 = new IOWorkItem(timestep, input_db, d_meshData, Averages, fillData, 
std::move( comm ) ); work2->add_dependency(d_wait_vis); d_wait_vis = d_tpool.add_work(work2); diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h index 3c5bc7f0..33adbcb0 100644 --- a/analysis/runAnalysis.h +++ b/analysis/runAnalysis.h @@ -84,7 +84,8 @@ public: private: - int d_N[3]; + std::array d_n; // Number of local cells + std::array d_N; // NNumber of local cells with ghosts int d_Np; int d_rank; int d_restart_interval, d_analysis_interval, d_blobid_interval, d_visualization_interval; @@ -98,7 +99,6 @@ private: BlobIDstruct d_last_index; BlobIDList d_last_id_map; std::vector d_meshData; - fillHalo d_fillData; std::string d_restartFile; Utilities::MPI d_comm; Utilities::MPI d_comms[1024]; diff --git a/common/Communication.h b/common/Communication.h index cf83ffe3..4cd9ad70 100644 --- a/common/Communication.h +++ b/common/Communication.h @@ -102,7 +102,7 @@ private: //*************************************************************************************** -inline void PackMeshData(int *list, int count, double *sendbuf, double *data){ +inline void PackMeshData(const int *list, int count, double *sendbuf, double *data){ // Fill in the phase ID values from neighboring processors // This packs up the values that need to be sent from one processor to another int idx,n; @@ -111,7 +111,7 @@ inline void PackMeshData(int *list, int count, double *sendbuf, double *data){ sendbuf[idx] = data[n]; } } -inline void UnpackMeshData(int *list, int count, double *recvbuf, double *data){ +inline void UnpackMeshData(const int *list, int count, double *recvbuf, double *data){ // Fill in the phase ID values from neighboring processors // This unpacks the values once they have been recieved from neighbors int idx,n; diff --git a/common/Domain.cpp b/common/Domain.cpp index eadda60d..ab457f33 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -23,40 +23,15 @@ static inline void fgetl( char * str, int num, FILE * stream ) } 
/******************************************************** - * Constructors/Destructor * + * Constructors * ********************************************************/ Domain::Domain( int nx, int ny, int nz, int rnk, int npx, int npy, int npz, double lx, double ly, double lz, int BC): - database(NULL), Nx(0), Ny(0), Nz(0), + database(nullptr), Nx(0), Ny(0), Nz(0), Lx(0), Ly(0), Lz(0), Volume(0), BoundaryCondition(0), voxel_length(1), - Comm(MPI_COMM_WORLD), + Comm( Utilities::MPI( MPI_COMM_WORLD).dup() ), inlet_layers_x(0), inlet_layers_y(0), inlet_layers_z(0), - inlet_layers_phase(1),outlet_layers_phase(2), - sendCount_x(0), sendCount_y(0), sendCount_z(0), sendCount_X(0), sendCount_Y(0), sendCount_Z(0), - sendCount_xy(0), sendCount_yz(0), sendCount_xz(0), sendCount_Xy(0), sendCount_Yz(0), sendCount_xZ(0), - sendCount_xY(0), sendCount_yZ(0), sendCount_Xz(0), sendCount_XY(0), sendCount_YZ(0), sendCount_XZ(0), - sendList_x(NULL), sendList_y(NULL), sendList_z(NULL), sendList_X(NULL), sendList_Y(NULL), sendList_Z(NULL), - sendList_xy(NULL), sendList_yz(NULL), sendList_xz(NULL), sendList_Xy(NULL), sendList_Yz(NULL), sendList_xZ(NULL), - sendList_xY(NULL), sendList_yZ(NULL), sendList_Xz(NULL), sendList_XY(NULL), sendList_YZ(NULL), sendList_XZ(NULL), - sendBuf_x(NULL), sendBuf_y(NULL), sendBuf_z(NULL), sendBuf_X(NULL), sendBuf_Y(NULL), sendBuf_Z(NULL), - sendBuf_xy(NULL), sendBuf_yz(NULL), sendBuf_xz(NULL), sendBuf_Xy(NULL), sendBuf_Yz(NULL), sendBuf_xZ(NULL), - sendBuf_xY(NULL), sendBuf_yZ(NULL), sendBuf_Xz(NULL), sendBuf_XY(NULL), sendBuf_YZ(NULL), sendBuf_XZ(NULL), - recvCount_x(0), recvCount_y(0), recvCount_z(0), recvCount_X(0), recvCount_Y(0), recvCount_Z(0), - recvCount_xy(0), recvCount_yz(0), recvCount_xz(0), recvCount_Xy(0), recvCount_Yz(0), recvCount_xZ(0), - recvCount_xY(0), recvCount_yZ(0), recvCount_Xz(0), recvCount_XY(0), recvCount_YZ(0), recvCount_XZ(0), - recvList_x(NULL), recvList_y(NULL), recvList_z(NULL), recvList_X(NULL), recvList_Y(NULL), 
recvList_Z(NULL), - recvList_xy(NULL), recvList_yz(NULL), recvList_xz(NULL), recvList_Xy(NULL), recvList_Yz(NULL), recvList_xZ(NULL), - recvList_xY(NULL), recvList_yZ(NULL), recvList_Xz(NULL), recvList_XY(NULL), recvList_YZ(NULL), recvList_XZ(NULL), - recvBuf_x(NULL), recvBuf_y(NULL), recvBuf_z(NULL), recvBuf_X(NULL), recvBuf_Y(NULL), recvBuf_Z(NULL), - recvBuf_xy(NULL), recvBuf_yz(NULL), recvBuf_xz(NULL), recvBuf_Xy(NULL), recvBuf_Yz(NULL), recvBuf_xZ(NULL), - recvBuf_xY(NULL), recvBuf_yZ(NULL), recvBuf_Xz(NULL), recvBuf_XY(NULL), recvBuf_YZ(NULL), recvBuf_XZ(NULL), - sendData_x(NULL), sendData_y(NULL), sendData_z(NULL), sendData_X(NULL), sendData_Y(NULL), sendData_Z(NULL), - sendData_xy(NULL), sendData_yz(NULL), sendData_xz(NULL), sendData_Xy(NULL), sendData_Yz(NULL), sendData_xZ(NULL), - sendData_xY(NULL), sendData_yZ(NULL), sendData_Xz(NULL), sendData_XY(NULL), sendData_YZ(NULL), sendData_XZ(NULL), - recvData_x(NULL), recvData_y(NULL), recvData_z(NULL), recvData_X(NULL), recvData_Y(NULL), recvData_Z(NULL), - recvData_xy(NULL), recvData_yz(NULL), recvData_xz(NULL), recvData_Xy(NULL), recvData_Yz(NULL), recvData_xZ(NULL), - recvData_xY(NULL), recvData_yZ(NULL), recvData_Xz(NULL), recvData_XY(NULL), recvData_YZ(NULL), recvData_XZ(NULL), - id(NULL) + inlet_layers_phase(1),outlet_layers_phase(2) { NULL_USE( rnk ); NULL_USE( npy ); @@ -80,32 +55,7 @@ Domain::Domain( std::shared_ptr db, const Utilities::MPI& Communicator Lx(0), Ly(0), Lz(0), Volume(0), BoundaryCondition(0), inlet_layers_x(0), inlet_layers_y(0), inlet_layers_z(0), outlet_layers_x(0), outlet_layers_y(0), outlet_layers_z(0), - inlet_layers_phase(1),outlet_layers_phase(2), - sendCount_x(0), sendCount_y(0), sendCount_z(0), sendCount_X(0), sendCount_Y(0), sendCount_Z(0), - sendCount_xy(0), sendCount_yz(0), sendCount_xz(0), sendCount_Xy(0), sendCount_Yz(0), sendCount_xZ(0), - sendCount_xY(0), sendCount_yZ(0), sendCount_Xz(0), sendCount_XY(0), sendCount_YZ(0), sendCount_XZ(0), - sendList_x(NULL), 
sendList_y(NULL), sendList_z(NULL), sendList_X(NULL), sendList_Y(NULL), sendList_Z(NULL), - sendList_xy(NULL), sendList_yz(NULL), sendList_xz(NULL), sendList_Xy(NULL), sendList_Yz(NULL), sendList_xZ(NULL), - sendList_xY(NULL), sendList_yZ(NULL), sendList_Xz(NULL), sendList_XY(NULL), sendList_YZ(NULL), sendList_XZ(NULL), - sendBuf_x(NULL), sendBuf_y(NULL), sendBuf_z(NULL), sendBuf_X(NULL), sendBuf_Y(NULL), sendBuf_Z(NULL), - sendBuf_xy(NULL), sendBuf_yz(NULL), sendBuf_xz(NULL), sendBuf_Xy(NULL), sendBuf_Yz(NULL), sendBuf_xZ(NULL), - sendBuf_xY(NULL), sendBuf_yZ(NULL), sendBuf_Xz(NULL), sendBuf_XY(NULL), sendBuf_YZ(NULL), sendBuf_XZ(NULL), - recvCount_x(0), recvCount_y(0), recvCount_z(0), recvCount_X(0), recvCount_Y(0), recvCount_Z(0), - recvCount_xy(0), recvCount_yz(0), recvCount_xz(0), recvCount_Xy(0), recvCount_Yz(0), recvCount_xZ(0), - recvCount_xY(0), recvCount_yZ(0), recvCount_Xz(0), recvCount_XY(0), recvCount_YZ(0), recvCount_XZ(0), - recvList_x(NULL), recvList_y(NULL), recvList_z(NULL), recvList_X(NULL), recvList_Y(NULL), recvList_Z(NULL), - recvList_xy(NULL), recvList_yz(NULL), recvList_xz(NULL), recvList_Xy(NULL), recvList_Yz(NULL), recvList_xZ(NULL), - recvList_xY(NULL), recvList_yZ(NULL), recvList_Xz(NULL), recvList_XY(NULL), recvList_YZ(NULL), recvList_XZ(NULL), - recvBuf_x(NULL), recvBuf_y(NULL), recvBuf_z(NULL), recvBuf_X(NULL), recvBuf_Y(NULL), recvBuf_Z(NULL), - recvBuf_xy(NULL), recvBuf_yz(NULL), recvBuf_xz(NULL), recvBuf_Xy(NULL), recvBuf_Yz(NULL), recvBuf_xZ(NULL), - recvBuf_xY(NULL), recvBuf_yZ(NULL), recvBuf_Xz(NULL), recvBuf_XY(NULL), recvBuf_YZ(NULL), recvBuf_XZ(NULL), - sendData_x(NULL), sendData_y(NULL), sendData_z(NULL), sendData_X(NULL), sendData_Y(NULL), sendData_Z(NULL), - sendData_xy(NULL), sendData_yz(NULL), sendData_xz(NULL), sendData_Xy(NULL), sendData_Yz(NULL), sendData_xZ(NULL), - sendData_xY(NULL), sendData_yZ(NULL), sendData_Xz(NULL), sendData_XY(NULL), sendData_YZ(NULL), sendData_XZ(NULL), - recvData_x(NULL), recvData_y(NULL), 
recvData_z(NULL), recvData_X(NULL), recvData_Y(NULL), recvData_Z(NULL), - recvData_xy(NULL), recvData_yz(NULL), recvData_xz(NULL), recvData_Xy(NULL), recvData_Yz(NULL), recvData_xZ(NULL), - recvData_xY(NULL), recvData_yZ(NULL), recvData_Xz(NULL), recvData_XY(NULL), recvData_YZ(NULL), recvData_XZ(NULL), - id(NULL) + inlet_layers_phase(1),outlet_layers_phase(2) { Comm = Communicator.dup(); @@ -116,54 +66,18 @@ Domain::Domain( std::shared_ptr db, const Utilities::MPI& Communicator Comm.barrier(); } + +/******************************************************** + * Destructor * + ********************************************************/ Domain::~Domain() { - // Free sendList - delete [] sendList_x; delete [] sendList_y; delete [] sendList_z; - delete [] sendList_X; delete [] sendList_Y; delete [] sendList_Z; - delete [] sendList_xy; delete [] sendList_yz; delete [] sendList_xz; - delete [] sendList_Xy; delete [] sendList_Yz; delete [] sendList_xZ; - delete [] sendList_xY; delete [] sendList_yZ; delete [] sendList_Xz; - delete [] sendList_XY; delete [] sendList_YZ; delete [] sendList_XZ; - // Free sendBuf - delete [] sendBuf_x; delete [] sendBuf_y; delete [] sendBuf_z; - delete [] sendBuf_X; delete [] sendBuf_Y; delete [] sendBuf_Z; - delete [] sendBuf_xy; delete [] sendBuf_yz; delete [] sendBuf_xz; - delete [] sendBuf_Xy; delete [] sendBuf_Yz; delete [] sendBuf_xZ; - delete [] sendBuf_xY; delete [] sendBuf_yZ; delete [] sendBuf_Xz; - delete [] sendBuf_XY; delete [] sendBuf_YZ; delete [] sendBuf_XZ; - // Free recvList - delete [] recvList_x; delete [] recvList_y; delete [] recvList_z; - delete [] recvList_X; delete [] recvList_Y; delete [] recvList_Z; - delete [] recvList_xy; delete [] recvList_yz; delete [] recvList_xz; - delete [] recvList_Xy; delete [] recvList_Yz; delete [] recvList_xZ; - delete [] recvList_xY; delete [] recvList_yZ; delete [] recvList_Xz; - delete [] recvList_XY; delete [] recvList_YZ; delete [] recvList_XZ; - // Free recvBuf - delete [] recvBuf_x; 
delete [] recvBuf_y; delete [] recvBuf_z; - delete [] recvBuf_X; delete [] recvBuf_Y; delete [] recvBuf_Z; - delete [] recvBuf_xy; delete [] recvBuf_yz; delete [] recvBuf_xz; - delete [] recvBuf_Xy; delete [] recvBuf_Yz; delete [] recvBuf_xZ; - delete [] recvBuf_xY; delete [] recvBuf_yZ; delete [] recvBuf_Xz; - delete [] recvBuf_XY; delete [] recvBuf_YZ; delete [] recvBuf_XZ; - // Free sendData - delete [] sendData_x; delete [] sendData_y; delete [] sendData_z; - delete [] sendData_X; delete [] sendData_Y; delete [] sendData_Z; - delete [] sendData_xy; delete [] sendData_xY; delete [] sendData_Xy; - delete [] sendData_XY; delete [] sendData_xz; delete [] sendData_xZ; - delete [] sendData_Xz; delete [] sendData_XZ; delete [] sendData_yz; - delete [] sendData_yZ; delete [] sendData_Yz; delete [] sendData_YZ; - // Free recvData - delete [] recvData_x; delete [] recvData_y; delete [] recvData_z; - delete [] recvData_X; delete [] recvData_Y; delete [] recvData_Z; - delete [] recvData_xy; delete [] recvData_xY; delete [] recvData_Xy; - delete [] recvData_XY; delete [] recvData_xz; delete [] recvData_xZ; - delete [] recvData_Xz; delete [] recvData_XZ; delete [] recvData_yz; - delete [] recvData_yZ; delete [] recvData_Yz; delete [] recvData_YZ; - // Free id - delete [] id; } + +/******************************************************** + * Initialization * + ********************************************************/ void Domain::initialize( std::shared_ptr db ) { d_db = db; @@ -228,13 +142,115 @@ void Domain::initialize( std::shared_ptr db ) if (myrank==0) printf("voxel length = %f micron \n", voxel_length); - id = new signed char[N]; - memset(id,0,N); + id = std::vector( N, 0 ); BoundaryCondition = d_db->getScalar("BC"); int nprocs = Comm.getSize(); INSIST(nprocs == nproc[0]*nproc[1]*nproc[2],"Fatal error in processor count!"); } + +/******************************************************** + * Get send/recv lists * + ********************************************************/ 
+const std::vector& Domain::getRecvList( const char* dir ) const +{ + if ( dir[0] == 'x' ) { + if ( dir[1] == 0 ) + return recvList_x; + else if ( dir[1] == 'y' ) + return recvList_xy; + else if ( dir[1] == 'Y' ) + return recvList_xY; + else if ( dir[1] == 'z' ) + return recvList_xz; + else if ( dir[1] == 'Z' ) + return recvList_xZ; + } else if ( dir[0] == 'y' ) { + if ( dir[1] == 0 ) + return recvList_y; + else if ( dir[1] == 'z' ) + return recvList_yz; + else if ( dir[1] == 'Z' ) + return recvList_yZ; + } else if ( dir[0] == 'z' ) { + if ( dir[1] == 0 ) + return recvList_z; + } else if ( dir[0] == 'X' ) { + if ( dir[1] == 0 ) + return recvList_X; + else if ( dir[1] == 'y' ) + return recvList_Xy; + else if ( dir[1] == 'Y' ) + return recvList_XY; + else if ( dir[1] == 'z' ) + return recvList_Xz; + else if ( dir[1] == 'Z' ) + return recvList_XZ; + } else if ( dir[0] == 'Y' ) { + if ( dir[1] == 0 ) + return recvList_Y; + else if ( dir[1] == 'z' ) + return recvList_Yz; + else if ( dir[1] == 'Z' ) + return recvList_YZ; + } else if ( dir[0] == 'Z' ) { + if ( dir[1] == 0 ) + return recvList_Z; + } + throw std::logic_error("Internal error"); +} +const std::vector& Domain::getSendList( const char* dir ) const +{ + if ( dir[0] == 'x' ) { + if ( dir[1] == 0 ) + return sendList_x; + else if ( dir[1] == 'y' ) + return sendList_xy; + else if ( dir[1] == 'Y' ) + return sendList_xY; + else if ( dir[1] == 'z' ) + return sendList_xz; + else if ( dir[1] == 'Z' ) + return sendList_xZ; + } else if ( dir[0] == 'y' ) { + if ( dir[1] == 0 ) + return sendList_y; + else if ( dir[1] == 'z' ) + return sendList_yz; + else if ( dir[1] == 'Z' ) + return sendList_yZ; + } else if ( dir[0] == 'z' ) { + if ( dir[1] == 0 ) + return sendList_z; + } else if ( dir[0] == 'X' ) { + if ( dir[1] == 0 ) + return sendList_X; + else if ( dir[1] == 'y' ) + return sendList_Xy; + else if ( dir[1] == 'Y' ) + return sendList_XY; + else if ( dir[1] == 'z' ) + return sendList_Xz; + else if ( dir[1] == 'Z' ) + return 
sendList_XZ; + } else if ( dir[0] == 'Y' ) { + if ( dir[1] == 0 ) + return sendList_Y; + else if ( dir[1] == 'z' ) + return sendList_Yz; + else if ( dir[1] == 'Z' ) + return sendList_YZ; + } else if ( dir[0] == 'Z' ) { + if ( dir[1] == 0 ) + return sendList_Z; + } + throw std::logic_error("Internal error"); +} + + +/******************************************************** + * Decomp * + ********************************************************/ void Domain::Decomp( const std::string& Filename ) { //....................................................................... @@ -319,7 +335,7 @@ void Domain::Decomp( const std::string& Filename ) global_Ny = SIZE[1]; global_Nz = SIZE[2]; nprocs=nprocx*nprocy*nprocz; - char *SegData = NULL; + char *SegData = nullptr; if (RANK==0){ printf("Input media: %s\n",Filename.c_str()); @@ -337,7 +353,7 @@ void Domain::Decomp( const std::string& Filename ) if (ReadType == "8bit"){ printf("Reading 8-bit input data \n"); FILE *SEGDAT = fopen(Filename.c_str(),"rb"); - if (SEGDAT==NULL) ERROR("Domain.cpp: Error reading segmented data"); + if (!SEGDAT) ERROR("Domain.cpp: Error reading segmented data"); size_t ReadSeg; ReadSeg=fread(SegData,1,SIZE,SEGDAT); if (ReadSeg != size_t(SIZE)) printf("Domain.cpp: Error reading segmented data \n"); @@ -348,7 +364,7 @@ void Domain::Decomp( const std::string& Filename ) short int *InputData; InputData = new short int[SIZE]; FILE *SEGDAT = fopen(Filename.c_str(),"rb"); - if (SEGDAT==NULL) ERROR("Domain.cpp: Error reading segmented data"); + if (!SEGDAT) ERROR("Domain.cpp: Error reading segmented data"); size_t ReadSeg; ReadSeg=fread(InputData,2,SIZE,SEGDAT); if (ReadSeg != size_t(SIZE)) printf("Domain.cpp: Error reading segmented data \n"); @@ -575,7 +591,7 @@ void Domain::Decomp( const std::string& Filename ) else{ // Recieve the subdomain from rank = 0 //printf("Ready to recieve data %i at process %i \n", N,rank); - Comm.recv(id,N,0,15); + Comm.recv(id.data(),N,0,15); } Comm.barrier(); } @@ -680,6 
+696,9 @@ void Domain::CommInit() int sendtag = 21; int recvtag = 21; //...................................................................................... + int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; + int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; + int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; @@ -717,24 +736,24 @@ void Domain::CommInit() } // allocate send lists - sendList_x = new int [sendCount_x]; - sendList_y = new int [sendCount_y]; - sendList_z = new int [sendCount_z]; - sendList_X = new int [sendCount_X]; - sendList_Y = new int [sendCount_Y]; - sendList_Z = new int [sendCount_Z]; - sendList_xy = new int [sendCount_xy]; - sendList_yz = new int [sendCount_yz]; - sendList_xz = new int [sendCount_xz]; - sendList_Xy = new int [sendCount_Xy]; - sendList_Yz = new int [sendCount_Yz]; - sendList_xZ = new int [sendCount_xZ]; - sendList_xY = new int [sendCount_xY]; - sendList_yZ = new int [sendCount_yZ]; - sendList_Xz = new int [sendCount_Xz]; - sendList_XY = new int [sendCount_XY]; - sendList_YZ = new int [sendCount_YZ]; - sendList_XZ = new int [sendCount_XZ]; + sendList_x.resize( sendCount_x, 0 ); + sendList_y.resize( sendCount_y, 0 ); + sendList_z.resize( sendCount_z, 0 ); + sendList_X.resize( sendCount_X, 0 ); + sendList_Y.resize( sendCount_Y, 0 ); + sendList_Z.resize( sendCount_Z, 0 ); + sendList_xy.resize( sendCount_xy, 0 ); + sendList_yz.resize( sendCount_yz, 0 ); + sendList_xz.resize( sendCount_xz, 0 ); + sendList_Xy.resize( sendCount_Xy, 0 ); + sendList_Yz.resize( sendCount_Yz, 0 ); + sendList_xZ.resize( sendCount_xZ, 0 ); + sendList_xY.resize( sendCount_xY, 0 ); + 
sendList_yZ.resize( sendCount_yZ, 0 ); + sendList_Xz.resize( sendCount_Xz, 0 ); + sendList_XY.resize( sendCount_XY, 0 ); + sendList_YZ.resize( sendCount_YZ, 0 ); + sendList_XZ.resize( sendCount_XZ, 0 ); // Populate the send list sendCount_x = sendCount_y = sendCount_z = sendCount_X = sendCount_Y = sendCount_Z = 0; sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; @@ -772,26 +791,10 @@ void Domain::CommInit() } } - // allocate send buffers - sendBuf_x = new int [sendCount_x]; - sendBuf_y = new int [sendCount_y]; - sendBuf_z = new int [sendCount_z]; - sendBuf_X = new int [sendCount_X]; - sendBuf_Y = new int [sendCount_Y]; - sendBuf_Z = new int [sendCount_Z]; - sendBuf_xy = new int [sendCount_xy]; - sendBuf_yz = new int [sendCount_yz]; - sendBuf_xz = new int [sendCount_xz]; - sendBuf_Xy = new int [sendCount_Xy]; - sendBuf_Yz = new int [sendCount_Yz]; - sendBuf_xZ = new int [sendCount_xZ]; - sendBuf_xY = new int [sendCount_xY]; - sendBuf_yZ = new int [sendCount_yZ]; - sendBuf_Xz = new int [sendCount_Xz]; - sendBuf_XY = new int [sendCount_XY]; - sendBuf_YZ = new int [sendCount_YZ]; - sendBuf_XZ = new int [sendCount_XZ]; //...................................................................................... 
+ int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; + int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; + int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; req1[0] = Comm.Isend(&sendCount_x,1,rank_x(),sendtag+0); req2[0] = Comm.Irecv(&recvCount_X,1,rank_X(),recvtag+0); req1[1] = Comm.Isend(&sendCount_X,1,rank_X(),sendtag+1); @@ -831,63 +834,62 @@ void Domain::CommInit() Comm.waitAll(18,req1); Comm.waitAll(18,req2); Comm.barrier(); + // allocate recv lists + recvList_x.resize( recvCount_x, 0 ); + recvList_y.resize( recvCount_y, 0 ); + recvList_z.resize( recvCount_z, 0 ); + recvList_X.resize( recvCount_X, 0 ); + recvList_Y.resize( recvCount_Y, 0 ); + recvList_Z.resize( recvCount_Z, 0 ); + recvList_xy.resize( recvCount_xy, 0 ); + recvList_yz.resize( recvCount_yz, 0 ); + recvList_xz.resize( recvCount_xz, 0 ); + recvList_Xy.resize( recvCount_Xy, 0 ); + recvList_Yz.resize( recvCount_Yz, 0 ); + recvList_xZ.resize( recvCount_xZ, 0 ); + recvList_xY.resize( recvCount_xY, 0 ); + recvList_yZ.resize( recvCount_yZ, 0 ); + recvList_Xz.resize( recvCount_Xz, 0 ); + recvList_XY.resize( recvCount_XY, 0 ); + recvList_YZ.resize( recvCount_YZ, 0 ); + recvList_XZ.resize( recvCount_XZ, 0 ); //...................................................................................... 
- // recv buffers - recvList_x = new int [recvCount_x]; - recvList_y = new int [recvCount_y]; - recvList_z = new int [recvCount_z]; - recvList_X = new int [recvCount_X]; - recvList_Y = new int [recvCount_Y]; - recvList_Z = new int [recvCount_Z]; - recvList_xy = new int [recvCount_xy]; - recvList_yz = new int [recvCount_yz]; - recvList_xz = new int [recvCount_xz]; - recvList_Xy = new int [recvCount_Xy]; - recvList_Yz = new int [recvCount_Yz]; - recvList_xZ = new int [recvCount_xZ]; - recvList_xY = new int [recvCount_xY]; - recvList_yZ = new int [recvCount_yZ]; - recvList_Xz = new int [recvCount_Xz]; - recvList_XY = new int [recvCount_XY]; - recvList_YZ = new int [recvCount_YZ]; - recvList_XZ = new int [recvCount_XZ]; - //...................................................................................... - req1[0] = Comm.Isend(sendList_x,sendCount_x,rank_x(),sendtag); - req2[0] = Comm.Irecv(recvList_X,recvCount_X,rank_X(),recvtag); - req1[1] = Comm.Isend(sendList_X,sendCount_X,rank_X(),sendtag); - req2[1] = Comm.Irecv(recvList_x,recvCount_x,rank_x(),recvtag); - req1[2] = Comm.Isend(sendList_y,sendCount_y,rank_y(),sendtag); - req2[2] = Comm.Irecv(recvList_Y,recvCount_Y,rank_Y(),recvtag); - req1[3] = Comm.Isend(sendList_Y,sendCount_Y,rank_Y(),sendtag); - req2[3] = Comm.Irecv(recvList_y,recvCount_y,rank_y(),recvtag); - req1[4] = Comm.Isend(sendList_z,sendCount_z,rank_z(),sendtag); - req2[4] = Comm.Irecv(recvList_Z,recvCount_Z,rank_Z(),recvtag); - req1[5] = Comm.Isend(sendList_Z,sendCount_Z,rank_Z(),sendtag); - req2[5] = Comm.Irecv(recvList_z,recvCount_z,rank_z(),recvtag); - req1[6] = Comm.Isend(sendList_xy,sendCount_xy,rank_xy(),sendtag); - req2[6] = Comm.Irecv(recvList_XY,recvCount_XY,rank_XY(),recvtag); - req1[7] = Comm.Isend(sendList_XY,sendCount_XY,rank_XY(),sendtag); - req2[7] = Comm.Irecv(recvList_xy,recvCount_xy,rank_xy(),recvtag); - req1[8] = Comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy(),sendtag); - req2[8] = 
Comm.Irecv(recvList_xY,recvCount_xY,rank_xY(),recvtag); - req1[9] = Comm.Isend(sendList_xY,sendCount_xY,rank_xY(),sendtag); - req2[9] = Comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy(),recvtag); - req1[10] = Comm.Isend(sendList_xz,sendCount_xz,rank_xz(),sendtag); - req2[10] = Comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ(),recvtag); - req1[11] = Comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ(),sendtag); - req2[11] = Comm.Irecv(recvList_xz,recvCount_xz,rank_xz(),recvtag); - req1[12] = Comm.Isend(sendList_Xz,sendCount_Xz,rank_Xz(),sendtag); - req2[12] = Comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ(),recvtag); - req1[13] = Comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ(),sendtag); - req2[13] = Comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz(),recvtag); - req1[14] = Comm.Isend(sendList_yz,sendCount_yz,rank_yz(),sendtag); - req2[14] = Comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ(),recvtag); - req1[15] = Comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ(),sendtag); - req2[15] = Comm.Irecv(recvList_yz,recvCount_yz,rank_yz(),recvtag); - req1[16] = Comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz(),sendtag); - req2[16] = Comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ(),recvtag); - req1[17] = Comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ(),sendtag); - req2[17] = Comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz(),recvtag); + req1[0] = Comm.Isend(sendList_x.data(),sendCount_x,rank_x(),sendtag); + req2[0] = Comm.Irecv(recvList_X.data(),recvCount_X,rank_X(),recvtag); + req1[1] = Comm.Isend(sendList_X.data(),sendCount_X,rank_X(),sendtag); + req2[1] = Comm.Irecv(recvList_x.data(),recvCount_x,rank_x(),recvtag); + req1[2] = Comm.Isend(sendList_y.data(),sendCount_y,rank_y(),sendtag); + req2[2] = Comm.Irecv(recvList_Y.data(),recvCount_Y,rank_Y(),recvtag); + req1[3] = Comm.Isend(sendList_Y.data(),sendCount_Y,rank_Y(),sendtag); + req2[3] = Comm.Irecv(recvList_y.data(),recvCount_y,rank_y(),recvtag); + req1[4] = Comm.Isend(sendList_z.data(),sendCount_z,rank_z(),sendtag); + req2[4] = 
Comm.Irecv(recvList_Z.data(),recvCount_Z,rank_Z(),recvtag); + req1[5] = Comm.Isend(sendList_Z.data(),sendCount_Z,rank_Z(),sendtag); + req2[5] = Comm.Irecv(recvList_z.data(),recvCount_z,rank_z(),recvtag); + req1[6] = Comm.Isend(sendList_xy.data(),sendCount_xy,rank_xy(),sendtag); + req2[6] = Comm.Irecv(recvList_XY.data(),recvCount_XY,rank_XY(),recvtag); + req1[7] = Comm.Isend(sendList_XY.data(),sendCount_XY,rank_XY(),sendtag); + req2[7] = Comm.Irecv(recvList_xy.data(),recvCount_xy,rank_xy(),recvtag); + req1[8] = Comm.Isend(sendList_Xy.data(),sendCount_Xy,rank_Xy(),sendtag); + req2[8] = Comm.Irecv(recvList_xY.data(),recvCount_xY,rank_xY(),recvtag); + req1[9] = Comm.Isend(sendList_xY.data(),sendCount_xY,rank_xY(),sendtag); + req2[9] = Comm.Irecv(recvList_Xy.data(),recvCount_Xy,rank_Xy(),recvtag); + req1[10] = Comm.Isend(sendList_xz.data(),sendCount_xz,rank_xz(),sendtag); + req2[10] = Comm.Irecv(recvList_XZ.data(),recvCount_XZ,rank_XZ(),recvtag); + req1[11] = Comm.Isend(sendList_XZ.data(),sendCount_XZ,rank_XZ(),sendtag); + req2[11] = Comm.Irecv(recvList_xz.data(),recvCount_xz,rank_xz(),recvtag); + req1[12] = Comm.Isend(sendList_Xz.data(),sendCount_Xz,rank_Xz(),sendtag); + req2[12] = Comm.Irecv(recvList_xZ.data(),recvCount_xZ,rank_xZ(),recvtag); + req1[13] = Comm.Isend(sendList_xZ.data(),sendCount_xZ,rank_xZ(),sendtag); + req2[13] = Comm.Irecv(recvList_Xz.data(),recvCount_Xz,rank_Xz(),recvtag); + req1[14] = Comm.Isend(sendList_yz.data(),sendCount_yz,rank_yz(),sendtag); + req2[14] = Comm.Irecv(recvList_YZ.data(),recvCount_YZ,rank_YZ(),recvtag); + req1[15] = Comm.Isend(sendList_YZ.data(),sendCount_YZ,rank_YZ(),sendtag); + req2[15] = Comm.Irecv(recvList_yz.data(),recvCount_yz,rank_yz(),recvtag); + req1[16] = Comm.Isend(sendList_Yz.data(),sendCount_Yz,rank_Yz(),sendtag); + req2[16] = Comm.Irecv(recvList_yZ.data(),recvCount_yZ,rank_yZ(),recvtag); + req1[17] = Comm.Isend(sendList_yZ.data(),sendCount_yZ,rank_yZ(),sendtag); + req2[17] = 
Comm.Irecv(recvList_Yz.data(),recvCount_Yz,rank_Yz(),recvtag); Comm.waitAll(18,req1); Comm.waitAll(18,req2); //...................................................................................... @@ -910,65 +912,7 @@ void Domain::CommInit() for (int idx=0; idx #include #include +#include #include #include #include @@ -155,24 +156,14 @@ public: // Public variables (need to create accessors instead) // Get the actual D3Q19 communication counts (based on location of solid phase) // Discrete velocity set symmetry implies the sendcount = recvcount //...................................................................................... - int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; - int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; - int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; - //...................................................................................... - int *sendList_x, *sendList_y, *sendList_z, *sendList_X, *sendList_Y, *sendList_Z; - int *sendList_xy, *sendList_yz, *sendList_xz, *sendList_Xy, *sendList_Yz, *sendList_xZ; - int *sendList_xY, *sendList_yZ, *sendList_Xz, *sendList_XY, *sendList_YZ, *sendList_XZ; - //...................................................................................... - int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; - int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; - int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; - //...................................................................................... 
- int *recvList_x, *recvList_y, *recvList_z, *recvList_X, *recvList_Y, *recvList_Z; - int *recvList_xy, *recvList_yz, *recvList_xz, *recvList_Xy, *recvList_Yz, *recvList_xZ; - int *recvList_xY, *recvList_yZ, *recvList_Xz, *recvList_XY, *recvList_YZ, *recvList_XZ; + inline int recvCount( const char* dir ) const { return getRecvList( dir ).size(); } + inline int sendCount( const char* dir ) const { return getSendList( dir ).size(); } + inline const int* recvList( const char* dir ) const { return getRecvList( dir ).data(); } + inline const int* sendList( const char* dir ) const { return getSendList( dir ).data(); } + //...................................................................................... // Solid indicator function - signed char *id; + std::vector id; void ReadIDs(); void Decomp( const std::string& filename ); @@ -189,21 +180,18 @@ private: //...................................................................................... MPI_Request req1[18], req2[18]; + //...................................................................................... + std::vector sendList_x, sendList_y, sendList_z, sendList_X, sendList_Y, sendList_Z; + std::vector sendList_xy, sendList_yz, sendList_xz, sendList_Xy, sendList_Yz, sendList_xZ; + std::vector sendList_xY, sendList_yZ, sendList_Xz, sendList_XY, sendList_YZ, sendList_XZ; + //...................................................................................... + std::vector recvList_x, recvList_y, recvList_z, recvList_X, recvList_Y, recvList_Z; + std::vector recvList_xy, recvList_yz, recvList_xz, recvList_Xy, recvList_Yz, recvList_xZ; + std::vector recvList_xY, recvList_yZ, recvList_Xz, recvList_XY, recvList_YZ, recvList_XZ; + //...................................................................................... 
+ const std::vector& getRecvList( const char* dir ) const; + const std::vector& getSendList( const char* dir ) const; - int *sendBuf_x, *sendBuf_y, *sendBuf_z, *sendBuf_X, *sendBuf_Y, *sendBuf_Z; - int *sendBuf_xy, *sendBuf_yz, *sendBuf_xz, *sendBuf_Xy, *sendBuf_Yz, *sendBuf_xZ; - int *sendBuf_xY, *sendBuf_yZ, *sendBuf_Xz, *sendBuf_XY, *sendBuf_YZ, *sendBuf_XZ; - //...................................................................................... - int *recvBuf_x, *recvBuf_y, *recvBuf_z, *recvBuf_X, *recvBuf_Y, *recvBuf_Z; - int *recvBuf_xy, *recvBuf_yz, *recvBuf_xz, *recvBuf_Xy, *recvBuf_Yz, *recvBuf_xZ; - int *recvBuf_xY, *recvBuf_yZ, *recvBuf_Xz, *recvBuf_XY, *recvBuf_YZ, *recvBuf_XZ; - //...................................................................................... - double *sendData_x, *sendData_y, *sendData_z, *sendData_X, *sendData_Y, *sendData_Z; - double *sendData_xy, *sendData_yz, *sendData_xz, *sendData_Xy, *sendData_Yz, *sendData_xZ; - double *sendData_xY, *sendData_yZ, *sendData_Xz, *sendData_XY, *sendData_YZ, *sendData_XZ; - double *recvData_x, *recvData_y, *recvData_z, *recvData_X, *recvData_Y, *recvData_Z; - double *recvData_xy, *recvData_yz, *recvData_xz, *recvData_Xy, *recvData_Yz, *recvData_xZ; - double *recvData_xY, *recvData_yZ, *recvData_Xz, *recvData_XY, *recvData_YZ, *recvData_XZ; }; diff --git a/common/MPI.cpp b/common/MPI.cpp index 8b09bc49..7604ae27 100644 --- a/common/MPI.cpp +++ b/common/MPI.cpp @@ -3491,7 +3491,8 @@ void MPI_CLASS::wait( MPI_Request request ) { PROFILE_START( "wait", profile_level ); MPI_Status status; - int flag = 0; + MPI_Wait( &request, &status ); + /*int flag = 0; int err = MPI_Test( &request, &flag, &status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { @@ -3499,7 +3500,7 @@ void MPI_CLASS::wait( MPI_Request request ) sched_yield(); // Check if the request has finished MPI_Test( &request, &flag, &status ); - } + }*/ PROFILE_STOP( "wait", profile_level ); } 
int MPI_CLASS::waitAny( int count, MPI_Request *request ) @@ -3508,8 +3509,9 @@ int MPI_CLASS::waitAny( int count, MPI_Request *request ) return -1; PROFILE_START( "waitAny", profile_level ); int index = -1; - int flag = 0; auto status = new MPI_Status[count]; + MPI_Waitany( count, request, &index, status ); + /*int flag = 0; int err = MPI_Testany( count, request, &index, &flag, status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { @@ -3518,7 +3520,7 @@ int MPI_CLASS::waitAny( int count, MPI_Request *request ) // Check if the request has finished MPI_Testany( count, request, &index, &flag, status ); } - MPI_ASSERT( index >= 0 ); // Check that the index is valid + MPI_ASSERT( index >= 0 ); // Check that the index is valid*/ delete[] status; PROFILE_STOP( "waitAny", profile_level ); return index; @@ -3528,8 +3530,9 @@ void MPI_CLASS::waitAll( int count, MPI_Request *request ) if ( count == 0 ) return; PROFILE_START( "waitAll", profile_level ); - int flag = 0; auto status = new MPI_Status[count]; + MPI_Waitall( count, request, status ); + /*int flag = 0; int err = MPI_Testall( count, request, &flag, status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid while ( !flag ) { @@ -3537,7 +3540,7 @@ void MPI_CLASS::waitAll( int count, MPI_Request *request ) sched_yield(); // Check if the request has finished MPI_Testall( count, request, &flag, status ); - } + }*/ PROFILE_STOP( "waitAll", profile_level ); delete[] status; } @@ -3549,7 +3552,8 @@ std::vector MPI_CLASS::waitSome( int count, MPI_Request *request ) std::vector indicies( count, -1 ); auto *status = new MPI_Status[count]; int outcount = 0; - int err = MPI_Testsome( count, request, &outcount, &indicies[0], status ); + MPI_Waitsome( count, request, &outcount, indicies.data(), status ); + /*int err = MPI_Testsome( count, request, &outcount, &indicies[0], status ); MPI_ASSERT( err == MPI_SUCCESS ); // Check that the first call is valid MPI_ASSERT( 
outcount != MPI_UNDEFINED ); // Check that the first call is valid while ( outcount == 0 ) { @@ -3557,7 +3561,7 @@ std::vector MPI_CLASS::waitSome( int count, MPI_Request *request ) sched_yield(); // Check if the request has finished MPI_Testsome( count, request, &outcount, &indicies[0], status ); - } + }*/ indicies.resize( outcount ); delete[] status; PROFILE_STOP( "waitSome", profile_level ); diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 6f2966e7..51821176 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -32,42 +32,42 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ rank_YZ=Dm->rank_YZ(); rank_yZ=Dm->rank_yZ(); rank_Yz=Dm->rank_Yz(); - sendCount_x=Dm->sendCount_x; - sendCount_y=Dm->sendCount_y; - sendCount_z=Dm->sendCount_z; - sendCount_X=Dm->sendCount_X; - sendCount_Y=Dm->sendCount_Y; - sendCount_Z=Dm->sendCount_Z; - sendCount_xy=Dm->sendCount_xy; - sendCount_yz=Dm->sendCount_yz; - sendCount_xz=Dm->sendCount_xz; - sendCount_Xy=Dm->sendCount_Xy; - sendCount_Yz=Dm->sendCount_Yz; - sendCount_xZ=Dm->sendCount_xZ; - sendCount_xY=Dm->sendCount_xY; - sendCount_yZ=Dm->sendCount_yZ; - sendCount_Xz=Dm->sendCount_Xz; - sendCount_XY=Dm->sendCount_XY; - sendCount_YZ=Dm->sendCount_YZ; - sendCount_XZ=Dm->sendCount_XZ; - recvCount_x=Dm->recvCount_x; - recvCount_y=Dm->recvCount_y; - recvCount_z=Dm->recvCount_z; - recvCount_X=Dm->recvCount_X; - recvCount_Y=Dm->recvCount_Y; - recvCount_Z=Dm->recvCount_Z; - recvCount_xy=Dm->recvCount_xy; - recvCount_yz=Dm->recvCount_yz; - recvCount_xz=Dm->recvCount_xz; - recvCount_Xy=Dm->recvCount_Xy; - recvCount_Yz=Dm->recvCount_Yz; - recvCount_xZ=Dm->recvCount_xZ; - recvCount_xY=Dm->recvCount_xY; - recvCount_yZ=Dm->recvCount_yZ; - recvCount_Xz=Dm->recvCount_Xz; - recvCount_XY=Dm->recvCount_XY; - recvCount_YZ=Dm->recvCount_YZ; - recvCount_XZ=Dm->recvCount_XZ; + sendCount_x=Dm->sendCount("x"); + sendCount_y=Dm->sendCount("y"); + sendCount_z=Dm->sendCount("z"); + sendCount_X=Dm->sendCount("X"); + 
sendCount_Y=Dm->sendCount("Y"); + sendCount_Z=Dm->sendCount("Z"); + sendCount_xy=Dm->sendCount("xy"); + sendCount_yz=Dm->sendCount("yz"); + sendCount_xz=Dm->sendCount("xz"); + sendCount_Xy=Dm->sendCount("Xy"); + sendCount_Yz=Dm->sendCount("Yz"); + sendCount_xZ=Dm->sendCount("xZ"); + sendCount_xY=Dm->sendCount("xY"); + sendCount_yZ=Dm->sendCount("yZ"); + sendCount_Xz=Dm->sendCount("Xz"); + sendCount_XY=Dm->sendCount("XY"); + sendCount_YZ=Dm->sendCount("YZ"); + sendCount_XZ=Dm->sendCount("XZ"); + recvCount_x=Dm->recvCount("x"); + recvCount_y=Dm->recvCount("y"); + recvCount_z=Dm->recvCount("z"); + recvCount_X=Dm->recvCount("X"); + recvCount_Y=Dm->recvCount("Y"); + recvCount_Z=Dm->recvCount("Z"); + recvCount_xy=Dm->recvCount("xy"); + recvCount_yz=Dm->recvCount("yz"); + recvCount_xz=Dm->recvCount("xz"); + recvCount_Xy=Dm->recvCount("Xy"); + recvCount_Yz=Dm->recvCount("Yz"); + recvCount_xZ=Dm->recvCount("xZ"); + recvCount_xY=Dm->recvCount("xY"); + recvCount_yZ=Dm->recvCount("yZ"); + recvCount_Xz=Dm->recvCount("Xz"); + recvCount_XY=Dm->recvCount("XY"); + recvCount_YZ=Dm->recvCount("YZ"); + recvCount_XZ=Dm->recvCount("XZ"); iproc = Dm->iproc(); jproc = Dm->jproc(); @@ -174,43 +174,43 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory //...................................................................................... 
- ScaLBL_CopyToZeroCopy(dvcSendList_x,Dm->sendList_x,sendCount_x*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_X,Dm->sendList_X,sendCount_X*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_y,Dm->sendList_y,sendCount_y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Y,Dm->sendList_Y,sendCount_Y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_z,Dm->sendList_z,sendCount_z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Z,Dm->sendList_Z,sendCount_Z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xy,Dm->sendList_xy,sendCount_xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_XY,Dm->sendList_XY,sendCount_XY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xY,Dm->sendList_xY,sendCount_xY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Xy,Dm->sendList_Xy,sendCount_Xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xz,Dm->sendList_xz,sendCount_xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_XZ,Dm->sendList_XZ,sendCount_XZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xZ,Dm->sendList_xZ,sendCount_xZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Xz,Dm->sendList_Xz,sendCount_Xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_yz,Dm->sendList_yz,sendCount_yz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_YZ,Dm->sendList_YZ,sendCount_YZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_yZ,Dm->sendList_yZ,sendCount_yZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Yz,Dm->sendList_Yz,sendCount_Yz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_x,Dm->sendList("x"),sendCount_x*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_X,Dm->sendList("X"),sendCount_X*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_y,Dm->sendList("y"),sendCount_y*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Y,Dm->sendList("Y"),sendCount_Y*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_z,Dm->sendList("z"),sendCount_z*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Z,Dm->sendList("Z"),sendCount_Z*sizeof(int)); + 
ScaLBL_CopyToZeroCopy(dvcSendList_xy,Dm->sendList("xy"),sendCount_xy*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_XY,Dm->sendList("XY"),sendCount_XY*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xY,Dm->sendList("xY"),sendCount_xY*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Xy,Dm->sendList("Xy"),sendCount_Xy*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xz,Dm->sendList("xz"),sendCount_xz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_XZ,Dm->sendList("XZ"),sendCount_XZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xZ,Dm->sendList("xZ"),sendCount_xZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Xz,Dm->sendList("Xz"),sendCount_Xz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_yz,Dm->sendList("yz"),sendCount_yz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_YZ,Dm->sendList("YZ"),sendCount_YZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_yZ,Dm->sendList("yZ"),sendCount_yZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Yz,Dm->sendList("Yz"),sendCount_Yz*sizeof(int)); //...................................................................................... 
- ScaLBL_CopyToZeroCopy(dvcRecvList_x,Dm->recvList_x,recvCount_x*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_X,Dm->recvList_X,recvCount_X*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_y,Dm->recvList_y,recvCount_y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Y,Dm->recvList_Y,recvCount_Y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_z,Dm->recvList_z,recvCount_z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Z,Dm->recvList_Z,recvCount_Z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xy,Dm->recvList_xy,recvCount_xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_XY,Dm->recvList_XY,recvCount_XY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xY,Dm->recvList_xY,recvCount_xY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Xy,Dm->recvList_Xy,recvCount_Xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xz,Dm->recvList_xz,recvCount_xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_XZ,Dm->recvList_XZ,recvCount_XZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xZ,Dm->recvList_xZ,recvCount_xZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Xz,Dm->recvList_Xz,recvCount_Xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_yz,Dm->recvList_yz,recvCount_yz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_YZ,Dm->recvList_YZ,recvCount_YZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_yZ,Dm->recvList_yZ,recvCount_yZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList_Yz,recvCount_Yz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_x,Dm->recvList("x"),recvCount_x*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_X,Dm->recvList("X"),recvCount_X*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_y,Dm->recvList("y"),recvCount_y*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Y,Dm->recvList("Y"),recvCount_Y*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_z,Dm->recvList("z"),recvCount_z*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Z,Dm->recvList("Z"),recvCount_Z*sizeof(int)); + 
ScaLBL_CopyToZeroCopy(dvcRecvList_xy,Dm->recvList("xy"),recvCount_xy*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_XY,Dm->recvList("XY"),recvCount_XY*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xY,Dm->recvList("xY"),recvCount_xY*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Xy,Dm->recvList("Xy"),recvCount_Xy*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xz,Dm->recvList("xz"),recvCount_xz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_XZ,Dm->recvList("XZ"),recvCount_XZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xZ,Dm->recvList("xZ"),recvCount_xZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Xz,Dm->recvList("Xz"),recvCount_Xz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_yz,Dm->recvList("yz"),recvCount_yz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_YZ,Dm->recvList("YZ"),recvCount_YZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_yZ,Dm->recvList("yZ"),recvCount_yZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList("Yz"),recvCount_Yz*sizeof(int)); //...................................................................................... MPI_COMM_SCALBL.barrier(); @@ -219,70 +219,70 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ // Set up the recieve distribution lists //................................................................................... //...Map recieve list for the X face: q=2,8,10,12,14 ................................. 
- D3Q19_MapRecv(-1,0,0,Dm->recvList_X,0,recvCount_X,dvcRecvDist_X); - D3Q19_MapRecv(-1,-1,0,Dm->recvList_X,recvCount_X,recvCount_X,dvcRecvDist_X); - D3Q19_MapRecv(-1,1,0,Dm->recvList_X,2*recvCount_X,recvCount_X,dvcRecvDist_X); - D3Q19_MapRecv(-1,0,-1,Dm->recvList_X,3*recvCount_X,recvCount_X,dvcRecvDist_X); - D3Q19_MapRecv(-1,0,1,Dm->recvList_X,4*recvCount_X,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,0,0, Dm->recvList("X"),0,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,-1,0,Dm->recvList("X"),recvCount_X,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,1,0, Dm->recvList("X"),2*recvCount_X,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,0,-1,Dm->recvList("X"),3*recvCount_X,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,0,1, Dm->recvList("X"),4*recvCount_X,recvCount_X,dvcRecvDist_X); //................................................................................... //...Map recieve list for the x face: q=1,7,9,11,13.................................. - D3Q19_MapRecv(1,0,0,Dm->recvList_x,0,recvCount_x,dvcRecvDist_x); - D3Q19_MapRecv(1,1,0,Dm->recvList_x,recvCount_x,recvCount_x,dvcRecvDist_x); - D3Q19_MapRecv(1,-1,0,Dm->recvList_x,2*recvCount_x,recvCount_x,dvcRecvDist_x); - D3Q19_MapRecv(1,0,1,Dm->recvList_x,3*recvCount_x,recvCount_x,dvcRecvDist_x); - D3Q19_MapRecv(1,0,-1,Dm->recvList_x,4*recvCount_x,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,0,0, Dm->recvList("x"),0,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,1,0, Dm->recvList("x"),recvCount_x,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,-1,0,Dm->recvList("x"),2*recvCount_x,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,0,1, Dm->recvList("x"),3*recvCount_x,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,0,-1,Dm->recvList("x"),4*recvCount_x,recvCount_x,dvcRecvDist_x); //................................................................................... //...Map recieve list for the y face: q=4,8,9,16,18 ................................... 
- D3Q19_MapRecv(0,-1,0,Dm->recvList_Y,0,recvCount_Y,dvcRecvDist_Y); - D3Q19_MapRecv(-1,-1,0,Dm->recvList_Y,recvCount_Y,recvCount_Y,dvcRecvDist_Y); - D3Q19_MapRecv(1,-1,0,Dm->recvList_Y,2*recvCount_Y,recvCount_Y,dvcRecvDist_Y); - D3Q19_MapRecv(0,-1,-1,Dm->recvList_Y,3*recvCount_Y,recvCount_Y,dvcRecvDist_Y); - D3Q19_MapRecv(0,-1,1,Dm->recvList_Y,4*recvCount_Y,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(0,-1,0, Dm->recvList("Y"),0,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(-1,-1,0,Dm->recvList("Y"),recvCount_Y,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(1,-1,0, Dm->recvList("Y"),2*recvCount_Y,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(0,-1,-1,Dm->recvList("Y"),3*recvCount_Y,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(0,-1,1, Dm->recvList("Y"),4*recvCount_Y,recvCount_Y,dvcRecvDist_Y); //................................................................................... //...Map recieve list for the Y face: q=3,7,10,15,17 .................................. - D3Q19_MapRecv(0,1,0,Dm->recvList_y,0,recvCount_y,dvcRecvDist_y); - D3Q19_MapRecv(1,1,0,Dm->recvList_y,recvCount_y,recvCount_y,dvcRecvDist_y); - D3Q19_MapRecv(-1,1,0,Dm->recvList_y,2*recvCount_y,recvCount_y,dvcRecvDist_y); - D3Q19_MapRecv(0,1,1,Dm->recvList_y,3*recvCount_y,recvCount_y,dvcRecvDist_y); - D3Q19_MapRecv(0,1,-1,Dm->recvList_y,4*recvCount_y,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(0,1,0, Dm->recvList("y"),0,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(1,1,0, Dm->recvList("y"),recvCount_y,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(-1,1,0,Dm->recvList("y"),2*recvCount_y,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(0,1,1, Dm->recvList("y"),3*recvCount_y,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(0,1,-1,Dm->recvList("y"),4*recvCount_y,recvCount_y,dvcRecvDist_y); //................................................................................... //...Map recieve list for the z face<<<6,12,13,16,17).............................................. 
- D3Q19_MapRecv(0,0,-1,Dm->recvList_Z,0,recvCount_Z,dvcRecvDist_Z); - D3Q19_MapRecv(-1,0,-1,Dm->recvList_Z,recvCount_Z,recvCount_Z,dvcRecvDist_Z); - D3Q19_MapRecv(1,0,-1,Dm->recvList_Z,2*recvCount_Z,recvCount_Z,dvcRecvDist_Z); - D3Q19_MapRecv(0,-1,-1,Dm->recvList_Z,3*recvCount_Z,recvCount_Z,dvcRecvDist_Z); - D3Q19_MapRecv(0,1,-1,Dm->recvList_Z,4*recvCount_Z,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(0,0,-1, Dm->recvList("Z"),0,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(-1,0,-1,Dm->recvList("Z"),recvCount_Z,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(1,0,-1, Dm->recvList("Z"),2*recvCount_Z,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(0,-1,-1,Dm->recvList("Z"),3*recvCount_Z,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(0,1,-1, Dm->recvList("Z"),4*recvCount_Z,recvCount_Z,dvcRecvDist_Z); //...Map recieve list for the Z face<<<5,11,14,15,18).............................................. - D3Q19_MapRecv(0,0,1,Dm->recvList_z,0,recvCount_z,dvcRecvDist_z); - D3Q19_MapRecv(1,0,1,Dm->recvList_z,recvCount_z,recvCount_z,dvcRecvDist_z); - D3Q19_MapRecv(-1,0,1,Dm->recvList_z,2*recvCount_z,recvCount_z,dvcRecvDist_z); - D3Q19_MapRecv(0,1,1,Dm->recvList_z,3*recvCount_z,recvCount_z,dvcRecvDist_z); - D3Q19_MapRecv(0,-1,1,Dm->recvList_z,4*recvCount_z,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(0,0,1, Dm->recvList("z"),0,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(1,0,1, Dm->recvList("z"),recvCount_z,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(-1,0,1,Dm->recvList("z"),2*recvCount_z,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(0,1,1, Dm->recvList("z"),3*recvCount_z,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(0,-1,1,Dm->recvList("z"),4*recvCount_z,recvCount_z,dvcRecvDist_z); //.................................................................................. //...Map recieve list for the xy edge <<<8)................................ 
- D3Q19_MapRecv(-1,-1,0,Dm->recvList_XY,0,recvCount_XY,dvcRecvDist_XY); + D3Q19_MapRecv(-1,-1,0,Dm->recvList("XY"),0,recvCount_XY,dvcRecvDist_XY); //...Map recieve list for the Xy edge <<<9)................................ - D3Q19_MapRecv(1,-1,0,Dm->recvList_xY,0,recvCount_xY,dvcRecvDist_xY); + D3Q19_MapRecv(1,-1,0,Dm->recvList("xY"),0,recvCount_xY,dvcRecvDist_xY); //...Map recieve list for the xY edge <<<10)................................ - D3Q19_MapRecv(-1,1,0,Dm->recvList_Xy,0,recvCount_Xy,dvcRecvDist_Xy); + D3Q19_MapRecv(-1,1,0,Dm->recvList("Xy"),0,recvCount_Xy,dvcRecvDist_Xy); //...Map recieve list for the XY edge <<<7)................................ - D3Q19_MapRecv(1,1,0,Dm->recvList_xy,0,recvCount_xy,dvcRecvDist_xy); + D3Q19_MapRecv(1,1,0,Dm->recvList("xy"),0,recvCount_xy,dvcRecvDist_xy); //...Map recieve list for the xz edge <<<12)................................ - D3Q19_MapRecv(-1,0,-1,Dm->recvList_XZ,0,recvCount_XZ,dvcRecvDist_XZ); + D3Q19_MapRecv(-1,0,-1,Dm->recvList("XZ"),0,recvCount_XZ,dvcRecvDist_XZ); //...Map recieve list for the xZ edge <<<14)................................ - D3Q19_MapRecv(-1,0,1,Dm->recvList_Xz,0,recvCount_Xz,dvcRecvDist_Xz); + D3Q19_MapRecv(-1,0,1,Dm->recvList("Xz"),0,recvCount_Xz,dvcRecvDist_Xz); //...Map recieve list for the Xz edge <<<13)................................ - D3Q19_MapRecv(1,0,-1,Dm->recvList_xZ,0,recvCount_xZ,dvcRecvDist_xZ); + D3Q19_MapRecv(1,0,-1,Dm->recvList("xZ"),0,recvCount_xZ,dvcRecvDist_xZ); //...Map recieve list for the XZ edge <<<11)................................ - D3Q19_MapRecv(1,0,1,Dm->recvList_xz,0,recvCount_xz,dvcRecvDist_xz); + D3Q19_MapRecv(1,0,1,Dm->recvList("xz"),0,recvCount_xz,dvcRecvDist_xz); //...Map recieve list for the yz edge <<<16)................................ - D3Q19_MapRecv(0,-1,-1,Dm->recvList_YZ,0,recvCount_YZ,dvcRecvDist_YZ); + D3Q19_MapRecv(0,-1,-1,Dm->recvList("YZ"),0,recvCount_YZ,dvcRecvDist_YZ); //...Map recieve list for the yZ edge <<<18)................................ 
- D3Q19_MapRecv(0,-1,1,Dm->recvList_Yz,0,recvCount_Yz,dvcRecvDist_Yz); + D3Q19_MapRecv(0,-1,1,Dm->recvList("Yz"),0,recvCount_Yz,dvcRecvDist_Yz); //...Map recieve list for the Yz edge <<<17)................................ - D3Q19_MapRecv(0,1,-1,Dm->recvList_yZ,0,recvCount_yZ,dvcRecvDist_yZ); + D3Q19_MapRecv(0,1,-1,Dm->recvList("yZ"),0,recvCount_yZ,dvcRecvDist_yZ); //...Map recieve list for the YZ edge <<<15)................................ - D3Q19_MapRecv(0,1,1,Dm->recvList_yz,0,recvCount_yz,dvcRecvDist_yz); + D3Q19_MapRecv(0,1,1,Dm->recvList("yz"),0,recvCount_yz,dvcRecvDist_yz); //................................................................................... //...................................................................................... @@ -319,7 +319,7 @@ int ScaLBL_Communicator::LastInterior(){ return last_interior; } -void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, int start, int count, +void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count, int *d3q19_recvlist){ int i,j,k,n,nn,idx; int * ReturnDist; diff --git a/common/ScaLBL.h b/common/ScaLBL.h index d7f012d1..50eabdd3 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -195,7 +195,7 @@ public: private: //void D3Q19_MapRecv_OLD(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist); - void D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist); + void D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count, int *d3q19_recvlist); bool Lock; // use Lock to make sure only one call at a time to protect data in transit // only one set of Send requests can be active at any time (per instance) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 5a9c56d4..f085c0b2 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -199,7 +199,7 @@ void ScaLBL_ColorModel::ReadInput(){ ASSERT( (int) size1[0] == 
size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; - id_view.viewRaw( size1, Mask->id ); + id_view.viewRaw( size1, Mask->id.data() ); fill.copy( input_id, id_view ); fill.fill( id_view ); } @@ -332,7 +332,7 @@ void ScaLBL_ColorModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); comm.barrier(); //........................................................................... diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 9709b107..718ce8f1 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -205,7 +205,7 @@ void ScaLBL_DFHModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); comm.barrier(); //........................................................................... diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 76d54571..95b5090e 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -151,7 +151,7 @@ void ScaLBL_MRTModel::Create(){ if (rank==0) printf ("Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); comm.barrier(); //........................................................................... 
// MAIN VARIABLES ALLOCATED HERE diff --git a/tests/GenerateSphereTest.cpp b/tests/GenerateSphereTest.cpp index 0c84287e..9e4cdfda 100644 --- a/tests/GenerateSphereTest.cpp +++ b/tests/GenerateSphereTest.cpp @@ -20,7 +20,7 @@ using namespace std; -inline void PackID(int *list, int count, char *sendbuf, char *ID){ +inline void PackID(const int *list, int count, char *sendbuf, char *ID){ // Fill in the phase ID values from neighboring processors // This packs up the values that need to be sent from one processor to another int idx,n; @@ -32,7 +32,7 @@ inline void PackID(int *list, int count, char *sendbuf, char *ID){ } //*************************************************************************************** -inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ +inline void UnpackID(const int *list, int count, char *recvbuf, char *ID){ // Fill in the phase ID values from neighboring processors // This unpacks the values once they have been recieved from neighbors int idx,n; @@ -91,44 +91,44 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny char *recvID_xy, *recvID_yz, *recvID_xz, *recvID_Xy, *recvID_Yz, *recvID_xZ; char *recvID_xY, *recvID_yZ, *recvID_Xz, *recvID_XY, *recvID_YZ, *recvID_XZ; // send buffers - sendID_x = new char [Dm.sendCount_x]; - sendID_y = new char [Dm.sendCount_y]; - sendID_z = new char [Dm.sendCount_z]; - sendID_X = new char [Dm.sendCount_X]; - sendID_Y = new char [Dm.sendCount_Y]; - sendID_Z = new char [Dm.sendCount_Z]; - sendID_xy = new char [Dm.sendCount_xy]; - sendID_yz = new char [Dm.sendCount_yz]; - sendID_xz = new char [Dm.sendCount_xz]; - sendID_Xy = new char [Dm.sendCount_Xy]; - sendID_Yz = new char [Dm.sendCount_Yz]; - sendID_xZ = new char [Dm.sendCount_xZ]; - sendID_xY = new char [Dm.sendCount_xY]; - sendID_yZ = new char [Dm.sendCount_yZ]; - sendID_Xz = new char [Dm.sendCount_Xz]; - sendID_XY = new char [Dm.sendCount_XY]; - sendID_YZ = new char [Dm.sendCount_YZ]; - sendID_XZ = new char 
[Dm.sendCount_XZ]; + sendID_x = new char [Dm.sendCount("x")]; + sendID_y = new char [Dm.sendCount("y")]; + sendID_z = new char [Dm.sendCount("z")]; + sendID_X = new char [Dm.sendCount("X")]; + sendID_Y = new char [Dm.sendCount("Y")]; + sendID_Z = new char [Dm.sendCount("Z")]; + sendID_xy = new char [Dm.sendCount("xy")]; + sendID_yz = new char [Dm.sendCount("yz")]; + sendID_xz = new char [Dm.sendCount("xz")]; + sendID_Xy = new char [Dm.sendCount("Xy")]; + sendID_Yz = new char [Dm.sendCount("Yz")]; + sendID_xZ = new char [Dm.sendCount("xZ")]; + sendID_xY = new char [Dm.sendCount("xY")]; + sendID_yZ = new char [Dm.sendCount("yZ")]; + sendID_Xz = new char [Dm.sendCount("Xz")]; + sendID_XY = new char [Dm.sendCount("XY")]; + sendID_YZ = new char [Dm.sendCount("YZ")]; + sendID_XZ = new char [Dm.sendCount("XZ")]; //...................................................................................... // recv buffers - recvID_x = new char [Dm.recvCount_x]; - recvID_y = new char [Dm.recvCount_y]; - recvID_z = new char [Dm.recvCount_z]; - recvID_X = new char [Dm.recvCount_X]; - recvID_Y = new char [Dm.recvCount_Y]; - recvID_Z = new char [Dm.recvCount_Z]; - recvID_xy = new char [Dm.recvCount_xy]; - recvID_yz = new char [Dm.recvCount_yz]; - recvID_xz = new char [Dm.recvCount_xz]; - recvID_Xy = new char [Dm.recvCount_Xy]; - recvID_xZ = new char [Dm.recvCount_xZ]; - recvID_xY = new char [Dm.recvCount_xY]; - recvID_yZ = new char [Dm.recvCount_yZ]; - recvID_Yz = new char [Dm.recvCount_Yz]; - recvID_Xz = new char [Dm.recvCount_Xz]; - recvID_XY = new char [Dm.recvCount_XY]; - recvID_YZ = new char [Dm.recvCount_YZ]; - recvID_XZ = new char [Dm.recvCount_XZ]; + recvID_x = new char [Dm.recvCount("x")]; + recvID_y = new char [Dm.recvCount("y")]; + recvID_z = new char [Dm.recvCount("z")]; + recvID_X = new char [Dm.recvCount("X")]; + recvID_Y = new char [Dm.recvCount("Y")]; + recvID_Z = new char [Dm.recvCount("Z")]; + recvID_xy = new char [Dm.recvCount("xy")]; + recvID_yz = new char 
[Dm.recvCount("yz")]; + recvID_xz = new char [Dm.recvCount("xz")]; + recvID_Xy = new char [Dm.recvCount("Xy")]; + recvID_xZ = new char [Dm.recvCount("xZ")]; + recvID_xY = new char [Dm.recvCount("xY")]; + recvID_yZ = new char [Dm.recvCount("yZ")]; + recvID_Yz = new char [Dm.recvCount("Yz")]; + recvID_Xz = new char [Dm.recvCount("Xz")]; + recvID_XY = new char [Dm.recvCount("XY")]; + recvID_YZ = new char [Dm.recvCount("YZ")]; + recvID_XZ = new char [Dm.recvCount("XZ")]; //...................................................................................... int sendtag,recvtag; sendtag = recvtag = 7; @@ -194,62 +194,62 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny // Pack and send the updated ID values - PackID(Dm.sendList_x, Dm.sendCount_x ,sendID_x, id); - PackID(Dm.sendList_X, Dm.sendCount_X ,sendID_X, id); - PackID(Dm.sendList_y, Dm.sendCount_y ,sendID_y, id); - PackID(Dm.sendList_Y, Dm.sendCount_Y ,sendID_Y, id); - PackID(Dm.sendList_z, Dm.sendCount_z ,sendID_z, id); - PackID(Dm.sendList_Z, Dm.sendCount_Z ,sendID_Z, id); - PackID(Dm.sendList_xy, Dm.sendCount_xy ,sendID_xy, id); - PackID(Dm.sendList_Xy, Dm.sendCount_Xy ,sendID_Xy, id); - PackID(Dm.sendList_xY, Dm.sendCount_xY ,sendID_xY, id); - PackID(Dm.sendList_XY, Dm.sendCount_XY ,sendID_XY, id); - PackID(Dm.sendList_xz, Dm.sendCount_xz ,sendID_xz, id); - PackID(Dm.sendList_Xz, Dm.sendCount_Xz ,sendID_Xz, id); - PackID(Dm.sendList_xZ, Dm.sendCount_xZ ,sendID_xZ, id); - PackID(Dm.sendList_XZ, Dm.sendCount_XZ ,sendID_XZ, id); - PackID(Dm.sendList_yz, Dm.sendCount_yz ,sendID_yz, id); - PackID(Dm.sendList_Yz, Dm.sendCount_Yz ,sendID_Yz, id); - PackID(Dm.sendList_yZ, Dm.sendCount_yZ ,sendID_yZ, id); - PackID(Dm.sendList_YZ, Dm.sendCount_YZ ,sendID_YZ, id); + PackID(Dm.sendList("x"), Dm.sendCount("x") ,sendID_x, id); + PackID(Dm.sendList("X"), Dm.sendCount("X") ,sendID_X, id); + PackID(Dm.sendList("y"), Dm.sendCount("y") ,sendID_y, id); + PackID(Dm.sendList("Y"), 
Dm.sendCount("Y") ,sendID_Y, id); + PackID(Dm.sendList("z"), Dm.sendCount("z") ,sendID_z, id); + PackID(Dm.sendList("Z"), Dm.sendCount("Z") ,sendID_Z, id); + PackID(Dm.sendList("xy"), Dm.sendCount("xy") ,sendID_xy, id); + PackID(Dm.sendList("Xy"), Dm.sendCount("Xy") ,sendID_Xy, id); + PackID(Dm.sendList("xY"), Dm.sendCount("xY") ,sendID_xY, id); + PackID(Dm.sendList("XY"), Dm.sendCount("XY") ,sendID_XY, id); + PackID(Dm.sendList("xz"), Dm.sendCount("xz") ,sendID_xz, id); + PackID(Dm.sendList("Xz"), Dm.sendCount("Xz") ,sendID_Xz, id); + PackID(Dm.sendList("xZ"), Dm.sendCount("xZ") ,sendID_xZ, id); + PackID(Dm.sendList("XZ"), Dm.sendCount("XZ") ,sendID_XZ, id); + PackID(Dm.sendList("yz"), Dm.sendCount("yz") ,sendID_yz, id); + PackID(Dm.sendList("Yz"), Dm.sendCount("Yz") ,sendID_Yz, id); + PackID(Dm.sendList("yZ"), Dm.sendCount("yZ") ,sendID_yZ, id); + PackID(Dm.sendList("YZ"), Dm.sendCount("YZ") ,sendID_YZ, id); //...................................................................................... 
- Dm.Comm.sendrecv(sendID_x,Dm.sendCount_x,Dm.rank_x(),sendtag,recvID_X,Dm.recvCount_X,Dm.rank_X(),recvtag); - Dm.Comm.sendrecv(sendID_X,Dm.sendCount_X,Dm.rank_X(),sendtag,recvID_x,Dm.recvCount_x,Dm.rank_x(),recvtag); - Dm.Comm.sendrecv(sendID_y,Dm.sendCount_y,Dm.rank_y(),sendtag,recvID_Y,Dm.recvCount_Y,Dm.rank_Y(),recvtag); - Dm.Comm.sendrecv(sendID_Y,Dm.sendCount_Y,Dm.rank_Y(),sendtag,recvID_y,Dm.recvCount_y,Dm.rank_y(),recvtag); - Dm.Comm.sendrecv(sendID_z,Dm.sendCount_z,Dm.rank_z(),sendtag,recvID_Z,Dm.recvCount_Z,Dm.rank_Z(),recvtag); - Dm.Comm.sendrecv(sendID_Z,Dm.sendCount_Z,Dm.rank_Z(),sendtag,recvID_z,Dm.recvCount_z,Dm.rank_z(),recvtag); - Dm.Comm.sendrecv(sendID_xy,Dm.sendCount_xy,Dm.rank_xy(),sendtag,recvID_XY,Dm.recvCount_XY,Dm.rank_XY(),recvtag); - Dm.Comm.sendrecv(sendID_XY,Dm.sendCount_XY,Dm.rank_XY(),sendtag,recvID_xy,Dm.recvCount_xy,Dm.rank_xy(),recvtag); - Dm.Comm.sendrecv(sendID_Xy,Dm.sendCount_Xy,Dm.rank_Xy(),sendtag,recvID_xY,Dm.recvCount_xY,Dm.rank_xY(),recvtag); - Dm.Comm.sendrecv(sendID_xY,Dm.sendCount_xY,Dm.rank_xY(),sendtag,recvID_Xy,Dm.recvCount_Xy,Dm.rank_Xy(),recvtag); - Dm.Comm.sendrecv(sendID_xz,Dm.sendCount_xz,Dm.rank_xz(),sendtag,recvID_XZ,Dm.recvCount_XZ,Dm.rank_XZ(),recvtag); - Dm.Comm.sendrecv(sendID_XZ,Dm.sendCount_XZ,Dm.rank_XZ(),sendtag,recvID_xz,Dm.recvCount_xz,Dm.rank_xz(),recvtag); - Dm.Comm.sendrecv(sendID_Xz,Dm.sendCount_Xz,Dm.rank_Xz(),sendtag,recvID_xZ,Dm.recvCount_xZ,Dm.rank_xZ(),recvtag); - Dm.Comm.sendrecv(sendID_xZ,Dm.sendCount_xZ,Dm.rank_xZ(),sendtag,recvID_Xz,Dm.recvCount_Xz,Dm.rank_Xz(),recvtag); - Dm.Comm.sendrecv(sendID_yz,Dm.sendCount_yz,Dm.rank_yz(),sendtag,recvID_YZ,Dm.recvCount_YZ,Dm.rank_YZ(),recvtag); - Dm.Comm.sendrecv(sendID_YZ,Dm.sendCount_YZ,Dm.rank_YZ(),sendtag,recvID_yz,Dm.recvCount_yz,Dm.rank_yz(),recvtag); - Dm.Comm.sendrecv(sendID_Yz,Dm.sendCount_Yz,Dm.rank_Yz(),sendtag,recvID_yZ,Dm.recvCount_yZ,Dm.rank_yZ(),recvtag); - 
Dm.Comm.sendrecv(sendID_yZ,Dm.sendCount_yZ,Dm.rank_yZ(),sendtag,recvID_Yz,Dm.recvCount_Yz,Dm.rank_Yz(),recvtag); + Dm.Comm.sendrecv(sendID_x,Dm.sendCount("x"),Dm.rank_x(),sendtag,recvID_X,Dm.recvCount("X"),Dm.rank_X(),recvtag); + Dm.Comm.sendrecv(sendID_X,Dm.sendCount("X"),Dm.rank_X(),sendtag,recvID_x,Dm.recvCount("x"),Dm.rank_x(),recvtag); + Dm.Comm.sendrecv(sendID_y,Dm.sendCount("y"),Dm.rank_y(),sendtag,recvID_Y,Dm.recvCount("Y"),Dm.rank_Y(),recvtag); + Dm.Comm.sendrecv(sendID_Y,Dm.sendCount("Y"),Dm.rank_Y(),sendtag,recvID_y,Dm.recvCount("y"),Dm.rank_y(),recvtag); + Dm.Comm.sendrecv(sendID_z,Dm.sendCount("z"),Dm.rank_z(),sendtag,recvID_Z,Dm.recvCount("Z"),Dm.rank_Z(),recvtag); + Dm.Comm.sendrecv(sendID_Z,Dm.sendCount("Z"),Dm.rank_Z(),sendtag,recvID_z,Dm.recvCount("z"),Dm.rank_z(),recvtag); + Dm.Comm.sendrecv(sendID_xy,Dm.sendCount("xy"),Dm.rank_xy(),sendtag,recvID_XY,Dm.recvCount("XY"),Dm.rank_XY(),recvtag); + Dm.Comm.sendrecv(sendID_XY,Dm.sendCount("XY"),Dm.rank_XY(),sendtag,recvID_xy,Dm.recvCount("xy"),Dm.rank_xy(),recvtag); + Dm.Comm.sendrecv(sendID_Xy,Dm.sendCount("Xy"),Dm.rank_Xy(),sendtag,recvID_xY,Dm.recvCount("xY"),Dm.rank_xY(),recvtag); + Dm.Comm.sendrecv(sendID_xY,Dm.sendCount("xY"),Dm.rank_xY(),sendtag,recvID_Xy,Dm.recvCount("Xy"),Dm.rank_Xy(),recvtag); + Dm.Comm.sendrecv(sendID_xz,Dm.sendCount("xz"),Dm.rank_xz(),sendtag,recvID_XZ,Dm.recvCount("XZ"),Dm.rank_XZ(),recvtag); + Dm.Comm.sendrecv(sendID_XZ,Dm.sendCount("XZ"),Dm.rank_XZ(),sendtag,recvID_xz,Dm.recvCount("xz"),Dm.rank_xz(),recvtag); + Dm.Comm.sendrecv(sendID_Xz,Dm.sendCount("Xz"),Dm.rank_Xz(),sendtag,recvID_xZ,Dm.recvCount("xZ"),Dm.rank_xZ(),recvtag); + Dm.Comm.sendrecv(sendID_xZ,Dm.sendCount("xZ"),Dm.rank_xZ(),sendtag,recvID_Xz,Dm.recvCount("Xz"),Dm.rank_Xz(),recvtag); + Dm.Comm.sendrecv(sendID_yz,Dm.sendCount("yz"),Dm.rank_yz(),sendtag,recvID_YZ,Dm.recvCount("YZ"),Dm.rank_YZ(),recvtag); + 
Dm.Comm.sendrecv(sendID_YZ,Dm.sendCount("YZ"),Dm.rank_YZ(),sendtag,recvID_yz,Dm.recvCount("yz"),Dm.rank_yz(),recvtag); + Dm.Comm.sendrecv(sendID_Yz,Dm.sendCount("Yz"),Dm.rank_Yz(),sendtag,recvID_yZ,Dm.recvCount("yZ"),Dm.rank_yZ(),recvtag); + Dm.Comm.sendrecv(sendID_yZ,Dm.sendCount("yZ"),Dm.rank_yZ(),sendtag,recvID_Yz,Dm.recvCount("Yz"),Dm.rank_Yz(),recvtag); //...................................................................................... - UnpackID(Dm.recvList_x, Dm.recvCount_x ,recvID_x, id); - UnpackID(Dm.recvList_X, Dm.recvCount_X ,recvID_X, id); - UnpackID(Dm.recvList_y, Dm.recvCount_y ,recvID_y, id); - UnpackID(Dm.recvList_Y, Dm.recvCount_Y ,recvID_Y, id); - UnpackID(Dm.recvList_z, Dm.recvCount_z ,recvID_z, id); - UnpackID(Dm.recvList_Z, Dm.recvCount_Z ,recvID_Z, id); - UnpackID(Dm.recvList_xy, Dm.recvCount_xy ,recvID_xy, id); - UnpackID(Dm.recvList_Xy, Dm.recvCount_Xy ,recvID_Xy, id); - UnpackID(Dm.recvList_xY, Dm.recvCount_xY ,recvID_xY, id); - UnpackID(Dm.recvList_XY, Dm.recvCount_XY ,recvID_XY, id); - UnpackID(Dm.recvList_xz, Dm.recvCount_xz ,recvID_xz, id); - UnpackID(Dm.recvList_Xz, Dm.recvCount_Xz ,recvID_Xz, id); - UnpackID(Dm.recvList_xZ, Dm.recvCount_xZ ,recvID_xZ, id); - UnpackID(Dm.recvList_XZ, Dm.recvCount_XZ ,recvID_XZ, id); - UnpackID(Dm.recvList_yz, Dm.recvCount_yz ,recvID_yz, id); - UnpackID(Dm.recvList_Yz, Dm.recvCount_Yz ,recvID_Yz, id); - UnpackID(Dm.recvList_yZ, Dm.recvCount_yZ ,recvID_yZ, id); - UnpackID(Dm.recvList_YZ, Dm.recvCount_YZ ,recvID_YZ, id); + UnpackID(Dm.recvList("x"), Dm.recvCount("x") ,recvID_x, id); + UnpackID(Dm.recvList("X"), Dm.recvCount("X") ,recvID_X, id); + UnpackID(Dm.recvList("y"), Dm.recvCount("y") ,recvID_y, id); + UnpackID(Dm.recvList("Y"), Dm.recvCount("Y") ,recvID_Y, id); + UnpackID(Dm.recvList("z"), Dm.recvCount("z") ,recvID_z, id); + UnpackID(Dm.recvList("Z"), Dm.recvCount("Z") ,recvID_Z, id); + UnpackID(Dm.recvList("xy"), Dm.recvCount("xy") ,recvID_xy, id); + UnpackID(Dm.recvList("Xy"), 
Dm.recvCount("Xy") ,recvID_Xy, id); + UnpackID(Dm.recvList("xY"), Dm.recvCount("xY") ,recvID_xY, id); + UnpackID(Dm.recvList("XY"), Dm.recvCount("XY") ,recvID_XY, id); + UnpackID(Dm.recvList("xz"), Dm.recvCount("xz") ,recvID_xz, id); + UnpackID(Dm.recvList("Xz"), Dm.recvCount("Xz") ,recvID_Xz, id); + UnpackID(Dm.recvList("xZ"), Dm.recvCount("xZ") ,recvID_xZ, id); + UnpackID(Dm.recvList("XZ"), Dm.recvCount("XZ") ,recvID_XZ, id); + UnpackID(Dm.recvList("yz"), Dm.recvCount("yz") ,recvID_yz, id); + UnpackID(Dm.recvList("Yz"), Dm.recvCount("Yz") ,recvID_Yz, id); + UnpackID(Dm.recvList("yZ"), Dm.recvCount("yZ") ,recvID_yZ, id); + UnpackID(Dm.recvList("YZ"), Dm.recvCount("YZ") ,recvID_YZ, id); //...................................................................................... //double GlobalNumber = Dm.Comm.sumReduce( LocalNumber ); diff --git a/tests/TestBubbleDFH.cpp b/tests/TestBubbleDFH.cpp index 8b4f1a9b..3eecb13f 100644 --- a/tests/TestBubbleDFH.cpp +++ b/tests/TestBubbleDFH.cpp @@ -248,7 +248,7 @@ int main(int argc, char **argv) if (rank==0) printf ("Set up memory efficient layout Npad=%i \n",Npad); IntArray Map(Nx,Ny,Nz); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); comm.barrier(); //........................................................................... diff --git a/tests/TestColorGradDFH.cpp b/tests/TestColorGradDFH.cpp index b04aebce..02c0dc9d 100644 --- a/tests/TestColorGradDFH.cpp +++ b/tests/TestColorGradDFH.cpp @@ -100,7 +100,7 @@ int main(int argc, char **argv) int *neighborList; IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np); comm.barrier(); //......................device distributions................................. 
diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index c4a045ae..1ffa2465 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -283,7 +283,7 @@ int main(int argc, char **argv) auto neighborList= new int[18*Npad]; IntArray Map(Nx,Ny,Nz); Map.fill(-2); - Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np); comm.barrier(); int neighborSize=18*Np*sizeof(int); //......................device distributions................................. diff --git a/tests/TestFluxBC.cpp b/tests/TestFluxBC.cpp index 3e999715..0798a481 100644 --- a/tests/TestFluxBC.cpp +++ b/tests/TestFluxBC.cpp @@ -88,7 +88,7 @@ int main (int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np); comm.barrier(); //......................device distributions................................. diff --git a/tests/TestForceMoments.cpp b/tests/TestForceMoments.cpp index b10954b1..fab5fe68 100644 --- a/tests/TestForceMoments.cpp +++ b/tests/TestForceMoments.cpp @@ -162,7 +162,7 @@ int main(int argc, char **argv) char *ID; ScaLBL_AllocateDeviceMemory((void **) &ID, N); // Allocate device memory // Copy to the device - ScaLBL_CopyToDevice(ID, Dm->id, N); + ScaLBL_CopyToDevice(ID, Dm->id.data(), N); //........................................................................... 
if (rank==0){ @@ -179,7 +179,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np); if (rank == 0) PrintNeighborList(neighborList,Np, rank); diff --git a/tests/TestMap.cpp b/tests/TestMap.cpp index f3010081..3ba403a7 100644 --- a/tests/TestMap.cpp +++ b/tests/TestMap.cpp @@ -88,7 +88,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np); comm.barrier(); // Check the neighborlist diff --git a/tests/TestPressVel.cpp b/tests/TestPressVel.cpp index 25afd226..c19bdcef 100644 --- a/tests/TestPressVel.cpp +++ b/tests/TestPressVel.cpp @@ -106,13 +106,13 @@ int main(int argc, char **argv) if (rank==0) printf ("Create ScaLBL_Communicator \n"); // Create a communicator for the device - auto ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Dm)); + auto ScaLBL_Comm = std::make_shared( Dm ); //...........device phase ID................................................. if (rank==0) printf ("Copying phase ID to device \n"); char *ID; ScaLBL_AllocateDeviceMemory((void **) &ID, N); // Allocate device memory // Copy to the device - ScaLBL_CopyToDevice(ID, Dm->id, N); + ScaLBL_CopyToDevice(ID, Dm->id.data(), N); //........................................................................... if (rank==0){ @@ -127,7 +127,7 @@ int main(int argc, char **argv) int *neighborList; IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np); comm.barrier(); //......................device distributions................................. 
diff --git a/tests/lbpm_minkowski_scalar.cpp b/tests/lbpm_minkowski_scalar.cpp index 721207a1..c5c27426 100644 --- a/tests/lbpm_minkowski_scalar.cpp +++ b/tests/lbpm_minkowski_scalar.cpp @@ -159,7 +159,7 @@ int main(int argc, char **argv) else{ // Recieve the subdomain from rank = 0 printf("Ready to recieve data %i at process %i \n", N,rank); - comm.recv(Dm->id,N,0,15); + comm.recv(Dm->id.data(),N,0,15); } comm.barrier(); From 85cc3363da4d23ac3a331b0218126eee08935de7 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Fri, 15 May 2020 13:39:36 -0400 Subject: [PATCH 002/205] Adding function to reorder ranks based on load balancing --- common/MPI.cpp | 21 +++++ common/MPI.h | 7 ++ common/Utilities.hpp | 216 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 common/Utilities.hpp diff --git a/common/MPI.cpp b/common/MPI.cpp index 7604ae27..8818b9c5 100644 --- a/common/MPI.cpp +++ b/common/MPI.cpp @@ -2,6 +2,7 @@ #include "common/MPI.h" #include "common/Utilities.h" +#include "common/Utilities.hpp" #include "ProfilerApp.h" #include "StackTrace/ErrorHandlers.h" @@ -3806,5 +3807,25 @@ void MPI_CLASS::stop_MPI() } +/**************************************************************************** + * Function to perform load balancing * + ****************************************************************************/ +MPI MPI::loadBalance( double local, std::vector work ) +{ + MPI_ASSERT( (int) work.size() == getSize() ); + auto perf = allGather( local ); + std::vector I( work.size() ); + for ( size_t i=0; i key( work.size() ); + for ( size_t i=0; i work ); + private: // Private helper functions for templated MPI operations; template void call_sumReduce( type *x, const int n = 1 ) const; diff --git a/common/Utilities.hpp b/common/Utilities.hpp new file mode 100644 index 00000000..d4bca4f7 --- /dev/null +++ b/common/Utilities.hpp @@ -0,0 +1,216 @@ +#ifndef included_Utilities_hpp +#define included_Utilities_hpp + + +#include 
"Utilities.h" + +#include + + +namespace Utilities { + + +/************************************************************************ + * templated quicksort routines * + ************************************************************************/ +template +void quicksort( std::vector &x ) +{ + if ( x.size() <= 1u ) + return; + T *arr = &x[0]; + bool test; + long int i, ir, j, jstack, k, l, istack[100]; + T a, tmp_a; + jstack = 0; + l = 0; + ir = x.size() - 1; + while ( 1 ) { + if ( ir - l < 7 ) { // Insertion sort when subarray small enough. + for ( j = l + 1; j <= ir; j++ ) { + a = arr[j]; + test = true; + for ( i = j - 1; i >= 0; i-- ) { + if ( arr[i] < a ) { + arr[i + 1] = a; + test = false; + break; + } + arr[i + 1] = arr[i]; + } + if ( test ) { + i = l - 1; + arr[i + 1] = a; + } + } + if ( jstack == 0 ) + return; + ir = istack[jstack]; // Pop stack and begin a new round of partitioning. + l = istack[jstack - 1]; + jstack -= 2; + } else { + k = ( l + ir ) / 2; // Choose median of left, center and right elements as partitioning + // element a. Also rearrange so that a(l) < a(l+1) < a(ir). + tmp_a = arr[k]; + arr[k] = arr[l + 1]; + arr[l + 1] = tmp_a; + if ( arr[l] > arr[ir] ) { + tmp_a = arr[l]; + arr[l] = arr[ir]; + arr[ir] = tmp_a; + } + if ( arr[l + 1] > arr[ir] ) { + tmp_a = arr[l + 1]; + arr[l + 1] = arr[ir]; + arr[ir] = tmp_a; + } + if ( arr[l] > arr[l + 1] ) { + tmp_a = arr[l]; + arr[l] = arr[l + 1]; + arr[l + 1] = tmp_a; + } + // Scan up to find element > a + j = ir; + a = arr[l + 1]; // Partitioning element. + for ( i = l + 2; i <= ir; i++ ) { + if ( arr[i] < a ) + continue; + while ( arr[j] > a ) // Scan down to find element < a. + j--; + if ( j < i ) + break; // Pointers crossed. Exit with partitioning complete. + tmp_a = arr[i]; // Exchange elements of both arrays. + arr[i] = arr[j]; + arr[j] = tmp_a; + } + arr[l + 1] = arr[j]; // Insert partitioning element in both arrays. 
+ arr[j] = a; + jstack += 2; + // Push pointers to larger subarray on stack, process smaller subarray immediately. + if ( ir - i + 1 >= j - l ) { + istack[jstack] = ir; + istack[jstack - 1] = i; + ir = j - 1; + } else { + istack[jstack] = j - 1; + istack[jstack - 1] = l; + l = i; + } + } + } +} +template +void quicksort( std::vector &x, std::vector &y ) +{ + if ( x.size() <= 1u ) + return; + T1 *arr = &x[0]; + T2 *brr = &y[0]; + bool test; + long int i, ir, j, jstack, k, l, istack[100]; + T1 a, tmp_a; + T2 b, tmp_b; + jstack = 0; + l = 0; + ir = x.size() - 1; + while ( 1 ) { + if ( ir - l < 7 ) { // Insertion sort when subarray small enough. + for ( j = l + 1; j <= ir; j++ ) { + a = arr[j]; + b = brr[j]; + test = true; + for ( i = j - 1; i >= 0; i-- ) { + if ( arr[i] < a ) { + arr[i + 1] = a; + brr[i + 1] = b; + test = false; + break; + } + arr[i + 1] = arr[i]; + brr[i + 1] = brr[i]; + } + if ( test ) { + i = l - 1; + arr[i + 1] = a; + brr[i + 1] = b; + } + } + if ( jstack == 0 ) + return; + ir = istack[jstack]; // Pop stack and begin a new round of partitioning. + l = istack[jstack - 1]; + jstack -= 2; + } else { + k = ( l + ir ) / 2; // Choose median of left, center and right elements as partitioning + // element a. Also rearrange so that a(l) ? a(l+1) ? a(ir). + tmp_a = arr[k]; + arr[k] = arr[l + 1]; + arr[l + 1] = tmp_a; + tmp_b = brr[k]; + brr[k] = brr[l + 1]; + brr[l + 1] = tmp_b; + if ( arr[l] > arr[ir] ) { + tmp_a = arr[l]; + arr[l] = arr[ir]; + arr[ir] = tmp_a; + tmp_b = brr[l]; + brr[l] = brr[ir]; + brr[ir] = tmp_b; + } + if ( arr[l + 1] > arr[ir] ) { + tmp_a = arr[l + 1]; + arr[l + 1] = arr[ir]; + arr[ir] = tmp_a; + tmp_b = brr[l + 1]; + brr[l + 1] = brr[ir]; + brr[ir] = tmp_b; + } + if ( arr[l] > arr[l + 1] ) { + tmp_a = arr[l]; + arr[l] = arr[l + 1]; + arr[l + 1] = tmp_a; + tmp_b = brr[l]; + brr[l] = brr[l + 1]; + brr[l + 1] = tmp_b; + } + // Scan up to find element > a + j = ir; + a = arr[l + 1]; // Partitioning element. 
+ b = brr[l + 1]; + for ( i = l + 2; i <= ir; i++ ) { + if ( arr[i] < a ) + continue; + while ( arr[j] > a ) // Scan down to find element < a. + j--; + if ( j < i ) + break; // Pointers crossed. Exit with partitioning complete. + tmp_a = arr[i]; // Exchange elements of both arrays. + arr[i] = arr[j]; + arr[j] = tmp_a; + tmp_b = brr[i]; + brr[i] = brr[j]; + brr[j] = tmp_b; + } + arr[l + 1] = arr[j]; // Insert partitioning element in both arrays. + arr[j] = a; + brr[l + 1] = brr[j]; + brr[j] = b; + jstack += 2; + // Push pointers to larger subarray on stack, process smaller subarray immediately. + if ( ir - i + 1 >= j - l ) { + istack[jstack] = ir; + istack[jstack - 1] = i; + ir = j - 1; + } else { + istack[jstack] = j - 1; + istack[jstack - 1] = l; + l = i; + } + } + } +} + + +} + +#endif From 6cbce3f4ee5712d8db403aa21891268c3dc01a7a Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 28 May 2020 13:01:52 -0400 Subject: [PATCH 003/205] Adding mainpage for doxygen --- doxygen/DoxygenMainpage.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doxygen/DoxygenMainpage.h diff --git a/doxygen/DoxygenMainpage.h b/doxygen/DoxygenMainpage.h new file mode 100644 index 00000000..0d42f215 --- /dev/null +++ b/doxygen/DoxygenMainpage.h @@ -0,0 +1,9 @@ +/** \mainpage LBPM + * + * This is the documentation for LBPM + * + * - \ref IO "IO routines" + * - \ref Utilities "Utility routines" + * + * \author James McClure + */ From fb67bfa069fa86419ba6c6cd95445ac3962cebaf Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 28 May 2020 13:12:09 -0400 Subject: [PATCH 004/205] Updating documentation --- IO/netcdf.h | 24 ++++++++++++------------ IO/silo.h | 39 ++++++++++++++++++++------------------- doxygen/DoxygenMainpage.h | 2 ++ 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/IO/netcdf.h b/IO/netcdf.h index b4559e51..e1f65e61 100644 --- a/IO/netcdf.h +++ b/IO/netcdf.h @@ -26,7 +26,7 @@ std::string VariableTypeName( VariableType type ); /*! 
* @brief Open netcdf file - * @detailed This function opens a netcdf file + * @details This function opens a netcdf file * @return This function returns a handle to the file * @param filename File to open * @param mode Open the file for reading or writing @@ -37,7 +37,7 @@ int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm /*! * @brief Close netcdf file - * @detailed This function closes a netcdf file + * @details This function closes a netcdf file * @param fid Handle to the open file */ void close( int fid ); @@ -45,7 +45,7 @@ void close( int fid ); /*! * @brief Read the variable names - * @detailed This function reads a list of the variable names in the file + * @details This function reads a list of the variable names in the file * @param fid Handle to the open file */ std::vector getVarNames( int fid ); @@ -53,7 +53,7 @@ std::vector getVarNames( int fid ); /*! * @brief Read the attribute names - * @detailed This function reads a list of the attribute names in the file + * @details This function reads a list of the attribute names in the file * @param fid Handle to the open file */ std::vector getAttNames( int fid ); @@ -61,7 +61,7 @@ std::vector getAttNames( int fid ); /*! * @brief Return the variable type - * @detailed This function returns the type for a variable + * @details This function returns the type for a variable * @param fid Handle to the open file * @param var Variable to read */ @@ -70,7 +70,7 @@ VariableType getVarType( int fid, const std::string& var ); /*! * @brief Return the attribute type - * @detailed This function returns the type for an attribute + * @details This function returns the type for an attribute * @param fid Handle to the open file * @param att Attribute to read */ @@ -79,7 +79,7 @@ VariableType getAttType( int fid, const std::string& att ); /*! 
* @brief Return the variable dimensions - * @detailed This function returns the die for a variable + * @details This function returns the die for a variable * @param fid Handle to the open file * @param var Variable to read */ @@ -88,7 +88,7 @@ std::vector getVarDim( int fid, const std::string& var ); /*! * @brief Read a variable - * @detailed This function reads a variable with the given name from the file + * @details This function reads a variable with the given name from the file * @param fid Handle to the open file * @param var Variable to read */ @@ -98,7 +98,7 @@ Array getVar( int fid, const std::string& var ); /*! * @brief Read a strided variable - * @detailed This function reads a strided variable with the given name from the file + * @details This function reads a strided variable with the given name from the file * @param fid Handle to the open file * @param var Variable to read * @param start Starting corner for the read @@ -112,7 +112,7 @@ Array getVar( int fid, const std::string& var, const std::vector& sta /*! * @brief Read an attribute - * @detailed This function reads an attribute with the given name from the file + * @details This function reads an attribute with the given name from the file * @param fid Handle to the open file * @param att Attribute to read */ @@ -122,7 +122,7 @@ Array getAtt( int fid, const std::string& att ); /*! * @brief Write the dimensions - * @detailed This function writes the grid dimensions to netcdf. + * @details This function writes the grid dimensions to netcdf. * @param fid Handle to the open file */ std::vector defDim( int fid, const std::vector& names, const std::vector& dims ); @@ -130,7 +130,7 @@ std::vector defDim( int fid, const std::vector& names, const s /*! * @brief Write a variable - * @detailed This function writes a variable to netcdf. + * @details This function writes a variable to netcdf. 
* @param fid Handle to the open file */ template diff --git a/IO/silo.h b/IO/silo.h index 339a5c34..40a023d7 100644 --- a/IO/silo.h +++ b/IO/silo.h @@ -30,7 +30,7 @@ enum class VariableDataType { DOUBLE, FLOAT, INT, UNKNOWN }; /*! * @brief Open silo file - * @detailed This function opens a silo file + * @details This function opens a silo file * @param[in] filename File to open * @param[in] mode Open the file for reading or writing * @return This function returns a handle to the file @@ -40,7 +40,7 @@ DBfile* open( const std::string& filename, FileMode mode ); /*! * @brief Close silo file - * @detailed This function closes a silo file + * @details This function closes a silo file * @param[in] fid Handle to the open file */ void close( DBfile* fid ); @@ -48,7 +48,7 @@ void close( DBfile* fid ); /*! * @brief Get the variable type - * @detailed This function returns the type of variable data + * @details This function returns the type of variable data * @param[in] fid Handle to the open file * @param[in] name Name of variable */ @@ -57,7 +57,7 @@ VariableDataType varDataType( DBfile *dbfile, const std::string& name ); /*! * @brief Write data to silo - * @detailed This function writes an arbitrary array to silo + * @details This function writes an arbitrary array to silo * @param[in] fid Handle to the open file * @param[in] varname Variable name * @param[in] data Data to write @@ -68,7 +68,7 @@ void write( DBfile* fid, const std::string& varname, const std::vector& da /*! * @brief Write data to silo - * @detailed This function writes an arbitrary array to silo + * @details This function writes an arbitrary array to silo * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Data read @@ -79,7 +79,7 @@ std::vector read( DBfile* fid, const std::string& varname ); /*! 
* @brief Write a uniform grid - * @detailed This function writes a uniform grid to silo as a Quadmesh + * @details This function writes a uniform grid to silo as a Quadmesh * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @param[in] range Range of mesh { xmin, xmax, ymin, ymax, zmin, zmax } @@ -92,7 +92,7 @@ void writeUniformMesh( DBfile* fid, const std::string& meshname, /*! * @brief Read a uniform grid - * @detailed This function reads a uniform grid from silo + * @details This function reads a uniform grid from silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @param[out] range Range of mesh { xmin, xmax, ymin, ymax, zmin, zmax } @@ -104,7 +104,7 @@ void readUniformMesh( DBfile* fid, const std::string& meshname, /*! * @brief Write a uniform grid variable - * @detailed This function writes a uniform grid variable to silo as a Quadmesh + * @details This function writes a uniform grid variable to silo as a Quadmesh * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @param[in] N Number of cells in each direction @@ -119,7 +119,7 @@ void writeUniformMeshVariable( DBfile* fid, const std::string& meshname, const s /*! * @brief Read a uniform mesh grid variable - * @detailed This function read a uniform mesh variable to silo + * @details This function read a uniform mesh variable to silo * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data @@ -130,7 +130,7 @@ Array readUniformMeshVariable( DBfile* fid, const std::string& varname ); /*! * @brief Write a pointmesh - * @detailed This function writes a pointmesh to silo + * @details This function writes a pointmesh to silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @param[in] ndim Number of dimensions @@ -144,7 +144,7 @@ void writePointMesh( DBfile* fid, const std::string& meshname, /*! 
* @brief Read a pointmesh - * @detailed This function reads a pointmesh from silo + * @details This function reads a pointmesh from silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @return Returns the coordinates as a N x ndim array @@ -155,7 +155,7 @@ Array readPointMesh( DBfile* fid, const std::string& meshname ); /*! * @brief Write a pointmesh grid variable - * @detailed This function writes a pointmesh variable to silo + * @details This function writes a pointmesh variable to silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @param[in] varname Variable name @@ -168,7 +168,7 @@ void writePointMeshVariable( DBfile* fid, const std::string& meshname, /*! * @brief Read a pointmesh grid variable - * @detailed This function reads a pointmesh variable from silo + * @details This function reads a pointmesh variable from silo * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data @@ -179,7 +179,7 @@ Array readPointMeshVariable( DBfile* fid, const std::string& varname ); /*! * @brief Write a triangle mesh - * @detailed This function writes a triangle (or simplex) based mesh to silo + * @details This function writes a triangle (or simplex) based mesh to silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @param[in] ndim Number of dimensions for the coordinates @@ -196,7 +196,7 @@ void writeTriMesh( DBfile* fid, const std::string& meshname, /*! * @brief Read a triangle mesh - * @detailed This function reads a triangle (or simplex) based mesh to silo + * @details This function reads a triangle (or simplex) based mesh to silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @param[in] coords Coordinates of the points @@ -208,8 +208,9 @@ void readTriMesh( DBfile* fid, const std::string& meshname, Array& coords, /*! 
* @brief Write a triangle mesh grid variable - * @detailed This function writes a triangle mesh variable to silo + * @details This function writes a triangle mesh variable to silo * @param[in] fid Handle to the open file + * @param[in] ndim Number of dimensions * @param[in] meshname Mesh name * @param[in] varname Variable name * @param[in] data Variable data @@ -222,7 +223,7 @@ void writeTriMeshVariable( DBfile* fid, int ndim, const std::string& meshname, /*! * @brief Read a triangle mesh grid variable - * @detailed This function read a triangle mesh variable to silo + * @details This function read a triangle mesh variable to silo * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data @@ -233,7 +234,7 @@ Array readTriMeshVariable( DBfile* fid, const std::string& varname ); /*! * @brief Write a multimesh - * @detailed This function writes a multimesh to silo + * @details This function writes a multimesh to silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name * @param[in] subMeshNames Names of the sub meshes in the form "filename:meshname" @@ -246,7 +247,7 @@ void writeMultiMesh( DBfile* fid, const std::string& meshname, /*! 
* @brief Write a multivariable - * @detailed This function writes a multivariable to silo + * @details This function writes a multivariable to silo * @return This function returns a handle to the file * @param[in] fid Handle to the open file * @param[in] varname Mesh name diff --git a/doxygen/DoxygenMainpage.h b/doxygen/DoxygenMainpage.h index 0d42f215..d3dae521 100644 --- a/doxygen/DoxygenMainpage.h +++ b/doxygen/DoxygenMainpage.h @@ -4,6 +4,8 @@ * * - \ref IO "IO routines" * - \ref Utilities "Utility routines" + * - \ref silo "Access to silo routines" + * - \ref netcdf "Access to netcdf routines" * * \author James McClure */ From 8c5dd2e43a7423ec85c69daa60175e9bff5e0987 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Mon, 20 Jul 2020 13:41:48 -0400 Subject: [PATCH 005/205] Adding MPI test --- common/MPI.cpp | 10 +- common/Utilities.hpp | 18 + tests/CMakeLists.txt | 1 + tests/test_MPI.cpp | 1537 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1561 insertions(+), 5 deletions(-) create mode 100644 tests/test_MPI.cpp diff --git a/common/MPI.cpp b/common/MPI.cpp index 8818b9c5..fa3ea667 100644 --- a/common/MPI.cpp +++ b/common/MPI.cpp @@ -2810,7 +2810,7 @@ MPI_Request MPI_CLASS::IrecvBytes( /************************************************************************ * sendrecv * ************************************************************************/ -#if defined( USE_MPI ) || defined( USE_EXT_MPI ) +#if defined( USE_MPI ) template<> void MPI_CLASS::sendrecv( const char* sendbuf, int sendcount, int dest, int sendtag, char* recvbuf, int recvcount, int source, int recvtag ) const @@ -3760,12 +3760,12 @@ void MPI_CLASS::serializeStop() /**************************************************************************** * Function to start/stop MPI * ****************************************************************************/ -#ifdef USE_EXT_MPI +#ifdef USE_MPI static bool called_MPI_Init = false; #endif bool MPI_CLASS::MPI_Active() { -#ifdef USE_EXT_MPI +#ifdef 
USE_MPI int MPI_initialized, MPI_finialized; MPI_Initialized( &MPI_initialized ); MPI_Finalized( &MPI_finialized ); @@ -3779,7 +3779,7 @@ void MPI_CLASS::start_MPI( int argc, char *argv[], int profile_level ) changeProfileLevel( profile_level ); NULL_USE( argc ); NULL_USE( argv ); -#ifdef USE_EXT_MPI +#ifdef USE_MPI if ( MPI_Active() ) { called_MPI_Init = false; } else { @@ -3795,7 +3795,7 @@ void MPI_CLASS::start_MPI( int argc, char *argv[], int profile_level ) } void MPI_CLASS::stop_MPI() { -#ifdef USE_EXT_MPI +#ifdef USE_MPI int finalized; MPI_Finalized( &finalized ); if ( called_MPI_Init && !finalized ) { diff --git a/common/Utilities.hpp b/common/Utilities.hpp index d4bca4f7..bcdc4057 100644 --- a/common/Utilities.hpp +++ b/common/Utilities.hpp @@ -209,6 +209,24 @@ void quicksort( std::vector &x, std::vector &y ) } } } +template +void unique( std::vector &x ) +{ + if ( x.size() <= 1 ) + return; + // First perform a quicksort + quicksort( x ); + // Next remove duplicate entries + size_t pos = 1; + for ( size_t i = 1; i < x.size(); i++ ) { + if ( x[i] != x[pos - 1] ) { + x[pos] = x[i]; + pos++; + } + } + if ( pos < x.size() ) + x.resize( pos ); +} } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8d600321..7b5ac69a 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -78,6 +78,7 @@ ENDIF() # Sample test that will run with 1, 2, and 4 processors, failing with 4 or more procs ADD_LBPM_TEST_1_2_4( hello_world ) +ADD_LBPM_TEST_1_2_4( test_MPI ) ADD_LBPM_TEST( TestColorBubble ../example/Bubble/input.db) ADD_LBPM_TEST( TestColorSquareTube ../example/Bubble/input.db) diff --git a/tests/test_MPI.cpp b/tests/test_MPI.cpp new file mode 100644 index 00000000..c0cf35af --- /dev/null +++ b/tests/test_MPI.cpp @@ -0,0 +1,1537 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/MPI.h" +#include "common/UnitTest.h" +#include "common/Utilities.h" +#include "common/Utilities.hpp" +#include "ProfilerApp.h" + + +#if 
defined( WIN32 ) || defined( _WIN32 ) || defined( WIN64 ) || defined( _WIN64 ) +#include +#define sched_yield() Sleep( 0 ) +#else +#include +#endif + + +#undef MPI_CLASS +#define MPI_CLASS Utilities::MPI +#define MPI_ASSERT ASSERT + + +// Return the time elapsed in seconds +static inline double time() { return MPI_CLASS::time(); } + + +struct mytype { + int a; + double b; + mytype() + { + a = -1; + b = -1.0; + } + mytype( int i ) + { + a = i; + b = -1.0; + } + mytype( int i, double d ) + { + a = i; + b = d; + } + bool operator==( const mytype &other ) + { + if ( a == other.a && b == other.b ) + return true; + return false; + } + bool operator!=( const mytype &other ) + { + if ( a != other.a || b != other.b ) + return true; + return false; + } +}; + + +// Routines to test Reduce with known data types +// flag - 0: all tests should pass +// 1: basic reduce should pass, reduce with rank should fail with error message +template +int testReduce( MPI_CLASS comm, UnitTest *ut, int flag ); +template<> +int testReduce>( MPI_CLASS comm, UnitTest *ut, int ) +{ + PROFILE_START( "testReduce" ); + char message[128]; + std::complex rank = comm.getRank() + 1; + std::complex N = ( ( comm.getSize() * ( comm.getSize() + 1 ) ) / 2 ); + // Test sumReduce + sprintf( message, "sumReduce (%s)", typeid( std::complex ).name() ); + if ( comm.sumReduce>( rank ) == N ) + ut->passes( message ); + else + ut->failure( message ); + sprintf( message, "sumReduce (%s) (x,y)", typeid( std::complex ).name() ); + std::complex y; + comm.sumReduce>( &rank, &y, 1 ); + if ( y == N ) + ut->passes( message ); + else + ut->failure( message ); + PROFILE_STOP( "testReduce" ); + return 2; // Return the number of tests +} +template +int testReduce( MPI_CLASS comm, UnitTest *ut, int flag ) +{ + PROFILE_START( "testReduce" ); + char message[128]; + auto rank = (type) comm.getRank(); + auto size = (type) comm.getSize(); + if ( (int) ( size ) != comm.getSize() ) { + sprintf( message, + "Reduce (%s) cannot represent 
the number of processors", + typeid( type ).name() ); + ut->expected_failure( message ); + PROFILE_STOP2( "testReduce" ); + return 0; + } + type x, y; + int N = ( ( comm.getSize() * ( comm.getSize() + 1 ) ) / 2 ); + // Test sumReduce + sprintf( message, "sumReduce (%s)", typeid( type ).name() ); + if ( ( (int) ( (type) N ) ) != N ) + ut->expected_failure( message ); // type cannot represent N + else if ( comm.sumReduce( rank + 1 ) == (type) N ) + ut->passes( message ); + else + ut->failure( message ); + sprintf( message, "sumReduce (%s) (x,y)", typeid( type ).name() ); + x = rank + 1; + comm.sumReduce( &x, &y, 1 ); + if ( ( (int) ( (type) N ) ) != N ) + ut->expected_failure( message ); + else if ( y == (type) N ) + ut->passes( message ); + else + ut->failure( message ); + // Test minReduce + sprintf( message, "minReduce (%s)", typeid( type ).name() ); + if ( comm.minReduce( rank + 1 ) == 1 ) + ut->passes( message ); + else + ut->failure( message ); + sprintf( message, "minReduce (%s) (x,y)", typeid( type ).name() ); + comm.minReduce( &x, &y, 1, nullptr ); + if ( y == 1 ) + ut->passes( message ); + else + ut->failure( message ); + // Test maxReduce + sprintf( message, "maxReduce (%s)", typeid( type ).name() ); + if ( comm.maxReduce( rank + 1 ) == size ) + ut->passes( message ); + else + ut->failure( message ); + sprintf( message, "maxReduce (%s) (x,y)", typeid( type ).name() ); + comm.maxReduce( &x, &y, 1, nullptr ); + if ( y == size ) + ut->passes( message ); + else + ut->failure( message ); + // Test minReduce with rank + int rank_of_min = -1; + int rank_of_max = -1; + type rank_min = rank + 1; + type rank_max = rank + 1; + sprintf( message, "minReduce-rank (%s)", typeid( type ).name() ); + try { + comm.minReduce( &rank_min, 1, &rank_of_min ); + if ( rank_min == 1 && rank_of_min == 0 ) + ut->passes( message ); + else + ut->failure( message ); + if ( flag == 1 && comm.getSize() > 1 ) + ut->failure( message ); + } catch ( ... 
) { + if ( flag == 1 && comm.getSize() > 1 ) + ut->expected_failure( message ); + else + ut->failure( message ); + } + sprintf( message, "minReduce-rank (%s) (x,y)", typeid( type ).name() ); + try { + comm.minReduce( &x, &rank_min, 1, &rank_of_min ); + if ( rank_min == 1 && rank_of_min == 0 ) + ut->passes( message ); + else + ut->failure( message ); + if ( flag == 1 && comm.getSize() > 1 ) + ut->failure( message ); + } catch ( ... ) { + if ( flag == 1 && comm.getSize() > 1 ) + ut->expected_failure( message ); + else + ut->failure( message ); + } + // Test maxReduce with rank + sprintf( message, "maxReduce-rank (%s)", typeid( type ).name() ); + try { + comm.maxReduce( &rank_max, 1, &rank_of_max ); + if ( rank_max == size && rank_of_max == comm.getSize() - 1 ) + ut->passes( message ); + else + ut->failure( message ); + if ( flag == 1 && comm.getSize() > 1 ) + ut->failure( message ); + } catch ( ... ) { + if ( flag == 1 && comm.getSize() > 1 ) + ut->expected_failure( message ); + else + ut->failure( message ); + } + sprintf( message, "maxReduce-rank (%s) (x,y)", typeid( type ).name() ); + try { + comm.maxReduce( &x, &rank_max, 1, &rank_of_max ); + if ( rank_max == size && rank_of_max == comm.getSize() - 1 ) + ut->passes( message ); + else + ut->failure( message ); + if ( flag == 1 && comm.getSize() > 1 ) + ut->failure( message ); + } catch ( ... 
) { + if ( flag == 1 && comm.getSize() > 1 ) + ut->expected_failure( message ); + else + ut->failure( message ); + } + PROFILE_STOP( "testReduce" ); + return 10; // Return the number of tests +} + + +// Routine to test Scan with known data types +// flag - 0: all tests should pass +// 1: only sumScan is valid (complex) +template +int testScan( MPI_CLASS comm, UnitTest *ut, int flag = 0 ) +{ + PROFILE_START( "testScan" ); + char message[500]; + auto x = ( type )( comm.getRank() + 1 ); + type y; + sprintf( message, "sumScan (%s)", typeid( type ).name() ); + comm.sumScan( &x, &y, 1 ); + auto N = ( type )( ( ( comm.getRank() + 1 ) * ( comm.getRank() + 2 ) ) / 2 ); + if ( y == N ) + ut->passes( message ); + else + ut->failure( message ); + if ( flag == 1 ) { + PROFILE_STOP2( "testScan" ); + return 1; + } + sprintf( message, "minScan (%s)", typeid( type ).name() ); + comm.minScan( &x, &y, 1 ); + if ( y == (type) 1 ) + ut->passes( message ); + else + ut->failure( message ); + sprintf( message, "maxScan (%s)", typeid( type ).name() ); + comm.maxScan( &x, &y, 1 ); + if ( y == x ) + ut->passes( message ); + else + ut->failure( message ); + PROFILE_STOP( "testScan" ); + return 3; // Return the number of tests +} + + +// Routine to test bcast +template +int testBcast( MPI_CLASS comm, UnitTest *ut, type default_val, type new_val ) +{ + PROFILE_START( "testBcast" ); + char message[128]; + for ( int i = 0; i < comm.getSize(); i++ ) { + type tmp1 = default_val; + if ( comm.getRank() == i ) + tmp1 = new_val; + sprintf( message, "bcast scalar (%s) from rank %i", typeid( type ).name(), i ); + if ( comm.bcast( tmp1, i ) == new_val ) + ut->passes( message ); + else + ut->failure( message ); + type tmp2[2]; + tmp2[0] = default_val; + tmp2[1] = default_val; + if ( comm.getRank() == i ) { + tmp2[0] = new_val; + tmp2[1] = new_val; + } + sprintf( message, "bcast vector (%s) from rank %i", typeid( type ).name(), i ); + comm.bcast( tmp2, 2, i ); + if ( tmp2[0] == new_val && tmp2[1] == new_val 
) + ut->passes( message ); + else + ut->failure( message ); + } + PROFILE_STOP( "testBcast" ); + return 2 * comm.getSize(); // Return the number of tests +} + + +// Routine to test allGather +template +int testAllGather( MPI_CLASS comm, UnitTest *ut ) +{ + PROFILE_START( "testAllGather" ); + char message[128]; + // Test scalar allGather + auto x1 = (type) comm.getRank(); + auto *x2 = new type[comm.getSize()]; + comm.allGather( x1, x2 ); + bool pass = true; + for ( int i = 0; i < comm.getSize(); i++ ) { + type test = i; + if ( x2[i] != test ) + pass = false; + } + sprintf( message, "allGather scalar (%s)", typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + // Test vector allGather + int N = ( comm.getSize() * ( comm.getSize() + 1 ) ) / 2; + auto *x3 = new type[comm.getRank() + 1]; + auto *x4 = new type[N]; + auto *x5 = new type[N]; + int *size = new int[comm.getSize()]; + for ( int i = 0; i <= comm.getRank(); i++ ) + x3[i] = (type) comm.getRank(); + int tot1 = comm.allGather( x3, comm.getRank() + 1, x4 ); + int tot2 = comm.allGather( x3, comm.getRank() + 1, x5, size ); + pass = true; + if ( tot1 != N || tot2 != N ) + pass = false; + int k = 0; + for ( int i = 0; i < comm.getSize(); i++ ) { + if ( size[i] != i + 1 ) + pass = false; + if ( !pass ) + break; + for ( int j = 0; j <= i; j++ ) { + type test = i; + if ( x4[k] != test || x5[k] != test ) + pass = false; + k++; + } + } + sprintf( message, "allGather vector (%s)", typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + delete[] x2; + delete[] x3; + delete[] x4; + delete[] x5; + delete[] size; + // Test vector allGather with know recive sizes and non-zero displacements + auto *send = new type[comm.getRank() + 1]; + auto *recv = new type[comm.getSize() * comm.getSize() + 1]; + auto *recv_size = new int[comm.getSize()]; + auto *recv_disp = new int[comm.getSize()]; + for ( int i = 0; i <= comm.getRank(); i++ ) + send[i] = i; 
+ for ( int i = 0; i < comm.getSize(); i++ ) + recv_size[i] = i + 1; + for ( int i = 0; i < comm.getSize(); i++ ) + recv_disp[i] = 1 + i * comm.getSize() + comm.getSize() - i - 1; + for ( int i = 0; i <= comm.getSize() * comm.getSize(); i++ ) + recv[i] = (type) -1; + int tot = comm.allGather( send, comm.getRank() + 1, recv, recv_size, recv_disp, true ); + pass = true; + if ( tot != N ) + pass = false; + auto test = (type) -1; + if ( recv[0] != test ) + pass = false; + for ( int i = 0; i < comm.getSize(); i++ ) { + for ( int j = 0; j < comm.getSize(); j++ ) { + int l = j + i * comm.getSize() + 1 - recv_disp[i]; + if ( l >= 0 ) + test = l; + else + test = (type) -1; + if ( recv[j + i * comm.getSize() + 1] != test ) + pass = false; + } + } + sprintf( message, + "allGather vector with known recv and non-zero displacements (%s)", + typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + delete[] send; + delete[] recv; + delete[] recv_size; + delete[] recv_disp; + // Test vector allGather with no elements + size = new int[comm.getSize()]; + sprintf( message, "allGather scalar (%s)", typeid( type ).name() ); + try { + comm.allGather( &x1, 0, (type *) nullptr, size ); + ut->passes( message ); + } catch ( ... 
) { + ut->failure( message ); + } + delete[] size; + PROFILE_STOP( "testAllGather" ); + return 4; // Return the number of tests +} + + +// Routine to test setGather +template +int testSetGather( MPI_CLASS comm, UnitTest *ut ) +{ + PROFILE_START( "testSetGather" ); + char message[500]; + auto x1 = (type) comm.getRank(); + std::set set; + set.insert( x1 ); + comm.setGather( set ); + bool pass = true; + for ( int i = 0; i < comm.getSize(); i++ ) { + type x2 = i; + if ( set.find( x2 ) == set.end() ) + pass = false; + } + sprintf( message, "setGather (%s)", typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + PROFILE_STOP( "testSetGather" ); + return 1; // Return the number of tests +} + + +// Routine to test mapGather +template +int testMapGather( MPI_CLASS comm, UnitTest *ut ) +{ + PROFILE_START( "testMapGather" ); + char message[128]; + auto x1 = (type) comm.getRank(); + std::map map; + map.insert( std::pair( comm.getRank(), x1 ) ); + comm.mapGather( map ); + bool pass = true; + for ( int i = 0; i < comm.getSize(); i++ ) { + type x2 = i; + auto it = map.find( i ); + if ( it == map.end() ) + pass = false; + else if ( it->second != x2 ) + pass = false; + } + sprintf( message, "mapGather (%s)", typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + PROFILE_STOP( "testMapGather" ); + return 1; // Return the number of tests +} + + +// Routine to test allToAll +template +int testAllToAll( MPI_CLASS comm, UnitTest *ut ) +{ + PROFILE_START( "testAllToAll" ); + bool pass; + char message[128]; + int size = 0; + type *send_data, *recv_data; + auto *send_cnt = new int[comm.getSize()]; + auto *recv_cnt = new int[comm.getSize()]; + auto *send_disp = new int[comm.getSize()]; + auto *recv_disp = new int[comm.getSize()]; + // Test allToAll with a scalar value to each processor + send_data = new type[comm.getSize()]; + recv_data = new type[comm.getSize()]; + for ( int i = 0; i < comm.getSize(); 
i++ ) + send_data[i] = comm.getSize(); + comm.allToAll( 1, send_data, recv_data ); + pass = true; + for ( int i = 0; i < comm.getSize(); i++ ) { + type test = comm.getSize(); + if ( recv_data[i] != test ) + pass = false; + } + delete[] send_data; + delete[] recv_data; + sprintf( message, "allToAll with scalar (%s)", typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + // Test allToAll vector with a scalar value to each processor + send_data = new type[comm.getSize()]; + recv_data = new type[comm.getSize()]; + for ( int i = 0; i < comm.getSize(); i++ ) { + send_cnt[i] = 1; + recv_cnt[i] = 1; + send_disp[i] = i; + recv_disp[i] = i; + send_data[i] = comm.getSize(); + recv_data[i] = 0; + } + size = comm.allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt, recv_disp, true ); + pass = true; + if ( size != comm.getSize() ) + pass = false; + for ( int i = 0; i < comm.getSize(); i++ ) { + type test = comm.getSize(); + if ( recv_data[i] != test ) + pass = false; + } + delete[] send_data; + delete[] recv_data; + sprintf( message, "allToAll vector with scalar (%s)", typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + // Test allToAll with a variable number of values per processor and spacing + send_data = new type[comm.getSize() * comm.getSize()]; + recv_data = new type[2 * comm.getRank() * comm.getSize()]; + for ( int i = 0; i < comm.getSize(); i++ ) { + send_cnt[i] = i; + recv_cnt[i] = comm.getRank(); + send_disp[i] = i * comm.getSize(); + recv_disp[i] = 2 * i * comm.getRank(); + for ( int j = 0; j < comm.getSize(); j++ ) { + if ( j < i ) + send_data[j + send_disp[i]] = i; + else + send_data[j + send_disp[i]] = (type) -1; + } + } + for ( int i = 0; i < 2 * comm.getRank() * comm.getSize(); i++ ) + recv_data[i] = (type) -2; + size = comm.allToAll( send_data, send_cnt, send_disp, recv_data, recv_cnt, recv_disp, true ); + pass = true; + if ( size != comm.getRank() * 
comm.getSize() ) + pass = false; + for ( int i = 0; i < comm.getSize(); i++ ) { + for ( int j = 0; j < 2 * comm.getRank(); j++ ) { + if ( j < comm.getRank() ) { + type test = comm.getRank(); + if ( recv_data[j + recv_disp[i]] != test ) + pass = false; + } else { + auto test = (type) -2; + if ( recv_data[j + recv_disp[i]] != test ) + pass = false; + } + } + } + delete[] send_data; + delete[] recv_data; + sprintf( message, + "allToAll with vector of known size and displacements (%s)", + typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + // Test allToAll with a unknown recieve length + send_data = new type[comm.getSize() * comm.getSize()]; + auto *recv_data1 = new type[comm.getSize() * comm.getSize()]; + auto *recv_data2 = new type[comm.getSize() * comm.getSize()]; + for ( int i = 0; i < comm.getSize(); i++ ) { + send_cnt[i] = i; + recv_cnt[i] = -1; + send_disp[i] = i * comm.getSize(); + recv_disp[i] = -1; + for ( int j = 0; j < comm.getSize(); j++ ) { + if ( j < i ) + send_data[j + send_disp[i]] = i; + else + send_data[j + send_disp[i]] = (type) -1; + } + } + for ( int i = 0; i < comm.getSize() * comm.getSize(); i++ ) { + recv_data1[i] = (type) -2; + recv_data2[i] = (type) -2; + } + int size1 = + comm.allToAll( send_data, send_cnt, send_disp, recv_data1, recv_cnt, recv_disp, false ); + int size2 = comm.allToAll( send_data, send_cnt, send_disp, recv_data2 ); + bool pass1 = true; + bool pass2 = true; + if ( size1 != comm.getRank() * comm.getSize() ) + pass1 = false; + if ( size2 != comm.getRank() * comm.getSize() ) + pass2 = false; + for ( int i = 0; i < comm.getSize(); i++ ) { + if ( recv_cnt[i] != comm.getRank() || recv_disp[i] != i * comm.getRank() ) + pass1 = false; + } + for ( int i = 0; i < comm.getRank() * comm.getSize(); i++ ) { + type test = comm.getRank(); + if ( recv_data1[i] != test ) + pass1 = false; + if ( recv_data2[i] != test ) + pass2 = false; + } + delete[] send_data; + delete[] recv_data1; + delete[] 
recv_data2; + sprintf( message, "allToAll with vector of unknown size (%s)", typeid( type ).name() ); + if ( pass1 ) + ut->passes( message ); + else + ut->failure( message ); + sprintf( + message, "allToAll with vector of unknown size with NULL recv(%s)", typeid( type ).name() ); + if ( pass2 ) + ut->passes( message ); + else + ut->failure( message ); + // Free temporary variables + delete[] send_cnt; + delete[] recv_cnt; + delete[] send_disp; + delete[] recv_disp; + PROFILE_STOP( "testAllToAll" ); + return 5; // Return the number of tests +} + + +// Routine to test send/recv +template +int testSendRecv( MPI_CLASS comm, UnitTest *ut, type v1, type v2 ) +{ + PROFILE_START( "testSendRecv" ); + char message[128]; + // Test send-recv with a known length + for ( int i = 0; i < comm.getSize(); i++ ) { + for ( int j = 0; j < comm.getSize(); j++ ) { + type x = v1; + int tag = i + j * comm.getSize(); + sprintf( message, "send-recv %i-%i known length (%s)", i, j, typeid( type ).name() ); + if ( i == j ) { + // We are not allowed to send/recieve from the same processor + continue; + } else if ( i == comm.getRank() ) { + // We are sending + x = v2; + comm.send( &x, 1, j, tag ); + } else if ( j == comm.getRank() ) { + // We are recieving + int size = 1; + comm.recv( &x, size, i, false, tag ); + if ( size == 1 && x == v2 ) + ut->passes( message ); + else + ut->failure( message ); + } + } + } + // Test send-recv with an unknown length + for ( int i = 0; i < comm.getSize(); i++ ) { + for ( int j = 0; j < comm.getSize(); j++ ) { + type x = v1; + int tag = i + j * comm.getSize(); + sprintf( message, "send-recv %i-%i unknown length (%s)", i, j, typeid( type ).name() ); + if ( i == j ) { + // We are not allowed to send/recieve from the same processor + continue; + } else if ( i == comm.getRank() ) { + // We are sending + x = v2; + comm.send( &x, 1, j, tag ); + } else if ( j == comm.getRank() ) { + // We are recieving + int size = 1; + comm.recv( &x, size, i, true, tag ); + if ( size 
== 1 && x == v2 ) + ut->passes( message ); + else + ut->failure( message ); + } + } + } + // Test send-recv with an empty length + for ( int i = 0; i < comm.getSize(); i++ ) { + for ( int j = 0; j < comm.getSize(); j++ ) { + type x = v1; + int tag = i + j * comm.getSize(); + sprintf( message, "send-recv %i-%i empty length (%s)", i, j, typeid( type ).name() ); + if ( i == j ) { + // We are not allowed to send/recieve from the same processor + continue; + } else if ( i == comm.getRank() ) { + // We are sending + x = v2; + comm.send( &x, 0, j, tag ); + } else if ( j == comm.getRank() ) { + // We are recieving + int size = comm.probe( i, tag ); + comm.recv( &x, size, i, false, tag ); + if ( size == 0 ) + ut->passes( message ); + else + ut->failure( message ); + } + } + } + PROFILE_STOP( "testSendRecv" ); + return 3 * comm.getSize() * comm.getSize(); // Return the number of tests +} + + +// Routine to test Isend/Irecv +template +int testIsendIrecv( MPI_CLASS comm, UnitTest *ut, type v1, type v2 ) +{ + PROFILE_START( "testIsendIrecv" ); + char message[128]; + std::vector sendRequest; + std::vector recvRequest; + // Send all messages + for ( int i = 0; i < comm.getSize(); i++ ) { + // Check if the current rank is sending + if ( i != comm.getRank() ) + continue; + for ( int j = 0; j < comm.getSize(); j++ ) { + // Start a non-blocking send + int tag = i + j * comm.getSize(); + MPI_Request request = comm.Isend( &v1, 1, j, tag ); + sendRequest.insert( sendRequest.begin(), request ); + } + } + // Recv all messages + auto *recv_buffer = new type[comm.getSize()]; + for ( int i = 0; i < comm.getSize(); i++ ) + recv_buffer[i] = v2; + recv_buffer[comm.getRank()] = v1; + for ( int j = 0; j < comm.getSize(); j++ ) { + // Check if the current rank is recieving + if ( j != comm.getRank() ) + continue; + for ( int i = 0; i < comm.getSize(); i++ ) { + // Start a non-blocking recv + int tag = i + j * comm.getSize(); + MPI_Request request = comm.Irecv( &recv_buffer[i], 1, i, tag ); + 
recvRequest.insert( recvRequest.begin(), request ); + } + } + // Wait for all communications to finish + MPI_CLASS::wait( sendRequest[0] ); + sendRequest.erase( sendRequest.begin() + 0 ); + while ( !sendRequest.empty() ) { + int index = comm.waitAny( sendRequest.size(), &( sendRequest[0] ) ); + sendRequest.erase( sendRequest.begin() + index ); + } + auto finished = MPI_CLASS::waitSome( recvRequest.size(), recvRequest.data() ); + if ( !recvRequest.empty() ) { + MPI_ASSERT( !finished.empty() ); + for ( auto it = finished.rbegin(); it != finished.rend(); ++it ) + recvRequest.erase( recvRequest.begin() + ( *it ) ); + } + if ( !recvRequest.empty() ) + MPI_CLASS::waitAll( recvRequest.size(), &( recvRequest[0] ) ); + Utilities::unique( finished ); + // Check the recieved values + bool pass = true; + for ( int i = 0; i < comm.getSize(); i++ ) { + if ( recv_buffer[i] != v1 ) + pass = false; + } + sprintf( message, "Isend-Irecv (%s)", typeid( type ).name() ); + if ( pass ) + ut->passes( message ); + else + ut->failure( message ); + delete[] recv_buffer; + PROFILE_STOP( "testIsendIrecv" ); + return comm.getSize() * comm.getSize(); // Return the number of tests +} + + +// Routine to test CommRanks +int testCommRanks( MPI_CLASS comm, UnitTest *ut ) +{ + std::vector neighbors; + for ( int i = 0; i < comm.getSize(); i++ ) + if ( ( i % 2 ) == 0 ) + neighbors.push_back( i ); + std::vector neighbors2 = comm.commRanks( neighbors ); + bool pass = true; + if ( comm.getRank() % 2 == 0 ) { + pass = static_cast( neighbors2.size() ) == comm.getSize(); + if ( pass ) { + for ( int i = 0; i < comm.getSize(); i++ ) + pass = pass && neighbors2[i] == i; + } + } else { + pass = neighbors2.empty(); + } + auto ranks = comm.globalRanks(); + pass = pass && (int) ranks.size() == comm.getSize(); + for ( int rank : ranks ) + pass = pass && rank >= 0; + auto ranks2 = ranks; + Utilities::unique( ranks2 ); + pass = pass && ranks.size() == ranks2.size(); + comm.barrier(); + if ( pass ) + ut->passes( 
"commRanks" ); + else + ut->failure( "commRanks" ); + return 1; // Return the number of tests +} + + +// Structure to contain timer results +struct testCommTimerResults { + int N_reduce; + int N_scan; + int N_bcast; + int N_allGather; + int N_setGather; + int N_mapGather; + int N_allToAll; + int N_sendRecv; + int N_IsendIrecv; + double t_reduce; + double t_scan; + double t_bcast; + double t_allGather; + double t_setGather; + double t_mapGather; + double t_allToAll; + double t_sendRecv; + double t_IsendIrecv; + // Constructor + testCommTimerResults() + { + N_reduce = 0; + N_scan = 0; + N_bcast = 0; + N_allGather = 0; + N_setGather = 0; + N_mapGather = 0; + N_allToAll = 0; + N_sendRecv = 0; + N_IsendIrecv = 0; + t_reduce = 0.0; + t_scan = 0.0; + t_bcast = 0.0; + t_allGather = 0.0; + t_setGather = 0.0; + t_mapGather = 0.0; + t_allToAll = 0.0; + t_sendRecv = 0.0; + t_IsendIrecv = 0.0; + } + // Print the results + void print() + { + printf( " Reduce: N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", + N_reduce, + t_reduce, + 1e6 * t_reduce / N_reduce ); + printf( " Scan: N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", + N_scan, + t_scan, + 1e6 * t_scan / N_scan ); + printf( " Bcast: N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", + N_bcast, + t_bcast, + 1e6 * t_bcast / N_bcast ); + printf( " allGather: N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", + N_allGather, + t_allGather, + 1e6 * t_allGather / N_allGather ); + printf( " allToAll: N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", + N_allToAll, + t_allToAll, + 1e6 * t_allToAll / N_allToAll ); + printf( " send-recv: N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", + N_sendRecv, + t_sendRecv, + 1e6 * t_sendRecv / N_sendRecv ); + printf( " Isend-Irecv: N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", + N_IsendIrecv, + t_IsendIrecv, + 1e6 * t_IsendIrecv / N_IsendIrecv ); + printf( " setGather: N = %5i, t_tot = %0.5e, t_avg = %6.1f us\n", + N_setGather, + t_setGather, + 1e6 * t_setGather / N_setGather ); + printf( " mapGather: N = %5i, t_tot = 
%0.5e, t_avg = %6.1f us\n", + N_mapGather, + t_mapGather, + 1e6 * t_mapGather / N_mapGather ); + } +}; + + +// This routine will test a single MPI communicator +testCommTimerResults testComm( MPI_CLASS comm, UnitTest *ut ) +{ + PROFILE_START( "testComm" ); + testCommTimerResults timer; + double start_time; + // Test the tag + int tag0 = comm.newTag(); + MPI_CLASS comm2 = comm; + MPI_CLASS comm3( comm ); + bool pass = tag0 > 0 && tag0 < comm.maxTag(); + for ( int i = 1; i < 64; i++ ) { + if ( comm.newTag() != tag0 + i ) + pass = false; + } + for ( int i = 1; i <= 64; i++ ) { + if ( comm2.newTag() != tag0 + 63 + i ) + pass = false; + } + for ( int i = 1; i <= 128; i++ ) { + if ( comm3.newTag() != tag0 + 127 + i ) + pass = false; + } + if ( pass ) + ut->passes( "newTag" ); + else + ut->failure( "newTag" ); + // Test all and any reduce + bool test1 = !comm.allReduce( comm.getRank() != 0 ); + bool test2 = comm.allReduce( true ); + if ( test1 && test2 ) + ut->passes( "allReduce" ); + else + ut->failure( "allReduce" ); + test1 = comm.anyReduce( comm.getRank() == 0 ); + test2 = !comm.anyReduce( false ); + if ( test1 && test2 ) + ut->passes( "anyReduce" ); + else + ut->failure( "anyReduce" ); + // Test min, max, and sum reduce + start_time = time(); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce( comm, ut, 0 ); + timer.N_reduce += testReduce>( + comm, ut, 2 ); // only sumreduce is valid for complex numbers + mytype tmp1( 1, -1.0 ); + mytype tmp2; + if ( comm.getSize() > 1 ) { + // We can't perform a reduce on an unknown data type (this should throw an error) + try { + // This should fail + tmp2 = comm.sumReduce( tmp1 ); 
+ ut->failure( "sumReduce should give an error with an unknown type" ); + } catch ( ... ) { + ut->passes( "sumReduce should give an error with an unknown type" ); + } + try { + // This should fail + tmp2 = comm.minReduce( tmp1 ); + ut->failure( "minReduce should give an error with an unknown type" ); + } catch ( ... ) { + ut->passes( "minReduce should give an error with an unknown type" ); + } + try { + // This should fail + tmp2 = comm.maxReduce( tmp1 ); + ut->failure( "maxReduce should give an error with an unknown type" ); + } catch ( ... ) { + ut->passes( "maxReduce should give an error with an unknown type" ); + } + timer.N_reduce += 3; + } + timer.t_reduce = time() - start_time; + // Test min, max, and sum scan + start_time = time(); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += testScan( comm, ut ); + timer.N_scan += + testScan>( comm, ut, 1 ); // Only sumScan is valid with complex data + if ( comm.getSize() > 1 ) { + // We can't perform a reduce on an unknown data type (this should throw an error) + try { + // This should fail + comm.sumScan( &tmp1, &tmp2, 1 ); + ut->failure( "sumReduce should give an error with an unknown type" ); + } catch ( ... ) { + ut->passes( "sumReduce should give an error with an unknown type" ); + } + try { + // This should fail + comm.minScan( &tmp1, &tmp2, 1 ); + ut->failure( "minReduce should give an error with an unknown type" ); + } catch ( ... ) { + ut->passes( "minReduce should give an error with an unknown type" ); + } + try { + // This should fail + comm.maxScan( &tmp1, &tmp2, 1 ); + ut->failure( "maxReduce should give an error with an unknown type" ); + } catch ( ... 
) { + ut->passes( "maxReduce should give an error with an unknown type" ); + } + timer.N_scan += 3; + } + timer.t_scan = time() - start_time; + // Test bcast + start_time = time(); + timer.N_bcast += testBcast( comm, ut, 0, 1 ); + timer.N_bcast += testBcast( comm, ut, -1, 1 ); + timer.N_bcast += testBcast( comm, ut, 0, 1 ); + timer.N_bcast += testBcast( comm, ut, -1, 1 ); + timer.N_bcast += testBcast( comm, ut, 0, 1 ); + timer.N_bcast += testBcast( comm, ut, -1, 1 ); + timer.N_bcast += testBcast( comm, ut, 0, 1 ); + timer.N_bcast += testBcast( comm, ut, -1.0, 1.0 ); + timer.N_bcast += testBcast( comm, ut, -1.0, 1.0 ); + mytype tmp3( -1, -1.0 ); + mytype tmp4( 1, 1.0 ); + timer.N_bcast += testBcast( comm, ut, tmp3, tmp4 ); + timer.t_bcast = time() - start_time; + // Test barrier + comm.barrier(); + // Test gather + start_time = time(); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.N_allGather += testAllGather>( comm, ut ); + timer.N_allGather += testAllGather( comm, ut ); + timer.t_allGather = time() - start_time; + // Test std::set gather + start_time = time(); + timer.N_setGather += testSetGather( comm, ut ); + timer.N_setGather += testSetGather( comm, ut ); + timer.N_setGather += testSetGather( comm, ut ); + timer.N_setGather += testSetGather( comm, ut ); + timer.N_setGather += testSetGather( comm, ut ); + timer.N_setGather += testSetGather( comm, ut ); + timer.N_setGather += testSetGather( comm, ut ); + timer.N_setGather += testSetGather( comm, ut ); + timer.N_setGather += testSetGather( comm, ut ); + timer.t_setGather = time() - start_time; + // Test 
std::map gather + start_time = time(); + timer.N_mapGather += testMapGather( comm, ut ); + timer.N_mapGather += testMapGather( comm, ut ); + timer.N_mapGather += testMapGather( comm, ut ); + timer.N_mapGather += testMapGather( comm, ut ); + timer.N_mapGather += testMapGather( comm, ut ); + timer.N_mapGather += testMapGather( comm, ut ); + timer.N_mapGather += testMapGather( comm, ut ); + timer.N_mapGather += testMapGather( comm, ut ); + timer.N_mapGather += testMapGather( comm, ut ); + timer.t_mapGather = time() - start_time; + // Test allToAlll + start_time = time(); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.N_allToAll += testAllToAll>( comm, ut ); + timer.N_allToAll += testAllToAll( comm, ut ); + timer.t_allToAll = time() - start_time; + // Test send/recv + start_time = time(); + timer.N_sendRecv += testSendRecv( comm, ut, 0, 1 ); + timer.N_sendRecv += testSendRecv( comm, ut, -1, 1 ); + timer.N_sendRecv += testSendRecv( comm, ut, 0, 1 ); + timer.N_sendRecv += testSendRecv( comm, ut, -1, 1 ); + timer.N_sendRecv += testSendRecv( comm, ut, 0, 1 ); + timer.N_sendRecv += testSendRecv( comm, ut, -1, 1 ); + timer.N_sendRecv += testSendRecv( comm, ut, 0, 1 ); + timer.N_sendRecv += testSendRecv( comm, ut, -1.0, 1.0 ); + timer.N_sendRecv += testSendRecv( comm, ut, -1.0, 1.0 ); + timer.N_sendRecv += testSendRecv( comm, ut, tmp3, tmp4 ); + timer.t_sendRecv = time() - start_time; + // Test Isend/Irecv + start_time = time(); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, 0, 1 ); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, -1, 1 ); + timer.N_IsendIrecv += testIsendIrecv( comm, 
ut, 0, 1 ); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, -1, 1 ); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, 0, 1 ); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, -1, 1 ); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, 0, 1 ); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, -1.0, 1.0 ); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, -1.0, 1.0 ); + timer.N_IsendIrecv += testIsendIrecv( comm, ut, tmp3, tmp4 ); + timer.t_IsendIrecv = time() - start_time; + // Test commRanks + testCommRanks( comm, ut ); + PROFILE_STOP( "testComm" ); + return timer; +} + + +// Test comm dup and the number of communicators that can be created +void testCommDup( UnitTest *ut ) +{ +#if defined( USING_CLANG ) && defined( __APPLE__ ) + // The MPI error handler crashes so this test fails + // This seems to be a MAC? + Clang + MPICH? issue only + ut->expected_failure( "testCommDup skipped for this architecture/compiler" ); +#else + MPI_CLASS globalComm( MPI_COMM_WORLD ); + MPI_CLASS dupComm = globalComm.dup(); + if ( globalComm.getCommunicator() != dupComm.getCommunicator() && + dupComm.getSize() == globalComm.getSize() && dupComm.getRank() == globalComm.getRank() ) { + ut->passes( "dup comm" ); + } else { + ut->failure( "dup comm" ); + return; + } +#if defined( USE_PETSC ) && !defined( USE_MPI ) + ut->expected_failure( "Skipping dup tests, PETSc (no-mpi) has a limit of 128 unique comms" ); + return; +#endif + int N_comm_try = 2000; // Maximum number of comms to try and create + std::vector comms; + comms.reserve( N_comm_try ); + try { + for ( int i = 0; i < N_comm_try; i++ ) { + MPI_CLASS tmp_comm = globalComm.dup(); + comms.push_back( tmp_comm ); + MPI_ASSERT( globalComm.getCommunicator() != comms[i].getCommunicator() ); + MPI_ASSERT( comms.back().sumReduce( 1 ) == + globalComm.getSize() ); // We need to communicate as part of the test + } + ut->passes( Utilities::stringf( "Created %i comms", N_comm_try ) ); + } catch ( ... 
) { + if ( comms.size() < 252 ) { + ut->failure( "Could not create 252 different communicators" ); + } else { + int N = comms.size(); + ut->expected_failure( Utilities::stringf( + "Failed to create an unlimited number of comms (%i)", N ) ); + } + std::cout << "Maximum number of concurrent communicators: " << comms.size() << std::endl; + } + comms.clear(); + size_t N_dup = 0; + globalComm.barrier(); + try { + double start = MPI_CLASS::time(); + for ( int i = 0; i < N_comm_try; i++ ) { + MPI_CLASS tmp_comm1 = globalComm.dup(); + MPI_CLASS tmp_comm2 = globalComm.dup(); + MPI_ASSERT( globalComm.getCommunicator() != tmp_comm1.getCommunicator() ); + MPI_ASSERT( globalComm.getCommunicator() != tmp_comm2.getCommunicator() ); + MPI_ASSERT( tmp_comm1.getCommunicator() != tmp_comm2.getCommunicator() ); + MPI_ASSERT( tmp_comm1.sumReduce( 1 ) == + globalComm.getSize() ); // We need to communicate as part of the test + MPI_ASSERT( tmp_comm2.sumReduce( 1 ) == + globalComm.getSize() ); // We need to communicate as part of the test + N_dup += 2; + } + double stop = MPI_CLASS::time(); + ut->passes( "Created/Destroyed an unlimited number of comms" ); + char message[128]; + sprintf( message, + "Time to create/destroy comm using MPI_CLASS::dup() is: %0.1f us", + 1e6 * ( stop - start ) / N_dup ); + std::cout << message << std::endl; + } catch ( ... 
) { + ut->failure( "Failed to create/destroy an unlimited number of comms" ); + std::cout << "Maximum number of communicators created with destruction: " << N_dup + << std::endl; + } +#endif +} + + +// This test will test the MPI class +int main( int argc, char *argv[] ) +{ + // Start MPI + Utilities::MPI::start_MPI( argc, argv ); + + // Create the unit test + UnitTest ut; + PROFILE_ENABLE( 0 ); + PROFILE_START( "Main" ); + + + // Limit the scope so objects are destroyed + { + + // Get the start time for the tests + double start_time = time(); + + // Print the global size (if we are using MPI) + int global_size = 0; +#ifdef USE_MPI + MPI_Comm_size( MPI_COMM_WORLD, &global_size ); +#else + global_size = 1; +#endif + + // Test the global communicator (MPI_COMM_WORLD) + MPI_CLASS globalComm = MPI_CLASS( MPI_COMM_WORLD ); + if ( !globalComm.isNull() ) + ut.passes( "Global communicator created" ); + else + ut.failure( "Global communicator created" ); + if ( globalComm.getSize() == global_size ) + ut.passes( "Global communicator size" ); + else + ut.failure( "Global communicator size" ); + if ( globalComm.getRank() == 0 ) { + std::cout << "MPI_COMM_WORLD = " << global_size << " processors" << std::endl; + std::cout << " Largest tag value = " << globalComm.maxTag() << std::endl << std::endl; + } +#ifdef USE_MPI + if ( globalComm.getCommunicator() == MPI_COMM_WORLD ) + ut.passes( "Communicator == MPI_COMM_WORLD" ); + else + ut.failure( "Communicator == MPI_COMM_WORLD" ); +#endif + testCommTimerResults commTimer = testComm( globalComm, &ut ); + if ( globalComm.getRank() == 0 ) { + std::cout << "Results for global timer (rank 0)" << std::endl; + commTimer.print(); + std::cout << std::endl; + } + + // Test bcast with std::string + std::string rank_string; + if ( globalComm.getRank() == 0 ) + rank_string = "Rank 0"; + rank_string = globalComm.bcast( rank_string, 0 ); + if ( rank_string == "Rank 0" ) + ut.passes( "Bcast std::string" ); + else + ut.failure( "Bcast std::string" 
); + + // Test MPI_COMM_SELF + MPI_CLASS selfComm = MPI_CLASS( MPI_COMM_SELF ); + if ( !selfComm.isNull() ) + ut.passes( "Self communicator created" ); + else + ut.failure( "Self communicator created" ); +#ifdef USE_MPI + if ( selfComm.getCommunicator() == MPI_COMM_SELF ) + ut.passes( "Communicator == MPI_COMM_SELF" ); + else + ut.failure( "Communicator == MPI_COMM_SELF" ); +#endif + testComm( selfComm, &ut ); + + // Test == and != + if ( globalComm == globalComm && !( selfComm == globalComm ) ) + ut.passes( "==" ); + else + ut.failure( "==" ); + if ( selfComm != globalComm && !( globalComm != globalComm ) ) + ut.passes( "!=" ); + else + ut.failure( "!=" ); + + // Test MPI_COMM_NULL + MPI_CLASS nullComm = MPI_CLASS( MPI_COMM_NULL ); + if ( nullComm.isNull() ) + ut.passes( "Null communicator created" ); + else + ut.failure( "Null communicator created" ); + if ( nullComm.getSize() == 0 ) + ut.passes( "Null communicator has zero size" ); + else + ut.failure( "Null communicator has zero size" ); +#ifdef USE_MPI + if ( nullComm.getCommunicator() == MPI_COMM_NULL ) + ut.passes( "Communicator == MPI_COMM_NULL" ); + else + ut.failure( "Communicator == MPI_COMM_NULL" ); +#endif + + // Test dup + MPI_CLASS dupComm = globalComm.dup(); + if ( nullComm.dup().isNull() ) + ut.passes( "Null communicator duplicates a Null communicator" ); + else + ut.failure( "Null communicator duplicates a Null communicator" ); + testCommDup( &ut ); + + // Test compare + if ( globalComm.compare( globalComm ) == 1 ) + ut.passes( "compare comm global==global" ); + else + ut.failure( "compare comm global==global" ); + if ( globalComm.compare( dupComm ) == 3 ) + ut.passes( "compare comm global~=dup" ); + else + ut.failure( "compare comm global~=dup" ); + if ( global_size == 1 ) { + if ( globalComm.compare( selfComm ) == 3 ) + ut.passes( "compare comm global~=self (global size=1)" ); + else + ut.failure( "compare comm global~=self (global size=1)" ); + } else { + if ( globalComm.compare( selfComm ) == 
0 ) + ut.passes( "compare comm global!=self" ); + else + ut.failure( "compare comm global!=self" ); + } + + // Split the global comm and test + PROFILE_START( "Split" ); + int color; + if ( globalComm.getRank() == 0 ) + color = 0; + else if ( globalComm.getRank() < 3 ) + color = 1; + else + color = 2 + ( globalComm.getRank() - 2 ) / 4; + std::vector splitComms( 4 ); + splitComms[0] = globalComm.split( color ); + splitComms[1] = globalComm.split( color, globalComm.getRank() ); + if ( splitComms[0].getCommunicator() != globalComm.getCommunicator() && + splitComms[1].getCommunicator() != globalComm.getCommunicator() && + splitComms[0].getCommunicator() != splitComms[1].getCommunicator() ) + ut.passes( "split comm has different communicator" ); + else + ut.failure( "split comm has different communicator" ); + if ( globalComm.getSize() > 1 ) { + if ( splitComms[0].getSize() < globalComm.getSize() ) + ut.passes( "split comm is smaller" ); + else + ut.failure( "split comm is smaller" ); + } + if ( splitComms[0].getRank() == splitComms[1].getRank() ) + ut.passes( "split sort by rank" ); + else + ut.failure( "split sort by rank" ); + testComm( splitComms[0], &ut ); + splitComms[2] = globalComm.split( -1 ); + if ( splitComms[2].isNull() ) + ut.passes( "split with color=-1 returns NULL communicator" ); + else + ut.failure( "split with color=-1 returns NULL communicator" ); + splitComms[3] = splitComms[0]; // Make a copy to ensure there are no memory leaks + splitComms[3] = splitComms[2]; // Perform assignement to check memory leaks + MPI_ASSERT( splitComms[3] == splitComms[2] ); + PROFILE_STOP( "Split" ); + + // Test < <= > >= + if ( globalComm.getSize() > 1 ) { + if ( splitComms[0] < globalComm && splitComms[1] < globalComm && + !( globalComm < globalComm ) && !( globalComm < splitComms[0] ) ) + ut.passes( " < comm" ); + else + ut.failure( " < comm" ); + if ( splitComms[0] <= globalComm && splitComms[1] <= globalComm && + globalComm <= globalComm && !( globalComm <= 
splitComms[0] ) ) + ut.passes( " <= comm" ); + else + ut.failure( " <= comm" ); + if ( globalComm > splitComms[0] && globalComm > splitComms[1] && + !( globalComm > globalComm ) && !( splitComms[0] > globalComm ) ) + ut.passes( " > comm" ); + else + ut.failure( " > comm" ); + if ( globalComm >= splitComms[0] && globalComm >= splitComms[1] && + globalComm >= globalComm && !( splitComms[0] >= globalComm ) ) + ut.passes( " >= comm" ); + else + ut.failure( " >= comm" ); + } + + // Test intersection + // Test globalComm with selfComm + if ( globalComm.getSize() > 1 ) { + MPI_CLASS comm1 = MPI_CLASS::intersect( globalComm, selfComm ); + MPI_CLASS comm2 = MPI_CLASS::intersect( selfComm, globalComm ); + MPI_CLASS comm3 = MPI_CLASS::intersect( globalComm, globalComm ); + if ( comm1.compare( globalComm ) == 0 && comm1.compare( selfComm ) != 0 && + comm2.compare( globalComm ) == 0 && comm2.compare( selfComm ) != 0 && + comm3.compare( globalComm ) != 0 && comm3.compare( selfComm ) == 0 ) + ut.passes( "intersection of globalComm and selfComm" ); + else + ut.failure( "intersection of globalComm and selfComm" ); + } + + // Test case where we have disjoint sets (this can only happen of one of the comms is null) + { + MPI_CLASS intersection = MPI_CLASS::intersect( globalComm, nullComm ); + if ( intersection.isNull() ) + ut.passes( "intersection of non-overlapping comms" ); + else + ut.failure( "intersection of non-overlapping comms" ); + } + + // Test case where the comms partially overlap + if ( globalComm.getSize() > 2 ) { + int n = globalComm.getSize() - 1; + // Intersect 2 comms (all other ranks will be null) + MPI_CLASS split1 = globalComm.split( globalComm.getRank() == 0 ? -1 : 0 ); + MPI_CLASS split2 = globalComm.split( globalComm.getRank() == n ? 
-1 : 0 ); + MPI_CLASS intersection = MPI_CLASS::intersect( split1, split2 ); + bool pass = true; + if ( globalComm.getRank() == 0 || globalComm.getRank() == n ) { + if ( !intersection.isNull() ) + pass = false; + } else { + if ( intersection.compare( split1 ) != 0 || intersection.compare( split2 ) != 0 || + intersection.getSize() != globalComm.getSize() - 2 ) + pass = false; + } + // Intersect 2 sets for ranks (3 groups should result) + // split1 = globalComm.split(globalComm.getRank()==0?1:2); + // split2 = globalComm.split(globalComm.getRank()==n?1:2); + // intersection = MPI_CLASS::intersect( split1, split2 ); + // bool pass = true; + // if ( globalComm.getRank()==0 || globalComm.getRank()==n ) { + // if ( intersection.compare(selfComm)==0 ) + // pass = false; + //} else { + // if ( intersection.compare(split1)!=0 || intersection.compare(split2)!=0 || + // intersection.getSize()!=globalComm.getSize()-2 ) + // pass = false; + //} + if ( pass ) + ut.passes( "intersection of partially overlapping comms" ); + else + ut.failure( "intersection of partially overlapping comms" ); + } + + // Test splitByNode + MPI_CLASS nodeComm = globalComm.splitByNode(); + int length; + char name[MPI_MAX_PROCESSOR_NAME]; + MPI_Get_processor_name( name, &length ); + std::string localName( name ); + std::vector globalStrings( globalComm.getSize() ); + std::vector nodeStrings( nodeComm.getSize() ); + globalComm.allGather( localName, &globalStrings[0] ); + nodeComm.allGather( localName, &nodeStrings[0] ); + int N_local = 0; + for ( auto &nodeString : nodeStrings ) { + if ( nodeString == localName ) + N_local++; + } + int N_global = 0; + for ( auto &globalString : globalStrings ) { + if ( globalString == localName ) + N_global++; + } + if ( !nodeComm.isNull() && N_local == nodeComm.getSize() && N_local == N_global ) + ut.passes( "splitByNode" ); + else + ut.failure( "splitByNode" ); + + // Test the call to load balance the processes + MPI_CLASS::balanceProcesses( globalComm, 1 ); + 
std::vector cpus = MPI_CLASS::getProcessAffinity(); + size_t maxProcNode = nodeComm.maxReduce( cpus.size() ); + bool pass_balance = cpus.size() == maxProcNode && !cpus.empty(); + MPI_CLASS::balanceProcesses( globalComm, 2 ); + cpus = MPI_CLASS::getProcessAffinity(); + if ( cpus.size() < 1 || cpus.size() > maxProcNode / nodeComm.getSize() ) + pass_balance = false; + if ( pass_balance ) { + ut.passes( "balanceProcesses" ); + } else { +#ifdef __APPLE__ + ut.expected_failure( "balanceProcesses" ); +#else + ut.failure( "balanceProcesses" ); +#endif + } + + // Test the performance of sched_yield (used internally by MPI wait routines) + globalComm.barrier(); + double start_yield = time(); + for ( int i = 0; i < 10000; i++ ) + sched_yield(); + double time_yield = ( time() - start_yield ) / 10000; + if ( globalComm.getRank() == 0 ) + std::cout << "Time to yield: " << time_yield * 1e6 << " us" << std::endl; + + // Test time and tick + double end_time = MPI_CLASS::time(); + double time_res = MPI_CLASS::tick(); + if ( globalComm.getRank() == 0 ) { + std::cout << "Time to run tests: " << end_time - start_time << std::endl; + std::cout << "Timer resolution: " << time_res << std::endl; + if ( time_res > 0 && time_res < 1 && ( end_time - start_time ) >= time_res ) + ut.passes( "time and tick" ); + else + ut.failure( "time and tick" ); + std::cout << std::endl; + } + + } // Limit the scope so objects are destroyed + + // Finished testing, report the results + PROFILE_START( "Report" ); + double start_time = time(); + ut.report(); + int num_failed = ut.NumFailGlobal(); + double end_time = time(); + if ( MPI_CLASS( MPI_COMM_WORLD ).getRank() == 0 ) + std::cout << "Time to report: " << end_time - start_time << std::endl << std::endl; + PROFILE_STOP( "Report" ); + PROFILE_STOP( "Main" ); + + // Shutdown + PROFILE_SAVE( "test_MPI" ); + ut.reset(); + Utilities::MPI::stop_MPI(); + return num_failed; +} From 00c30866f5c2fb18cdcf304ca6a26911072144c2 Mon Sep 17 00:00:00 2001 From: James 
McClure Date: Wed, 29 Jul 2020 09:53:39 -0400 Subject: [PATCH 006/205] copy electrokinetic from color --- models/ElectroKinetic.cpp | 1568 +++++++++++++++++++++++++++++++++++++ models/ElectroKinetic.h | 88 +++ 2 files changed, 1656 insertions(+) create mode 100644 models/ElectroKinetic.cpp create mode 100644 models/ElectroKinetic.h diff --git a/models/ElectroKinetic.cpp b/models/ElectroKinetic.cpp new file mode 100644 index 00000000..a8c21a75 --- /dev/null +++ b/models/ElectroKinetic.cpp @@ -0,0 +1,1568 @@ +/* +color lattice boltzmann model + */ +#include "models/ColorModel.h" +#include "analysis/distance.h" +#include "analysis/morphology.h" +#include "common/Communication.h" +#include "common/ReadMicroCT.h" +#include +#include + +ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM): +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), +Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), +Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) +{ + REVERSE_FLOW_DIRECTION = false; +} +ScaLBL_ColorModel::~ScaLBL_ColorModel(){ + +} + +/*void ScaLBL_ColorModel::WriteCheckpoint(const char *FILENAME, const double *cPhi, const double *cfq, int Np) +{ + int q,n; + double value; + ofstream File(FILENAME,ios::binary); + for (n=0; n( filename ); + domain_db = db->getDatabase( "Domain" ); + color_db = db->getDatabase( "Color" ); + analysis_db = db->getDatabase( "Analysis" ); + vis_db = db->getDatabase( "Visualization" ); + + // set defaults + timestepMax = 100000; + tauA = tauB = 1.0; + rhoA = rhoB = 1.0; + Fx = Fy = Fz = 0.0; + alpha=1e-3; + beta=0.95; + Restart=false; + din=dout=1.0; + flux=0.0; + + // Color Model parameters + if (color_db->keyExists( "timestepMax" )){ + timestepMax = color_db->getScalar( "timestepMax" ); + } + if (color_db->keyExists( "tauA" )){ + tauA = color_db->getScalar( "tauA" ); + } + if 
(color_db->keyExists( "tauB" )){ + tauB = color_db->getScalar( "tauB" ); + } + if (color_db->keyExists( "rhoA" )){ + rhoA = color_db->getScalar( "rhoA" ); + } + if (color_db->keyExists( "rhoB" )){ + rhoB = color_db->getScalar( "rhoB" ); + } + if (color_db->keyExists( "F" )){ + Fx = color_db->getVector( "F" )[0]; + Fy = color_db->getVector( "F" )[1]; + Fz = color_db->getVector( "F" )[2]; + } + if (color_db->keyExists( "alpha" )){ + alpha = color_db->getScalar( "alpha" ); + } + if (color_db->keyExists( "beta" )){ + beta = color_db->getScalar( "beta" ); + } + if (color_db->keyExists( "Restart" )){ + Restart = color_db->getScalar( "Restart" ); + } + if (color_db->keyExists( "din" )){ + din = color_db->getScalar( "din" ); + } + if (color_db->keyExists( "dout" )){ + dout = color_db->getScalar( "dout" ); + } + if (color_db->keyExists( "flux" )){ + flux = color_db->getScalar( "flux" ); + } + inletA=1.f; + inletB=0.f; + outletA=0.f; + outletB=1.f; + //if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details) + + BoundaryCondition = 0; + if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } + + // Override user-specified boundary condition for specific protocols + auto protocol = color_db->getWithDefault( "protocol", "none" ); + if (protocol == "seed water"){ + if (BoundaryCondition != 0 && BoundaryCondition != 5){ + BoundaryCondition = 0; + if (rank==0) printf("WARNING: protocol (seed water) supports only full periodic boundary condition \n"); + } + domain_db->putScalar( "BC", BoundaryCondition ); + } + else if (protocol == "open connected oil"){ + if (BoundaryCondition != 0 && BoundaryCondition != 5){ + BoundaryCondition = 0; + if (rank==0) printf("WARNING: protocol (open connected oil) supports only full periodic boundary condition \n"); + } + domain_db->putScalar( "BC", BoundaryCondition ); + } + else if (protocol == "shell aggregation"){ + if (BoundaryCondition != 0 && 
BoundaryCondition != 5){ + BoundaryCondition = 0; + if (rank==0) printf("WARNING: protocol (shell aggregation) supports only full periodic boundary condition \n"); + } + domain_db->putScalar( "BC", BoundaryCondition ); + } +} + +void ScaLBL_ColorModel::SetDomain(){ + Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis + Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases + // domain parameters + Nx = Dm->Nx; + Ny = Dm->Ny; + Nz = Dm->Nz; + Lx = Dm->Lx; + Ly = Dm->Ly; + Lz = Dm->Lz; + N = Nx*Ny*Nz; + id = new signed char [N]; + for (int i=0; iid[i] = 1; // initialize this way + //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object + Averages = std::shared_ptr ( new SubPhase(Dm) ); // TwoPhase analysis object + MPI_Barrier(comm); + Dm->CommInit(); + MPI_Barrier(comm); + // Read domain parameters + rank = Dm->rank(); + nprocx = Dm->nprocx(); + nprocy = Dm->nprocy(); + nprocz = Dm->nprocz(); +} + +void ScaLBL_ColorModel::ReadInput(){ + + sprintf(LocalRankString,"%05d",rank); + sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); + sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); + + if (color_db->keyExists( "image_sequence" )){ + auto ImageList = color_db->getVector( "image_sequence"); + int IMAGE_INDEX = color_db->getWithDefault( "image_index", 0 ); + std::string first_image = ImageList[IMAGE_INDEX]; + Mask->Decomp(first_image); + IMAGE_INDEX++; + } + else if (domain_db->keyExists( "GridFile" )){ + // Read the local domain data + auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD ); + // Fill the halo (assuming GCW of 1) + array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; + ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; + ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); + fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 
1 }, 0, 1 ); + Array id_view; + id_view.viewRaw( size1, Mask->id ); + fill.copy( input_id, id_view ); + fill.fill( id_view ); + } + else if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask->Decomp(Filename); + } + else{ + Mask->ReadIDs(); + } + for (int i=0; iid[i]; // save what was read + + // Generate the signed distance map + // Initialize the domain and communication + Array id_solid(Nx,Ny,Nz); + // Solve for the position of the solid phase + for (int k=0;kid[n]; + if (label > 0) id_solid(i,j,k) = 1; + else id_solid(i,j,k) = 0; + } + } + } + // Initialize the signed distance function + for (int k=0;kSDs(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0; + } + } + } +// MeanFilter(Averages->SDs); + if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); + CalcDist(Averages->SDs,id_solid,*Mask); + + if (rank == 0) cout << "Domain set." << endl; + + Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); +} + +void ScaLBL_ColorModel::AssignComponentLabels(double *phase) +{ + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = color_db->getVector( "ComponentLabels" ); + auto AffinityList = color_db->getVector( "ComponentAffinity" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! 
\n"); + } + + double label_count[NLABELS]; + double label_count_global[NLABELS]; + // Assign the labels + + for (size_t idx=0; idxid[n] = 0; // set mask to zero since this is an immobile component + } + } + // fluid labels are reserved + if (VALUE == 1) AFFINITY=1.0; + else if (VALUE == 2) AFFINITY=-1.0; + phase[n] = AFFINITY; + } + } + } + + // Set Dm to match Mask + for (int i=0; iid[i] = Mask->id[i]; + + for (size_t idx=0; idxComm, label_count[idx]); + + if (rank==0){ + printf("Component labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... + if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + MPI_Barrier(comm); + + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + dist_mem_size = Np*sizeof(double); + neighborSize=18*(Np*sizeof(int)); + //........................................................................... 
+ ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); + ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Aq, 7*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Bq, 7*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Den, 2*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Phi, sizeof(double)*Nx*Ny*Nz); + ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &ColorGrad, 3*sizeof(double)*Np); + //........................................................................... + // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + fflush(stdout); + int *TmpMap; + TmpMap=new int[Np]; + for (int k=1; kLastExterior(); idx++){ + auto n = TmpMap[idx]; + if (n > Nx*Ny*Nz){ + printf("Bad value! idx=%i \n", n); + TmpMap[idx] = Nx*Ny*Nz-1; + } + } + for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ + auto n = TmpMap[idx]; + if ( n > Nx*Ny*Nz ){ + printf("Bad value! 
idx=%i \n",n); + TmpMap[idx] = Nx*Ny*Nz-1; + } + } + ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); + ScaLBL_DeviceBarrier(); + delete [] TmpMap; + + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + // initialize phi based on PhaseLabel (include solid component labels) + double *PhaseLabel; + PhaseLabel = new double[N]; + AssignComponentLabels(PhaseLabel); + ScaLBL_CopyToDevice(Phi, PhaseLabel, N*sizeof(double)); +} + +/******************************************************** + * AssignComponentLabels * + ********************************************************/ + +void ScaLBL_ColorModel::Initialize(){ + + if (rank==0) printf ("Initializing distributions \n"); + ScaLBL_D3Q19_Init(fq, Np); + /* + * This function initializes model + */ + if (Restart == true){ + if (rank==0){ + printf("Reading restart file! \n"); + } + + // Read in the restart file to CPU buffers + int *TmpMap; + TmpMap = new int[Np]; + + double *cPhi, *cDist, *cDen; + cPhi = new double[N]; + cDen = new double[2*Np]; + cDist = new double[19*Np]; + ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); + ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); + + ifstream File(LocalRestartFile,ios::binary); + int idx; + double value,va,vb; + for (int n=0; nLastExterior(); n++){ + va = cDen[n]; + vb = cDen[Np + n]; + value = (va-vb)/(va+vb); + idx = TmpMap[n]; + if (!(idx < 0) && idxFirstInterior(); nLastInterior(); n++){ + va = cDen[n]; + vb = cDen[Np + n]; + value = (va-vb)/(va+vb); + idx = TmpMap[n]; + if (!(idx < 0) && idxLastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + + // establish reservoirs for external bC + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); + } + if 
(Dm->kproc() == nprocz-1){ + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); + } + } + ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); +} + +void ScaLBL_ColorModel::Run(){ + int nprocs=nprocx*nprocy*nprocz; + const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); + + int IMAGE_INDEX = 0; + int IMAGE_COUNT = 0; + std::vector ImageList; + bool SET_CAPILLARY_NUMBER = false; + bool RESCALE_FORCE = false; + bool MORPH_ADAPT = false; + bool USE_MORPH = false; + bool USE_SEED = false; + bool USE_DIRECT = false; + bool USE_MORPHOPEN_OIL = false; + int MAX_MORPH_TIMESTEPS = 50000; // maximum number of LBM timesteps to spend in morphological adaptation routine + int MIN_STEADY_TIMESTEPS = 100000; + int MAX_STEADY_TIMESTEPS = 200000; + int RESCALE_FORCE_AFTER_TIMESTEP = 0; + int RAMP_TIMESTEPS = 0;//50000; // number of timesteps to run initially (to get a reasonable velocity field before other pieces kick in) + int CURRENT_MORPH_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time) + int CURRENT_STEADY_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time) + int morph_interval = 100000; + int analysis_interval = 1000; // number of timesteps in between in situ analysis + int morph_timesteps = 0; + double morph_delta = 0.0; + double seed_water = 0.0; + double capillary_number = 0.0; + double tolerance = 0.01; + double Ca_previous = 0.f; + double initial_volume = 0.0; + double delta_volume = 0.0; + double delta_volume_target = 0.0; + + /* history for morphological algoirthm */ + double KRA_MORPH_FACTOR=0.5; + double volA_prev = 0.0; + double log_krA_prev = 1.0; + double log_krA_target = 1.0; + double log_krA = 1.0; + double slope_krA_volume = 0.0; + if (color_db->keyExists( "vol_A_previous" )){ + volA_prev = color_db->getScalar( "vol_A_previous" ); + } + if 
(color_db->keyExists( "log_krA_previous" )){ + log_krA_prev = color_db->getScalar( "log_krA_previous" ); + } + if (color_db->keyExists( "krA_morph_factor" )){ + KRA_MORPH_FACTOR = color_db->getScalar( "krA_morph_factor" ); + } + + /* defaults for simulation protocols */ + auto protocol = color_db->getWithDefault( "protocol", "none" ); + if (protocol == "image sequence"){ + // Get the list of images + USE_DIRECT = true; + ImageList = color_db->getVector( "image_sequence"); + IMAGE_INDEX = color_db->getWithDefault( "image_index", 0 ); + IMAGE_COUNT = ImageList.size(); + morph_interval = 10000; + USE_MORPH = true; + } + else if (protocol == "seed water"){ + morph_delta = -0.05; + seed_water = 0.01; + USE_SEED = true; + USE_MORPH = true; + } + else if (protocol == "open connected oil"){ + morph_delta = -0.05; + USE_MORPH = true; + USE_MORPHOPEN_OIL = true; + } + else if (protocol == "shell aggregation"){ + morph_delta = -0.05; + USE_MORPH = true; + } + if (color_db->keyExists( "capillary_number" )){ + capillary_number = color_db->getScalar( "capillary_number" ); + SET_CAPILLARY_NUMBER=true; + } + if (color_db->keyExists( "rescale_force_after_timestep" )){ + RESCALE_FORCE_AFTER_TIMESTEP = color_db->getScalar( "rescale_force_after_timestep" ); + RESCALE_FORCE = true; + } + if (color_db->keyExists( "timestep" )){ + timestep = color_db->getScalar( "timestep" ); + } + if (BoundaryCondition != 0 && BoundaryCondition != 5 && SET_CAPILLARY_NUMBER==true){ + if (rank == 0) printf("WARINING: capillary number target only supported for BC = 0 or 5 \n"); + SET_CAPILLARY_NUMBER=false; + } + if (analysis_db->keyExists( "seed_water" )){ + seed_water = analysis_db->getScalar( "seed_water" ); + if (rank == 0) printf("Seed water in oil %f (seed_water) \n",seed_water); + USE_SEED = true; + } + if (analysis_db->keyExists( "morph_delta" )){ + morph_delta = analysis_db->getScalar( "morph_delta" ); + if (rank == 0) printf("Target volume change %f (morph_delta) \n",morph_delta); + } + if 
(analysis_db->keyExists( "morph_interval" )){ + morph_interval = analysis_db->getScalar( "morph_interval" ); + USE_MORPH = true; + } + if (analysis_db->keyExists( "use_morphopen_oil" )){ + USE_MORPHOPEN_OIL = analysis_db->getScalar( "use_morphopen_oil" ); + if (rank == 0 && USE_MORPHOPEN_OIL) printf("Volume change by morphological opening \n"); + USE_MORPH = true; + } + if (analysis_db->keyExists( "tolerance" )){ + tolerance = analysis_db->getScalar( "tolerance" ); + } + if (analysis_db->keyExists( "analysis_interval" )){ + analysis_interval = analysis_db->getScalar( "analysis_interval" ); + } + if (analysis_db->keyExists( "min_steady_timesteps" )){ + MIN_STEADY_TIMESTEPS = analysis_db->getScalar( "min_steady_timesteps" ); + } + if (analysis_db->keyExists( "max_steady_timesteps" )){ + MAX_STEADY_TIMESTEPS = analysis_db->getScalar( "max_steady_timesteps" ); + } + if (analysis_db->keyExists( "max_morph_timesteps" )){ + MAX_MORPH_TIMESTEPS = analysis_db->getScalar( "max_morph_timesteps" ); + } + + + if (rank==0){ + printf("********************************************************\n"); + if (protocol == "image sequence"){ + printf(" using protocol = image sequence \n"); + printf(" min_steady_timesteps = %i \n",MIN_STEADY_TIMESTEPS); + printf(" max_steady_timesteps = %i \n",MAX_STEADY_TIMESTEPS); + printf(" tolerance = %f \n",tolerance); + std::string first_image = ImageList[IMAGE_INDEX]; + printf(" first image in sequence: %s ***\n", first_image.c_str()); + } + else if (protocol == "seed water"){ + printf(" using protocol = seed water \n"); + printf(" min_steady_timesteps = %i \n",MIN_STEADY_TIMESTEPS); + printf(" max_steady_timesteps = %i \n",MAX_STEADY_TIMESTEPS); + printf(" tolerance = %f \n",tolerance); + printf(" morph_delta = %f \n",morph_delta); + printf(" seed_water = %f \n",seed_water); + } + else if (protocol == "open connected oil"){ + printf(" using protocol = open connected oil \n"); + printf(" min_steady_timesteps = %i \n",MIN_STEADY_TIMESTEPS); + printf(" 
max_steady_timesteps = %i \n",MAX_STEADY_TIMESTEPS); + printf(" tolerance = %f \n",tolerance); + printf(" morph_delta = %f \n",morph_delta); + } + else if (protocol == "shell aggregation"){ + printf(" using protocol = shell aggregation \n"); + printf(" min_steady_timesteps = %i \n",MIN_STEADY_TIMESTEPS); + printf(" max_steady_timesteps = %i \n",MAX_STEADY_TIMESTEPS); + printf(" tolerance = %f \n",tolerance); + printf(" morph_delta = %f \n",morph_delta); + } + printf("No. of timesteps: %i \n", timestepMax); + fflush(stdout); + } + + //.......create and start timer............ + double starttime,stoptime,cputime; + ScaLBL_DeviceBarrier(); + MPI_Barrier(comm); + starttime = MPI_Wtime(); + //......................................... + + //************ MAIN ITERATION LOOP ***************************************/ + PROFILE_START("Loop"); + //std::shared_ptr analysis_db; + bool Regular = false; + auto current_db = db->cloneDatabase(); + runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); + //analysis.createThreads( analysis_method, 4 ); + while (timestep < timestepMax ) { + //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } + PROFILE_START("Update"); + // *************ODD TIMESTEP************* + timestep++; + // Compute the Phase indicator field + // Read for Aq, Bq happens in this routine (requires communication) + ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, 
inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + // Halo exchange for phase field + ScaLBL_Comm_Regular->SendHalo(Phi); + + ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Phi); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set BCs + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + + // *************EVEN TIMESTEP************* + timestep++; + // Compute the Phase indicator field + ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + // Halo exchange for phase field + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + 
ScaLBL_Comm_Regular->SendHalo(Phi); + ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Phi); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + //************************************************************************ + PROFILE_STOP("Update"); + + if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition == 4){ + printf("%i %f \n",timestep,din); + } + // Run the analysis + analysis.basic(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den ); + + // allow initial ramp-up to get closer to steady state + if (timestep > RAMP_TIMESTEPS && timestep%analysis_interval == 0 && USE_MORPH){ + analysis.finish(); + CURRENT_STEADY_TIMESTEPS += analysis_interval; + + double volB = Averages->gwb.V; + double volA = Averages->gnb.V; + volA /= Dm->Volume; + volB /= Dm->Volume;; + //initial_volume = volA*Dm->Volume; + double vA_x = Averages->gnb.Px/Averages->gnb.M; + double vA_y = Averages->gnb.Py/Averages->gnb.M; + double vA_z = Averages->gnb.Pz/Averages->gnb.M; + double vB_x = Averages->gwb.Px/Averages->gwb.M; + double vB_y 
= Averages->gwb.Py/Averages->gwb.M; + double vB_z = Averages->gwb.Pz/Averages->gwb.M; + double muA = rhoA*(tauA-0.5)/3.f; + double muB = rhoB*(tauB-0.5)/3.f; + double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz); + double dir_x = Fx/force_mag; + double dir_y = Fy/force_mag; + double dir_z = Fz/force_mag; + if (force_mag == 0.0){ + // default to z direction + dir_x = 0.0; + dir_y = 0.0; + dir_z = 1.0; + force_mag = 1.0; + } + double current_saturation = volB/(volA+volB); + double flow_rate_A = volA*(vA_x*dir_x + vA_y*dir_y + vA_z*dir_z); + double flow_rate_B = volB*(vB_x*dir_x + vB_y*dir_y + vB_z*dir_z); + double Ca = fabs(muA*flow_rate_A + muB*flow_rate_B)/(5.796*alpha); + + if ( morph_timesteps > morph_interval ){ + + bool isSteady = false; + if ( (fabs((Ca - Ca_previous)/Ca) < tolerance && CURRENT_STEADY_TIMESTEPS > MIN_STEADY_TIMESTEPS)) + isSteady = true; + if (CURRENT_STEADY_TIMESTEPS > MAX_STEADY_TIMESTEPS) + isSteady = true; + if (RESCALE_FORCE == true && SET_CAPILLARY_NUMBER == true && CURRENT_STEADY_TIMESTEPS > RESCALE_FORCE_AFTER_TIMESTEP){ + RESCALE_FORCE = false; + double RESCALE_FORCE_FACTOR = capillary_number / Ca; + if (RESCALE_FORCE_FACTOR > 2.0) RESCALE_FORCE_FACTOR = 2.0; + if (RESCALE_FORCE_FACTOR < 0.5) RESCALE_FORCE_FACTOR = 0.5; + Fx *= RESCALE_FORCE_FACTOR; + Fy *= RESCALE_FORCE_FACTOR; + Fz *= RESCALE_FORCE_FACTOR; + force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz); + if (force_mag > 1e-3){ + Fx *= 1e-3/force_mag; // impose ceiling for stability + Fy *= 1e-3/force_mag; + Fz *= 1e-3/force_mag; + } + if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); + Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); + color_db->putVector("F",{Fx,Fy,Fz}); + } + if ( isSteady ){ + MORPH_ADAPT = true; + CURRENT_MORPH_TIMESTEPS=0; + delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change + //****** ENDPOINT ADAPTATION ********/ + double krA_TMP= fabs(muA*flow_rate_A / force_mag); + double krB_TMP= fabs(muB*flow_rate_B 
/ force_mag); + log_krA = log(krA_TMP); + if (krA_TMP < 0.0){ + // cannot do endpoint adaptation if kr is negative + log_krA = log_krA_prev; + } + else if (krA_TMP < krB_TMP && morph_delta > 0.0){ + /** morphological target based on relative permeability for A **/ + log_krA_target = log(KRA_MORPH_FACTOR*(krA_TMP)); + slope_krA_volume = (log_krA - log_krA_prev)/(Dm->Volume*(volA - volA_prev)); + delta_volume_target=min(delta_volume_target,Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume)); + if (rank==0){ + printf(" Enabling endpoint adaptation: krA = %f, krB = %f \n",krA_TMP,krB_TMP); + printf(" log(kr)=%f, volume=%f, TARGET log(kr)=%f, volume change=%f \n",log_krA, volA, log_krA_target, delta_volume_target/(volA*Dm->Volume)); + } + } + log_krA_prev = log_krA; + volA_prev = volA; + //******************************** **/ + /** compute averages & write data **/ + Averages->Full(); + Averages->Write(timestep); + analysis.WriteVisData(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den ); + analysis.finish(); + + if (rank==0){ + printf("** WRITE STEADY POINT *** "); + printf("Ca = %f, (previous = %f) \n",Ca,Ca_previous); + double h = Dm->voxel_length; + // pressures + double pA = Averages->gnb.p; + double pB = Averages->gwb.p; + double pAc = Averages->gnc.p; + double pBc = Averages->gwc.p; + double pAB = (pA-pB)/(h*6.0*alpha); + double pAB_connected = (pAc-pBc)/(h*6.0*alpha); + // connected contribution + double Vol_nc = Averages->gnc.V/Dm->Volume; + double Vol_wc = Averages->gwc.V/Dm->Volume; + double Vol_nd = Averages->gnd.V/Dm->Volume; + double Vol_wd = Averages->gwd.V/Dm->Volume; + double Mass_n = Averages->gnc.M + Averages->gnd.M; + double Mass_w = Averages->gwc.M + Averages->gwd.M; + double vAc_x = Averages->gnc.Px/Mass_n; + double vAc_y = Averages->gnc.Py/Mass_n; + double vAc_z = Averages->gnc.Pz/Mass_n; + double vBc_x = Averages->gwc.Px/Mass_w; + double vBc_y = Averages->gwc.Py/Mass_w; + double vBc_z = Averages->gwc.Pz/Mass_w; + // 
disconnected contribution + double vAd_x = Averages->gnd.Px/Mass_n; + double vAd_y = Averages->gnd.Py/Mass_n; + double vAd_z = Averages->gnd.Pz/Mass_n; + double vBd_x = Averages->gwd.Px/Mass_w; + double vBd_y = Averages->gwd.Py/Mass_w; + double vBd_z = Averages->gwd.Pz/Mass_w; + + double flow_rate_A_connected = Vol_nc*(vAc_x*dir_x + vAc_y*dir_y + vAc_z*dir_z); + double flow_rate_B_connected = Vol_wc*(vBc_x*dir_x + vBc_y*dir_y + vBc_z*dir_z); + double flow_rate_A_disconnected = (Vol_nd)*(vAd_x*dir_x + vAd_y*dir_y + vAd_z*dir_z); + double flow_rate_B_disconnected = (Vol_wd)*(vBd_x*dir_x + vBd_y*dir_y + vBd_z*dir_z); + + double kAeff_connected = h*h*muA*flow_rate_A_connected/(force_mag); + double kBeff_connected = h*h*muB*flow_rate_B_connected/(force_mag); + + double kAeff_disconnected = h*h*muA*flow_rate_A_disconnected/(force_mag); + double kBeff_disconnected = h*h*muB*flow_rate_B_disconnected/(force_mag); + + double kAeff = h*h*muA*(flow_rate_A)/(force_mag); + double kBeff = h*h*muB*(flow_rate_B)/(force_mag); + + double viscous_pressure_drop = (rhoA*volA + rhoB*volB)*force_mag; + double Mobility = muA/muB; + + bool WriteHeader=false; + FILE * kr_log_file = fopen("relperm.csv","r"); + if (kr_log_file != NULL) + fclose(kr_log_file); + else + WriteHeader=true; + kr_log_file = fopen("relperm.csv","a"); + if (WriteHeader) + fprintf(kr_log_file,"timesteps sat.water eff.perm.oil eff.perm.water eff.perm.oil.connected eff.perm.water.connected eff.perm.oil.disconnected eff.perm.water.disconnected cap.pressure cap.pressure.connected pressure.drop Ca M\n"); + + fprintf(kr_log_file,"%i %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g\n",CURRENT_STEADY_TIMESTEPS,current_saturation,kAeff,kBeff,kAeff_connected,kBeff_connected,kAeff_disconnected,kBeff_disconnected,pAB,pAB_connected,viscous_pressure_drop,Ca,Mobility); + fclose(kr_log_file); + + printf(" Measured capillary number %f \n ",Ca); + } + if (SET_CAPILLARY_NUMBER ){ + Fx *= capillary_number / Ca; + Fy *= 
capillary_number / Ca; + Fz *= capillary_number / Ca; + if (force_mag > 1e-3){ + Fx *= 1e-3/force_mag; // impose ceiling for stability + Fy *= 1e-3/force_mag; + Fz *= 1e-3/force_mag; + } + if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); + Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); + color_db->putVector("F",{Fx,Fy,Fz}); + } + + CURRENT_STEADY_TIMESTEPS = 0; + } + else{ + if (rank==0){ + printf("** Continue to simulate steady *** \n "); + printf("Ca = %f, (previous = %f) \n",Ca,Ca_previous); + } + } + morph_timesteps=0; + Ca_previous = Ca; + } + + if (MORPH_ADAPT ){ + CURRENT_MORPH_TIMESTEPS += analysis_interval; + if (USE_DIRECT){ + // Use image sequence + IMAGE_INDEX++; + MORPH_ADAPT = false; + if (IMAGE_INDEX < IMAGE_COUNT){ + std::string next_image = ImageList[IMAGE_INDEX]; + if (rank==0) printf("***Loading next image in sequence (%i) ***\n",IMAGE_INDEX); + color_db->putScalar("image_index",IMAGE_INDEX); + ImageInit(next_image); + } + else{ + if (rank==0) printf("Finished simulating image sequence \n"); + timestep = timestepMax; + } + } + else if (USE_SEED){ + delta_volume = volA*Dm->Volume - initial_volume; + CURRENT_MORPH_TIMESTEPS += analysis_interval; + double massChange = SeedPhaseField(seed_water); + if (rank==0) printf("***Seed water in oil %f, volume change %f / %f ***\n", massChange, delta_volume, delta_volume_target); + } + else if (USE_MORPHOPEN_OIL){ + delta_volume = volA*Dm->Volume - initial_volume; + if (rank==0) printf("***Morphological opening of connected oil, target volume change %f ***\n", delta_volume_target); + MorphOpenConnected(delta_volume_target); + } + else { + if (rank==0) printf("***Shell aggregation, target volume change %f ***\n", delta_volume_target); + //double delta_volume_target = volB - (volA + volB)*TARGET_SATURATION; // change in volume to A + delta_volume += MorphInit(beta,delta_volume_target-delta_volume); + } + + if ( (delta_volume - 
delta_volume_target)/delta_volume_target > 0.0 ){ + MORPH_ADAPT = false; + CURRENT_STEADY_TIMESTEPS=0; + initial_volume = volA*Dm->Volume; + delta_volume = 0.0; + if (RESCALE_FORCE_AFTER_TIMESTEP > 0) + RESCALE_FORCE = true; + } + else if (!(USE_DIRECT) && CURRENT_MORPH_TIMESTEPS > MAX_MORPH_TIMESTEPS) { + MORPH_ADAPT = false; + CURRENT_STEADY_TIMESTEPS=0; + initial_volume = volA*Dm->Volume; + delta_volume = 0.0; + RESCALE_FORCE = true; + if (RESCALE_FORCE_AFTER_TIMESTEP > 0) + RESCALE_FORCE = true; + } + } + morph_timesteps += analysis_interval; + } + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + } + analysis.finish(); + PROFILE_STOP("Loop"); + PROFILE_SAVE("lbpm_color_simulator",1); + //************************************************************************ + ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + stoptime = MPI_Wtime(); + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + cputime = (stoptime - starttime)/timestep; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + + // ************************************************************************ +} + +double ScaLBL_ColorModel::ImageInit(std::string Filename){ + + if (rank==0) printf("Re-initializing fluids from file: %s \n", Filename.c_str()); + Mask->Decomp(Filename); + for (int i=0; iid[i]; // save what was read + for (int i=0; iid[i] = Mask->id[i]; // save what was read + + double *PhaseLabel; + PhaseLabel = new double[Nx*Ny*Nz]; + AssignComponentLabels(PhaseLabel); + + double 
Count = 0.0; + double PoreCount = 0.0; + for (int k=1; kComm, Count); + PoreCount=sumReduce( Dm->Comm, PoreCount); + + if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount); + ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double)); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + + ScaLBL_D3Q19_Init(fq, Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + + ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double)); + + double saturation = Count/PoreCount; + return saturation; + +} + +double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ + + int nx = Nx; + int ny = Ny; + int nz = Nz; + int n; + int N = nx*ny*nz; + double volume_change=0.0; + + if (target_volume_change < 0.0){ + Array id_solid(nx,ny,nz); + Array phase_label(nx,ny,nz); + DoubleArray distance(Nx,Ny,Nz); + DoubleArray phase(nx,ny,nz); + signed char *id_connected; + id_connected = new signed char [nx*ny*nz]; + + ScaLBL_CopyToHost(phase.data(), Phi, N*sizeof(double)); + + // Extract only the connected part of NWP + BlobIDstruct new_index; + double vF=0.0; double vS=0.0; + ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm); + MPI_Barrier(Dm->Comm); + + long long count_connected=0; + long long count_porespace=0; + long long count_water=0; + for (int k=1; k 0){ + count_porespace++; + } + if (id[n] == 2){ + count_water++; + } + } + } + } + count_connected=sumReduce( Dm->Comm, count_connected); + count_porespace=sumReduce( Dm->Comm, count_porespace); + count_water=sumReduce( Dm->Comm, count_water); + + for (int k=0; kSDs(i,j,k) > 0.f){ + if (d < 3.f){ + phase(i,j,k) = (2.f*(exp(-2.f*beta*d))/(1.f+exp(-2.f*beta*d))-1.f); + } + } + } + } + } + + int count_morphopen=0.0; + for (int k=1; kComm, 
count_morphopen); + volume_change = double(count_morphopen - count_connected); + + if (rank==0) printf(" opening of connected oil %f \n",volume_change/count_connected); + + ScaLBL_CopyToDevice(Phi,phase.data(),N*sizeof(double)); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); + } + if (Dm->kproc() == nprocz-1){ + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); + } + } + } + return(volume_change); +} +double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ + srand(time(NULL)); + double mass_loss =0.f; + double count =0.f; + double *Aq_tmp, *Bq_tmp; + + Aq_tmp = new double [7*Np]; + Bq_tmp = new double [7*Np]; + + ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double)); + ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double)); + + + for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){ + double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; + double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; + double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; + double phase_id = (dA - dB) / (dA + dB); + if (phase_id > 0.0){ + Aq_tmp[n] -= 0.3333333333333333*random_value; + Aq_tmp[n+Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; + 
Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; + + Bq_tmp[n] += 0.3333333333333333*random_value; + Bq_tmp[n+Np] += 0.1111111111111111*random_value; + Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; + } + mass_loss += random_value*seed_water_in_oil; + } + + for (int n=ScaLBL_Comm->FirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){ + double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; + double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; + double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; + double phase_id = (dA - dB) / (dA + dB); + if (phase_id > 0.0){ + Aq_tmp[n] -= 0.3333333333333333*random_value; + Aq_tmp[n+Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; + Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; + + Bq_tmp[n] += 0.3333333333333333*random_value; + Bq_tmp[n+Np] += 0.1111111111111111*random_value; + Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; + Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; + } + mass_loss += random_value*seed_water_in_oil; + } + + count= sumReduce( Dm->Comm, count); + mass_loss= sumReduce( Dm->Comm, mass_loss); + if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); + + // Need to initialize Aq, Bq, Den, Phi directly + 
//ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); + ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double)); + ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double)); + + return(mass_loss); +} + +double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta_volume){ + const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); + + double vF = 0.f; + double vS = 0.f; + double delta_volume; + double WallFactor = 0.0; + bool USE_CONNECTED_NWP = false; + + DoubleArray phase(Nx,Ny,Nz); + IntArray phase_label(Nx,Ny,Nz);; + DoubleArray phase_distance(Nx,Ny,Nz); + Array phase_id(Nx,Ny,Nz); + fillHalo fillDouble(Dm->Comm,Dm->rank_info,{Nx-2,Ny-2,Nz-2},{1,1,1},0,1); + + + // Basic algorithm to + // 1. Copy phase field to CPU + ScaLBL_CopyToHost(phase.data(), Phi, N*sizeof(double)); + + double count = 0.f; + for (int k=1; k 0.f && Averages->SDs(i,j,k) > 0.f) count+=1.f; + } + } + } + double volume_initial = sumReduce( Dm->Comm, count); + /* + sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank); + FILE *INPUT = fopen(LocalRankFilename,"wb"); + fwrite(phase.data(),8,N,INPUT); + fclose(INPUT); + */ + // 2. 
Identify connected components of phase field -> phase_label + + double volume_connected = 0.0; + double second_biggest = 0.0; + if (USE_CONNECTED_NWP){ + BlobIDstruct new_index; + ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); + MPI_Barrier(Dm->Comm); + + // only operate on component "0" + count = 0.0; + + for (int k=0; kComm, count); + second_biggest = sumReduce( Dm->Comm, second_biggest); + } + else { + // use the whole NWP + for (int k=0; kSDs(i,j,k) > 0.f){ + if (phase(i,j,k) > 0.f ){ + phase_id(i,j,k) = 0; + } + else { + phase_id(i,j,k) = 1; + } + } + else { + phase_id(i,j,k) = 1; + } + } + } + } + } + + /*int reach_x, reach_y, reach_z; + for (int k=0; k phase_distance + CalcDist(phase_distance,phase_id,*Dm); + + double temp,value; + double factor=0.5/beta; + for (int k=0; k 1.f) value=1.f; + if (value < -1.f) value=-1.f; + // temp -- distance based on analytical form McClure, Prins et al, Comp. Phys. Comm. + temp = -factor*log((1.0+value)/(1.0-value)); + /// use this approximation close to the object + if (fabs(value) < 0.8 && Averages->SDs(i,j,k) > 1.f ){ + phase_distance(i,j,k) = temp; + } + // erase the original object + phase(i,j,k) = -1.0; + } + } + } + } + + if (USE_CONNECTED_NWP){ + if (volume_connected - second_biggest < 2.0*fabs(target_delta_volume) && target_delta_volume < 0.0){ + // if connected volume is less than 2% just delete the whole thing + if (rank==0) printf("Connected region has shrunk! 
\n"); + REVERSE_FLOW_DIRECTION = true; + } + +/* else{*/ + if (rank==0) printf("Pathway volume / next largest ganglion %f \n",volume_connected/second_biggest ); + } + if (rank==0) printf("MorphGrow with target volume fraction change %f \n", target_delta_volume/volume_initial); + double target_delta_volume_incremental = target_delta_volume; + if (fabs(target_delta_volume) > 0.01*volume_initial) + target_delta_volume_incremental = 0.01*volume_initial*target_delta_volume/fabs(target_delta_volume); + delta_volume = MorphGrow(Averages->SDs,phase_distance,phase_id,Averages->Dm, target_delta_volume_incremental, WallFactor); + + for (int k=0; kSDs(i,j,k) > 0.f){ + if (d < 3.f){ + //phase(i,j,k) = -1.0; + phase(i,j,k) = (2.f*(exp(-2.f*beta*d))/(1.f+exp(-2.f*beta*d))-1.f); + } + } + } + } + } + fillDouble.fill(phase); + //} + + count = 0.f; + for (int k=1; k 0.f && Averages->SDs(i,j,k) > 0.f){ + count+=1.f; + } + } + } + } + double volume_final= sumReduce( Dm->Comm, count); + + delta_volume = (volume_final-volume_initial); + if (rank == 0) printf("MorphInit: change fluid volume fraction by %f \n", delta_volume/volume_initial); + if (rank == 0) printf(" new saturation = %f \n", volume_final/(0.238323*double((Nx-2)*(Ny-2)*(Nz-2)*nprocs))); + + // 6. copy back to the device + //if (rank==0) printf("MorphInit: copy data back to device\n"); + ScaLBL_CopyToDevice(Phi,phase.data(),N*sizeof(double)); + /* + sprintf(LocalRankFilename,"dist_final.%05i.raw",rank); + FILE *DIST = fopen(LocalRankFilename,"wb"); + fwrite(phase_distance.data(),8,N,DIST); + fclose(DIST); + + sprintf(LocalRankFilename,"phi_final.%05i.raw",rank); + FILE *PHI = fopen(LocalRankFilename,"wb"); + fwrite(phase.data(),8,N,PHI); + fclose(PHI); + */ + // 7. 
Re-initialize phase field and density + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); + } + if (Dm->kproc() == nprocz-1){ + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); + } + } + return delta_volume; +} + +void ScaLBL_ColorModel::WriteDebug(){ + // Copy back final phase indicator field and convert to regular layout + DoubleArray PhaseField(Nx,Ny,Nz); + //ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); + ScaLBL_CopyToHost(PhaseField.data(), Phi, sizeof(double)*N); + + FILE *OUTFILE; + sprintf(LocalRankFilename,"Phase.%05i.raw",rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); + + ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); + FILE *AFILE; + sprintf(LocalRankFilename,"A.%05i.raw",rank); + AFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,AFILE); + fclose(AFILE); + + ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); + FILE *BFILE; + sprintf(LocalRankFilename,"B.%05i.raw",rank); + BFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,BFILE); + fclose(BFILE); + + ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); + FILE *PFILE; + sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); + PFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,PFILE); + fclose(PFILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + FILE *VELX_FILE; + sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); + VELX_FILE = fopen(LocalRankFilename,"wb"); + 
fwrite(PhaseField.data(),8,N,VELX_FILE); + fclose(VELX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + FILE *VELY_FILE; + sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); + VELY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELY_FILE); + fclose(VELY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + FILE *VELZ_FILE; + sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); + VELZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELZ_FILE); + fclose(VELZ_FILE); + +/* ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); + FILE *CGX_FILE; + sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank); + CGX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGX_FILE); + fclose(CGX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad[Np],PhaseField); + FILE *CGY_FILE; + sprintf(LocalRankFilename,"Gradient_Y.%05i.raw",rank); + CGY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGY_FILE); + fclose(CGY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad[2*Np],PhaseField); + FILE *CGZ_FILE; + sprintf(LocalRankFilename,"Gradient_Z.%05i.raw",rank); + CGZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGZ_FILE); + fclose(CGZ_FILE); +*/ +} diff --git a/models/ElectroKinetic.h b/models/ElectroKinetic.h new file mode 100644 index 00000000..a3b3a124 --- /dev/null +++ b/models/ElectroKinetic.h @@ -0,0 +1,88 @@ +/* +Implementation of color lattice boltzmann model + */ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Communication.h" +#include "analysis/TwoPhase.h" +#include "analysis/runAnalysis.h" +#include "common/MPI_Helpers.h" +#include "ProfilerApp.h" +#include "threadpool/thread_pool.h" + +class ScaLBL_ColorModel{ +public: + ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM); + ~ScaLBL_ColorModel(); + + // functions in they should be run + void ReadParams(string filename); + void 
ReadParams(std::shared_ptr db0); + void SetDomain(); + void ReadInput(); + void Create(); + void Initialize(); + void Run(); + void WriteDebug(); + + bool Restart,pBC; + bool REVERSE_FLOW_DIRECTION; + int timestep,timestepMax; + int BoundaryCondition; + double tauA,tauB,rhoA,rhoB,alpha,beta; + double Fx,Fy,Fz,flux; + double din,dout,inletA,inletB,outletA,outletB; + + int Nx,Ny,Nz,N,Np; + int rank,nprocx,nprocy,nprocz,nprocs; + double Lx,Ly,Lz; + + std::shared_ptr Dm; // this domain is for analysis + std::shared_ptr Mask; // this domain is for lbm + std::shared_ptr ScaLBL_Comm; + std::shared_ptr ScaLBL_Comm_Regular; + //std::shared_ptr Averages; + std::shared_ptr Averages; + + // input database + std::shared_ptr db; + std::shared_ptr domain_db; + std::shared_ptr color_db; + std::shared_ptr analysis_db; + std::shared_ptr vis_db; + + IntArray Map; + signed char *id; + int *NeighborList; + int *dvcMap; + double *fq, *Aq, *Bq; + double *Den, *Phi; + double *ColorGrad; + double *Velocity; + double *Pressure; + +private: + MPI_Comm comm; + + int dist_mem_size; + int neighborSize; + // filenames + char LocalRankString[8]; + char LocalRankFilename[40]; + char LocalRestartFile[40]; + + //int rank,nprocs; + void LoadParams(std::shared_ptr db0); + void AssignComponentLabels(double *phase); + double ImageInit(std::string filename); + double MorphInit(const double beta, const double morph_delta); + double SeedPhaseField(const double seed_water_in_oil); + double MorphOpenConnected(double target_volume_change); +}; + From 5e6a9f552ce34392dedd6720dbf53f8185aee671 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 6 Aug 2020 15:41:40 -0400 Subject: [PATCH 007/205] Adding skeleton for electrokinetic LBM --- common/ScaLBL.cpp | 109 +++++++++ common/ScaLBL.h | 13 + cpu/Ion.cpp | 123 ++++++++++ cpu/Poisson.cpp | 123 ++++++++++ models/IonModel.cpp | 225 +++++++++++++++++ models/IonModel.h | 72 ++++++ models/PoissonSolver.cpp | 253 ++++++++++++++++++++ models/PoissonSolver.h | 68 
++++++ tests/lbpm_electrokinetic_dfh_simulator.cpp | 78 ++++++ 9 files changed, 1064 insertions(+) create mode 100644 cpu/Ion.cpp create mode 100644 cpu/Poisson.cpp create mode 100644 models/IonModel.cpp create mode 100644 models/IonModel.h create mode 100644 models/PoissonSolver.cpp create mode 100644 models/PoissonSolver.h create mode 100644 tests/lbpm_electrokinetic_dfh_simulator.cpp diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 07aa3f1d..eace3f3f 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1175,6 +1175,115 @@ void ScaLBL_Communicator::RecvGrad(double *phi, double *grad){ //................................................................................... } +/** + * + */ +void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){ + + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + if (Lock==true){ + ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); + } + else{ + Lock=true; + } + // assign tag of 19 to D3Q19 communication + sendtag = recvtag = 7; + ScaLBL_DeviceBarrier(); + // Pack the distributions + //...Packing for x face(2,8,10,12,14)................................ + ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*N],N); + + MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); + MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); + + //...Packing for X face(1,7,9,11,13)................................ + ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*N],N); + + MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); + MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); + + //...Packing for y face(4,8,9,16,18)................................. 
+ ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*N],N); + + MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); + MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); + + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*N],N); + ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); + + MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); + MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); + + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*N],N); + + MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); + MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); + + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*N],N); + + //................................................................................... + // Send all the distributions + MPI_Isend(sendbuf_Z, sendCount_Z,MPI_DOUBLE,rank_Z,sendtag,MPI_COMM_SCALBL,&req1[5]); + MPI_Irecv(recvbuf_z, recvCount_z,MPI_DOUBLE,rank_z,recvtag,MPI_COMM_SCALBL,&req2[5]); + +} + + +void ScaLBL_Communicator::RecvD3Q7AA(double *Aq, int Component){ + + // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 + //................................................................................... + // Wait for completion of D3Q19 communication + MPI_Waitall(6,req1,stat1); + MPI_Waitall(6,req2,stat2); + ScaLBL_DeviceBarrier(); + + //................................................................................... 
+ // NOTE: AA Routine writes to opposite + // Unpack the distributions on the device + //................................................................................... + //...Unpacking for x face(2,8,10,12,14)................................ + ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,&Aq[Component*N],N); + //................................................................................... + //...Packing for X face(1,7,9,11,13)................................ + ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,&Aq[Component*N],N); + //................................................................................... + //...Packing for y face(4,8,9,16,18)................................. + ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,&Aq[Component*N],N); + //................................................................................... + //...Packing for Y face(3,7,10,15,17)................................. + ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,&Aq[Component*N],N); + //................................................................................... + + if (BoundaryCondition > 0){ + if (kproc != 0){ + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*N],N); + } + if (kproc != nprocz-1){ + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*N],N); + } + } + else { + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*N],N); + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*N],N); + } + + //................................................................................... 
+ Lock=false; // unlock the communicator after communications complete + //................................................................................... + +} +/* + */ + void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ diff --git a/common/ScaLBL.h b/common/ScaLBL.h index dec8b3d1..23cf6936 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -72,6 +72,17 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, double *Poros,double *Perm, double *Velocity,double Den,double *Pressure); +// ION TRANSPORT MODEL +extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Velocity, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); + +extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Velocity, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); + + +// ION TRANSPORT MODEL +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); + // MRT MODEL extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, @@ -185,6 +196,8 @@ public: int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np); void SendD3Q19AA(double *dist); void RecvD3Q19AA(double *dist); + void SendD3Q7AA(double *fq, int Component); + void RecvD3Q7AA(double *fq, int Component) void BiSendD3Q7AA(double *Aq, double *Bq); void BiRecvD3Q7AA(double *Aq, double *Bq); void TriSendD3Q7AA(double *Aq, double *Bq, double *Cq); diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp new file mode 100644 index 
00000000..569c9a0a --- /dev/null +++ b/cpu/Ion.cpp @@ -0,0 +1,123 @@ +extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Velocity, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + int n; + // conserved momemnts + double rho,ux,uy,uz,uu; + // non-conserved moments + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + + for (int n=start; n 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + rho = f0+f2+f1+f4+f3+f6; + ux = Velocity[n]; + uy = Velocity[n+Np]; + uz = Velocity[n+2*Np]; + uu = 1.5*(ux*ux+uy*uy+uz*uz); + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + + + } +} \ No newline at end of file diff --git a/cpu/Poisson.cpp b/cpu/Poisson.cpp new file mode 100644 index 00000000..355e4223 --- /dev/null +++ b/cpu/Poisson.cpp @@ -0,0 +1,123 @@ +extern "C" void 
ScaLBL_D3Q7_AAeven_Poisson(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + int n; + // conserved momemnts + double rho,ux,uy,uz,uu; + // non-conserved moments + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + + for (int n=start; n 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + rho = f0+f2+f1+f4+f3+f6+f5; + ux = f1-f2; + uy = f3-f4; + uz = f5-f6; + uu = 1.5*(ux*ux+uy*uy+uz*uz); + + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + + + } +} \ No newline at end of file diff --git a/models/IonModel.cpp b/models/IonModel.cpp new file mode 100644 index 00000000..38088587 --- /dev/null +++ b/models/IonModel.cpp @@ -0,0 +1,225 @@ +/* + * Multi-relaxation time LBM Model + */ +#include "models/MRT.h" +#include "models/ElectroModel.h" +#include "analysis/distance.h" 
+#include "common/ReadMicroCT.h" + +ScaLBL_IonModel::ScaLBL_IonModel(int RANK, int NP, MPI_Comm COMM): +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), +Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), +Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) +{ + +} +ScaLBL_IonModel::~ScaLBL_IonModel(){ + +} + +void ScaLBL_IonModel::ReadParams(string filename){ + // read the input database + db = std::make_shared( filename ); + domain_db = db->getDatabase( "Domain" ); + ion_db = db->getDatabase( "Ions" ); + + tau = 1.0; + timestepMax = 100000; + tolerance = 1.0e-8; + Fx = Fy = 0.0; + Fz = 1.0e-5; + + // Color Model parameters + if (ion_db->keyExists( "timestepMax" )){ + timestepMax = mrt_db->getScalar( "timestepMax" ); + } + + mu=(tau-0.5)/3.0; +} +void ScaLBL_IonModel::SetDomain(){ + Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis + Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases + + // domain parameters + Nx = Dm->Nx; + Ny = Dm->Ny; + Nz = Dm->Nz; + Lx = Dm->Lx; + Ly = Dm->Ly; + Lz = Dm->Lz; + + N = Nx*Ny*Nz; + Distance.resize(Nx,Ny,Nz); + Velocity_x.resize(Nx,Ny,Nz); + Velocity_y.resize(Nx,Ny,Nz); + Velocity_z.resize(Nx,Ny,Nz); + + for (int i=0; iid[i] = 1; // initialize this way + //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object + MPI_Barrier(comm); + Dm->CommInit(); + MPI_Barrier(comm); + + rank = Dm->rank(); + nprocx = Dm->nprocx(); + nprocy = Dm->nprocy(); + nprocz = Dm->nprocz(); +} + +void ScaLBL_IonModel::ReadInput(){ + + sprintf(LocalRankString,"%05d",Dm->rank()); + sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); + sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); + + + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask->Decomp(Filename); + } + else if (domain_db->keyExists( "GridFile" )){ + // Read the local 
domain data + auto input_id = readMicroCT( *domain_db, comm ); + // Fill the halo (assuming GCW of 1) + array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; + ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; + ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); + fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + Array id_view; + id_view.viewRaw( size1, Mask->id ); + fill.copy( input_id, id_view ); + fill.fill( id_view ); + } + else{ + Mask->ReadIDs(); + } + + // Generate the signed distance map + // Initialize the domain and communication + Array id_solid(Nx,Ny,Nz); + // Solve for the position of the solid phase + for (int k=0;kid[n] > 0) id_solid(i,j,k) = 1; + else id_solid(i,j,k) = 0; + } + } + } + // Initialize the signed distance function + for (int k=0;kSDs); + if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); + CalcDist(Distance,id_solid,*Dm); + if (rank == 0) cout << "Domain set." << endl; +} + +void ScaLBL_IonModel::Create(){ + /* + * This function creates the variables needed to run a LBM + */ + int rank=Mask->rank(); + //......................................................... + // Initialize communication structures in averaging domain + for (int i=0; iid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... 
+ if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout \n"); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + MPI_Barrier(comm); + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + int dist_mem_size = Np*sizeof(double); + int neighborSize=18*(Np*sizeof(int)); + //........................................................................... + ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &fq, number_ion_species*7*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Ci, number_ion_species*sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &ChargeDensity, sizeof(double)*Np); + //........................................................................... + // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + MPI_Barrier(comm); + +} + +void ScaLBL_IonModel::Initialize(){ + /* + * This function initializes model + */ + if (rank==0) printf ("Initializing distributions \n"); + ScaLBL_D3Q19_Init(fq, Np); +} + +void ScaLBL_IonModel::Run(double *Velocity){ + + //.......create and start timer............ 
+ double starttime,stoptime,cputime; + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + starttime = MPI_Wtime(); + if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); + if (rank==0) printf("********************************************************\n"); + timestep=0; + double error = 1.0; + double flow_rate_previous = 0.0; + while (timestep < timestepMax && error > tolerance) { + //************************************************************************/ + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + /* ... */ + ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + ScaLBL_D3Q7_AAeven_Ion(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + /* ... 
*/ + ScaLBL_D3Q7_AAeven_Ion(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //************************************************************************/ + } + //************************************************************************/ + stoptime = MPI_Wtime(); + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + cputime = (stoptime - starttime)/timestep; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + +} + diff --git a/models/IonModel.h b/models/IonModel.h new file mode 100644 index 00000000..f6ffad5e --- /dev/null +++ b/models/IonModel.h @@ -0,0 +1,72 @@ +/* + * Multi-relaxation time LBM Model + */ +#include +#include +#include +#include +#include +#include +#include + +#include "common/ScaLBL.h" +#include "common/Communication.h" +#include "common/MPI_Helpers.h" +#include "analysis/Minkowski.h" +#include "ProfilerApp.h" + +class ScaLBL_IonModel{ +public: + ScaLBL_IonModel(int RANK, int NP, MPI_Comm COMM); + ~ScaLBL_IonModel(); + + // functions in they should be run + void ReadParams(string filename); + void ReadParams(std::shared_ptr db0); + void SetDomain(); + void ReadInput(); + void Create(); + void Initialize(); + void Run(double *Velocity); + void VelocityField(); + + bool Restart,pBC; + int timestep,timestepMax; + int BoundaryCondition; + double tau,mu; + double Fx,Fy,Fz,flux; + double din,dout; + double tolerance; + + int number_ion_species; + + int Nx,Ny,Nz,N,Np; + int 
rank,nprocx,nprocy,nprocz,nprocs; + double Lx,Ly,Lz; + + std::shared_ptr Dm; // this domain is for analysis + std::shared_ptr Mask; // this domain is for lbm + std::shared_ptr ScaLBL_Comm; + // input database + std::shared_ptr db; + std::shared_ptr domain_db; + std::shared_ptr ion_db; + + IntArray Map; + DoubleArray Distance; + int *NeighborList; + double *fq; + double *Ci; + double *ChargeDensity; + +private: + MPI_Comm comm; + + // filenames + char LocalRankString[8]; + char LocalRankFilename[40]; + char LocalRestartFile[40]; + + //int rank,nprocs; + void LoadParams(std::shared_ptr db0); +}; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp new file mode 100644 index 00000000..f4f15224 --- /dev/null +++ b/models/PoissonSolver.cpp @@ -0,0 +1,253 @@ +/* + * Multi-relaxation time LBM Model + */ +#include "models/MRT.h" +#include "models/ElectroModel.h" +#include "analysis/distance.h" +#include "common/ReadMicroCT.h" + +ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM): +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), +Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), +Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) +{ + +} +ScaLBL_Poisson::~ScaLBL_Poisson(){ + +} + +void ScaLBL_Poisson::ReadParams(string filename){ + // read the input database + db = std::make_shared( filename ); + domain_db = db->getDatabase( "Domain" ); + mrt_db = db->getDatabase( "MRT" ); + electric_db = db->getDatabase( "Electrochemistry" ); + + tau = 1.0; + timestepMax = 100000; + tolerance = 1.0e-8; + Fx = Fy = 0.0; + Fz = 1.0e-5; + + // Color Model parameters + if (mrt_db->keyExists( "timestepMax" )){ + timestepMax = mrt_db->getScalar( "timestepMax" ); + } + if (mrt_db->keyExists( "tolerance" )){ + tolerance = mrt_db->getScalar( "tolerance" ); + } + if (mrt_db->keyExists( "tau" )){ + tau = mrt_db->getScalar( "tau" ); + } + if (mrt_db->keyExists( "F" )){ + Fx = mrt_db->getVector( "F" 
)[0]; + Fy = mrt_db->getVector( "F" )[1]; + Fz = mrt_db->getVector( "F" )[2]; + } + if (mrt_db->keyExists( "Restart" )){ + Restart = mrt_db->getScalar( "Restart" ); + } + if (mrt_db->keyExists( "din" )){ + din = mrt_db->getScalar( "din" ); + } + if (mrt_db->keyExists( "dout" )){ + dout = mrt_db->getScalar( "dout" ); + } + if (mrt_db->keyExists( "flux" )){ + flux = mrt_db->getScalar( "flux" ); + } + + // Read domain parameters + if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } + + + mu=(tau-0.5)/3.0; +} +void ScaLBL_Poisson::SetDomain(){ + Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis + Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases + + // domain parameters + Nx = Dm->Nx; + Ny = Dm->Ny; + Nz = Dm->Nz; + Lx = Dm->Lx; + Ly = Dm->Ly; + Lz = Dm->Lz; + + N = Nx*Ny*Nz; + Distance.resize(Nx,Ny,Nz); + Velocity_x.resize(Nx,Ny,Nz); + Velocity_y.resize(Nx,Ny,Nz); + Velocity_z.resize(Nx,Ny,Nz); + + for (int i=0; iid[i] = 1; // initialize this way + //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object + MPI_Barrier(comm); + Dm->CommInit(); + MPI_Barrier(comm); + + rank = Dm->rank(); + nprocx = Dm->nprocx(); + nprocy = Dm->nprocy(); + nprocz = Dm->nprocz(); +} + +void ScaLBL_Poisson::ReadInput(){ + + sprintf(LocalRankString,"%05d",Dm->rank()); + sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); + sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); + + + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask->Decomp(Filename); + } + else if (domain_db->keyExists( "GridFile" )){ + // Read the local domain data + auto input_id = readMicroCT( *domain_db, comm ); + // Fill the halo (assuming GCW of 1) + array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; + ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; + ASSERT( 
(int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); + fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + Array id_view; + id_view.viewRaw( size1, Mask->id ); + fill.copy( input_id, id_view ); + fill.fill( id_view ); + } + else{ + Mask->ReadIDs(); + } + + // Generate the signed distance map + // Initialize the domain and communication + Array id_solid(Nx,Ny,Nz); + // Solve for the position of the solid phase + for (int k=0;kid[n] > 0) id_solid(i,j,k) = 1; + else id_solid(i,j,k) = 0; + } + } + } + // Initialize the signed distance function + for (int k=0;kSDs); + if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); + CalcDist(Distance,id_solid,*Dm); + if (rank == 0) cout << "Domain set." << endl; +} + +void ScaLBL_Poisson::Create(){ + /* + * This function creates the variables needed to run a LBM + */ + int rank=Mask->rank(); + //......................................................... + // Initialize communication structures in averaging domain + for (int i=0; iid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... + if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout \n"); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + MPI_Barrier(comm); + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... 
+ // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + int dist_mem_size = Np*sizeof(double); + int neighborSize=18*(Np*sizeof(int)); + //........................................................................... + ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Np); + //........................................................................... + // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + MPI_Barrier(comm); + +} + +void ScaLBL_Poisson::Initialize(){ + /* + * This function initializes model + */ + if (rank==0) printf ("Initializing distributions \n"); + ScaLBL_D3Q19_Init(fq, Np); +} + +void ScaLBL_Poisson::Run(double *ChargeDensity){ + + //.......create and start timer............ + double starttime,stoptime,cputime; + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + starttime = MPI_Wtime(); + if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); + if (rank==0) printf("********************************************************\n"); + timestep=0; + double error = 1.0; + double flow_rate_previous = 0.0; + while (timestep < timestepMax && error > tolerance) { + //************************************************************************/ + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + /* ... 
*/ + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + ScaLBL_D3Q7_AAeven_Poisson(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + /* ... */ + ScaLBL_D3Q7_AAeven_Poisson(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //************************************************************************/ + } + //************************************************************************/ + stoptime = MPI_Wtime(); + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + cputime = (stoptime - starttime)/timestep; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + +} diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h new file mode 100644 index 00000000..63e10df0 --- /dev/null +++ b/models/PoissonSolver.h @@ -0,0 +1,68 @@ +/* + * Multi-relaxation time LBM Model + */ +#include +#include +#include +#include +#include +#include +#include + +#include "common/ScaLBL.h" +#include "common/Communication.h" +#include "common/MPI_Helpers.h" +#include "analysis/Minkowski.h" +#include "ProfilerApp.h" + +class ScaLBL_Poisson{ +public: + ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM); + ~ScaLBL_Poisson(); + + 
// functions in they should be run + void ReadParams(string filename); + void ReadParams(std::shared_ptr db0); + void SetDomain(); + void ReadInput(); + void Create(); + void Initialize(); + void Run(double *ChargeDensity); + + bool Restart,pBC; + int timestep,timestepMax; + int BoundaryCondition; + double tau,mu; + double Fx,Fy,Fz,flux; + double din,dout; + double tolerance; + + int Nx,Ny,Nz,N,Np; + int rank,nprocx,nprocy,nprocz,nprocs; + double Lx,Ly,Lz; + + std::shared_ptr Dm; // this domain is for analysis + std::shared_ptr Mask; // this domain is for lbm + std::shared_ptr ScaLBL_Comm; + // input database + std::shared_ptr db; + std::shared_ptr domain_db; + std::shared_ptr electric_db; + + IntArray Map; + DoubleArray Distance; + int *NeighborList; + double *fq; + double *Psi; + +private: + MPI_Comm comm; + + // filenames + char LocalRankString[8]; + char LocalRankFilename[40]; + char LocalRestartFile[40]; + + //int rank,nprocs; + void LoadParams(std::shared_ptr db0); +}; diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp new file mode 100644 index 00000000..e3765d12 --- /dev/null +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "models/DFHModel.h" +#include "models/IonModel.h" +#include "models/PoissonSolver.h" + +//#define WRE_SURFACES + +/* + * Simulator for two-phase flow in porous media + * James E. 
McClure 2013-2014 + */ + +using namespace std; + +//************************************************************************* +// Implementation of Two-Phase Immiscible LBM using CUDA +//************************************************************************* + +int main(int argc, char **argv) +{ + // Initialize MPI + int provided_thread_support = -1; + MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); + if ( rank==0 && provided_thread_support Date: Thu, 6 Aug 2020 15:42:36 -0400 Subject: [PATCH 008/205] Adding skeleton for electrokinetic LBM --- models/ElectroKinetic.cpp | 1568 ------------------------------------- models/ElectroKinetic.h | 88 --- 2 files changed, 1656 deletions(-) delete mode 100644 models/ElectroKinetic.cpp delete mode 100644 models/ElectroKinetic.h diff --git a/models/ElectroKinetic.cpp b/models/ElectroKinetic.cpp deleted file mode 100644 index a8c21a75..00000000 --- a/models/ElectroKinetic.cpp +++ /dev/null @@ -1,1568 +0,0 @@ -/* -color lattice boltzmann model - */ -#include "models/ColorModel.h" -#include "analysis/distance.h" -#include "analysis/morphology.h" -#include "common/Communication.h" -#include "common/ReadMicroCT.h" -#include -#include - -ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), -Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), -Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) -{ - REVERSE_FLOW_DIRECTION = false; -} -ScaLBL_ColorModel::~ScaLBL_ColorModel(){ - -} - -/*void ScaLBL_ColorModel::WriteCheckpoint(const char *FILENAME, const double *cPhi, const double *cfq, int Np) -{ - int q,n; - double value; - ofstream File(FILENAME,ios::binary); - for (n=0; n( 
filename ); - domain_db = db->getDatabase( "Domain" ); - color_db = db->getDatabase( "Color" ); - analysis_db = db->getDatabase( "Analysis" ); - vis_db = db->getDatabase( "Visualization" ); - - // set defaults - timestepMax = 100000; - tauA = tauB = 1.0; - rhoA = rhoB = 1.0; - Fx = Fy = Fz = 0.0; - alpha=1e-3; - beta=0.95; - Restart=false; - din=dout=1.0; - flux=0.0; - - // Color Model parameters - if (color_db->keyExists( "timestepMax" )){ - timestepMax = color_db->getScalar( "timestepMax" ); - } - if (color_db->keyExists( "tauA" )){ - tauA = color_db->getScalar( "tauA" ); - } - if (color_db->keyExists( "tauB" )){ - tauB = color_db->getScalar( "tauB" ); - } - if (color_db->keyExists( "rhoA" )){ - rhoA = color_db->getScalar( "rhoA" ); - } - if (color_db->keyExists( "rhoB" )){ - rhoB = color_db->getScalar( "rhoB" ); - } - if (color_db->keyExists( "F" )){ - Fx = color_db->getVector( "F" )[0]; - Fy = color_db->getVector( "F" )[1]; - Fz = color_db->getVector( "F" )[2]; - } - if (color_db->keyExists( "alpha" )){ - alpha = color_db->getScalar( "alpha" ); - } - if (color_db->keyExists( "beta" )){ - beta = color_db->getScalar( "beta" ); - } - if (color_db->keyExists( "Restart" )){ - Restart = color_db->getScalar( "Restart" ); - } - if (color_db->keyExists( "din" )){ - din = color_db->getScalar( "din" ); - } - if (color_db->keyExists( "dout" )){ - dout = color_db->getScalar( "dout" ); - } - if (color_db->keyExists( "flux" )){ - flux = color_db->getScalar( "flux" ); - } - inletA=1.f; - inletB=0.f; - outletA=0.f; - outletB=1.f; - //if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details) - - BoundaryCondition = 0; - if (domain_db->keyExists( "BC" )){ - BoundaryCondition = domain_db->getScalar( "BC" ); - } - - // Override user-specified boundary condition for specific protocols - auto protocol = color_db->getWithDefault( "protocol", "none" ); - if (protocol == "seed water"){ - if (BoundaryCondition != 0 && BoundaryCondition != 
5){ - BoundaryCondition = 0; - if (rank==0) printf("WARNING: protocol (seed water) supports only full periodic boundary condition \n"); - } - domain_db->putScalar( "BC", BoundaryCondition ); - } - else if (protocol == "open connected oil"){ - if (BoundaryCondition != 0 && BoundaryCondition != 5){ - BoundaryCondition = 0; - if (rank==0) printf("WARNING: protocol (open connected oil) supports only full periodic boundary condition \n"); - } - domain_db->putScalar( "BC", BoundaryCondition ); - } - else if (protocol == "shell aggregation"){ - if (BoundaryCondition != 0 && BoundaryCondition != 5){ - BoundaryCondition = 0; - if (rank==0) printf("WARNING: protocol (shell aggregation) supports only full periodic boundary condition \n"); - } - domain_db->putScalar( "BC", BoundaryCondition ); - } -} - -void ScaLBL_ColorModel::SetDomain(){ - Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis - Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases - // domain parameters - Nx = Dm->Nx; - Ny = Dm->Ny; - Nz = Dm->Nz; - Lx = Dm->Lx; - Ly = Dm->Ly; - Lz = Dm->Lz; - N = Nx*Ny*Nz; - id = new signed char [N]; - for (int i=0; iid[i] = 1; // initialize this way - //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - Averages = std::shared_ptr ( new SubPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); - Dm->CommInit(); - MPI_Barrier(comm); - // Read domain parameters - rank = Dm->rank(); - nprocx = Dm->nprocx(); - nprocy = Dm->nprocy(); - nprocz = Dm->nprocz(); -} - -void ScaLBL_ColorModel::ReadInput(){ - - sprintf(LocalRankString,"%05d",rank); - sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); - sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); - - if (color_db->keyExists( "image_sequence" )){ - auto ImageList = color_db->getVector( "image_sequence"); - int IMAGE_INDEX = color_db->getWithDefault( "image_index", 0 ); - std::string first_image = ImageList[IMAGE_INDEX]; 
- Mask->Decomp(first_image); - IMAGE_INDEX++; - } - else if (domain_db->keyExists( "GridFile" )){ - // Read the local domain data - auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD ); - // Fill the halo (assuming GCW of 1) - array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; - ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; - ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); - fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); - Array id_view; - id_view.viewRaw( size1, Mask->id ); - fill.copy( input_id, id_view ); - fill.fill( id_view ); - } - else if (domain_db->keyExists( "Filename" )){ - auto Filename = domain_db->getScalar( "Filename" ); - Mask->Decomp(Filename); - } - else{ - Mask->ReadIDs(); - } - for (int i=0; iid[i]; // save what was read - - // Generate the signed distance map - // Initialize the domain and communication - Array id_solid(Nx,Ny,Nz); - // Solve for the position of the solid phase - for (int k=0;kid[n]; - if (label > 0) id_solid(i,j,k) = 1; - else id_solid(i,j,k) = 0; - } - } - } - // Initialize the signed distance function - for (int k=0;kSDs(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0; - } - } - } -// MeanFilter(Averages->SDs); - if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); - CalcDist(Averages->SDs,id_solid,*Mask); - - if (rank == 0) cout << "Domain set." << endl; - - Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); -} - -void ScaLBL_ColorModel::AssignComponentLabels(double *phase) -{ - size_t NLABELS=0; - signed char VALUE=0; - double AFFINITY=0.f; - - auto LabelList = color_db->getVector( "ComponentLabels" ); - auto AffinityList = color_db->getVector( "ComponentAffinity" ); - - NLABELS=LabelList.size(); - if (NLABELS != AffinityList.size()){ - ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! 
\n"); - } - - double label_count[NLABELS]; - double label_count_global[NLABELS]; - // Assign the labels - - for (size_t idx=0; idxid[n] = 0; // set mask to zero since this is an immobile component - } - } - // fluid labels are reserved - if (VALUE == 1) AFFINITY=1.0; - else if (VALUE == 2) AFFINITY=-1.0; - phase[n] = AFFINITY; - } - } - } - - // Set Dm to match Mask - for (int i=0; iid[i] = Mask->id[i]; - - for (size_t idx=0; idxComm, label_count[idx]); - - if (rank==0){ - printf("Component labels: %lu \n",NLABELS); - for (unsigned int idx=0; idxid[i] = Mask->id[i]; - Mask->CommInit(); - Np=Mask->PoreCount(); - //........................................................................... - if (rank==0) printf ("Create ScaLBL_Communicator \n"); - // Create a communicator for the device (will use optimized layout) - // ScaLBL_Communicator ScaLBL_Comm(Mask); // original - ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); - ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); - - int Npad=(Np/16 + 2)*16; - if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); - Map.resize(Nx,Ny,Nz); Map.fill(-2); - auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - MPI_Barrier(comm); - - //........................................................................... - // MAIN VARIABLES ALLOCATED HERE - //........................................................................... - // LBM variables - if (rank==0) printf ("Allocating distributions \n"); - //......................device distributions................................. - dist_mem_size = Np*sizeof(double); - neighborSize=18*(Np*sizeof(int)); - //........................................................................... 
- ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); - ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); - ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); - ScaLBL_AllocateDeviceMemory((void **) &Aq, 7*dist_mem_size); - ScaLBL_AllocateDeviceMemory((void **) &Bq, 7*dist_mem_size); - ScaLBL_AllocateDeviceMemory((void **) &Den, 2*dist_mem_size); - ScaLBL_AllocateDeviceMemory((void **) &Phi, sizeof(double)*Nx*Ny*Nz); - ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); - ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); - ScaLBL_AllocateDeviceMemory((void **) &ColorGrad, 3*sizeof(double)*Np); - //........................................................................... - // Update GPU data structures - if (rank==0) printf ("Setting up device map and neighbor list \n"); - fflush(stdout); - int *TmpMap; - TmpMap=new int[Np]; - for (int k=1; kLastExterior(); idx++){ - auto n = TmpMap[idx]; - if (n > Nx*Ny*Nz){ - printf("Bad value! idx=%i \n", n); - TmpMap[idx] = Nx*Ny*Nz-1; - } - } - for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ - auto n = TmpMap[idx]; - if ( n > Nx*Ny*Nz ){ - printf("Bad value! 
idx=%i \n",n); - TmpMap[idx] = Nx*Ny*Nz-1; - } - } - ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); - ScaLBL_DeviceBarrier(); - delete [] TmpMap; - - // copy the neighbor list - ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - // initialize phi based on PhaseLabel (include solid component labels) - double *PhaseLabel; - PhaseLabel = new double[N]; - AssignComponentLabels(PhaseLabel); - ScaLBL_CopyToDevice(Phi, PhaseLabel, N*sizeof(double)); -} - -/******************************************************** - * AssignComponentLabels * - ********************************************************/ - -void ScaLBL_ColorModel::Initialize(){ - - if (rank==0) printf ("Initializing distributions \n"); - ScaLBL_D3Q19_Init(fq, Np); - /* - * This function initializes model - */ - if (Restart == true){ - if (rank==0){ - printf("Reading restart file! \n"); - } - - // Read in the restart file to CPU buffers - int *TmpMap; - TmpMap = new int[Np]; - - double *cPhi, *cDist, *cDen; - cPhi = new double[N]; - cDen = new double[2*Np]; - cDist = new double[19*Np]; - ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); - ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); - - ifstream File(LocalRestartFile,ios::binary); - int idx; - double value,va,vb; - for (int n=0; nLastExterior(); n++){ - va = cDen[n]; - vb = cDen[Np + n]; - value = (va-vb)/(va+vb); - idx = TmpMap[n]; - if (!(idx < 0) && idxFirstInterior(); nLastInterior(); n++){ - va = cDen[n]; - vb = cDen[Np + n]; - value = (va-vb)/(va+vb); - idx = TmpMap[n]; - if (!(idx < 0) && idxLastExterior(), Np); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - - // establish reservoirs for external bC - if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ - if (Dm->kproc()==0){ - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); - } - if 
(Dm->kproc() == nprocz-1){ - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); - } - } - ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); -} - -void ScaLBL_ColorModel::Run(){ - int nprocs=nprocx*nprocy*nprocz; - const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - - int IMAGE_INDEX = 0; - int IMAGE_COUNT = 0; - std::vector ImageList; - bool SET_CAPILLARY_NUMBER = false; - bool RESCALE_FORCE = false; - bool MORPH_ADAPT = false; - bool USE_MORPH = false; - bool USE_SEED = false; - bool USE_DIRECT = false; - bool USE_MORPHOPEN_OIL = false; - int MAX_MORPH_TIMESTEPS = 50000; // maximum number of LBM timesteps to spend in morphological adaptation routine - int MIN_STEADY_TIMESTEPS = 100000; - int MAX_STEADY_TIMESTEPS = 200000; - int RESCALE_FORCE_AFTER_TIMESTEP = 0; - int RAMP_TIMESTEPS = 0;//50000; // number of timesteps to run initially (to get a reasonable velocity field before other pieces kick in) - int CURRENT_MORPH_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time) - int CURRENT_STEADY_TIMESTEPS=0; // counter for number of timesteps spent in morphological adaptation routine (reset each time) - int morph_interval = 100000; - int analysis_interval = 1000; // number of timesteps in between in situ analysis - int morph_timesteps = 0; - double morph_delta = 0.0; - double seed_water = 0.0; - double capillary_number = 0.0; - double tolerance = 0.01; - double Ca_previous = 0.f; - double initial_volume = 0.0; - double delta_volume = 0.0; - double delta_volume_target = 0.0; - - /* history for morphological algoirthm */ - double KRA_MORPH_FACTOR=0.5; - double volA_prev = 0.0; - double log_krA_prev = 1.0; - double log_krA_target = 1.0; - double log_krA = 1.0; - double slope_krA_volume = 0.0; - if (color_db->keyExists( "vol_A_previous" )){ - volA_prev = color_db->getScalar( "vol_A_previous" ); - } - if 
(color_db->keyExists( "log_krA_previous" )){ - log_krA_prev = color_db->getScalar( "log_krA_previous" ); - } - if (color_db->keyExists( "krA_morph_factor" )){ - KRA_MORPH_FACTOR = color_db->getScalar( "krA_morph_factor" ); - } - - /* defaults for simulation protocols */ - auto protocol = color_db->getWithDefault( "protocol", "none" ); - if (protocol == "image sequence"){ - // Get the list of images - USE_DIRECT = true; - ImageList = color_db->getVector( "image_sequence"); - IMAGE_INDEX = color_db->getWithDefault( "image_index", 0 ); - IMAGE_COUNT = ImageList.size(); - morph_interval = 10000; - USE_MORPH = true; - } - else if (protocol == "seed water"){ - morph_delta = -0.05; - seed_water = 0.01; - USE_SEED = true; - USE_MORPH = true; - } - else if (protocol == "open connected oil"){ - morph_delta = -0.05; - USE_MORPH = true; - USE_MORPHOPEN_OIL = true; - } - else if (protocol == "shell aggregation"){ - morph_delta = -0.05; - USE_MORPH = true; - } - if (color_db->keyExists( "capillary_number" )){ - capillary_number = color_db->getScalar( "capillary_number" ); - SET_CAPILLARY_NUMBER=true; - } - if (color_db->keyExists( "rescale_force_after_timestep" )){ - RESCALE_FORCE_AFTER_TIMESTEP = color_db->getScalar( "rescale_force_after_timestep" ); - RESCALE_FORCE = true; - } - if (color_db->keyExists( "timestep" )){ - timestep = color_db->getScalar( "timestep" ); - } - if (BoundaryCondition != 0 && BoundaryCondition != 5 && SET_CAPILLARY_NUMBER==true){ - if (rank == 0) printf("WARINING: capillary number target only supported for BC = 0 or 5 \n"); - SET_CAPILLARY_NUMBER=false; - } - if (analysis_db->keyExists( "seed_water" )){ - seed_water = analysis_db->getScalar( "seed_water" ); - if (rank == 0) printf("Seed water in oil %f (seed_water) \n",seed_water); - USE_SEED = true; - } - if (analysis_db->keyExists( "morph_delta" )){ - morph_delta = analysis_db->getScalar( "morph_delta" ); - if (rank == 0) printf("Target volume change %f (morph_delta) \n",morph_delta); - } - if 
(analysis_db->keyExists( "morph_interval" )){ - morph_interval = analysis_db->getScalar( "morph_interval" ); - USE_MORPH = true; - } - if (analysis_db->keyExists( "use_morphopen_oil" )){ - USE_MORPHOPEN_OIL = analysis_db->getScalar( "use_morphopen_oil" ); - if (rank == 0 && USE_MORPHOPEN_OIL) printf("Volume change by morphological opening \n"); - USE_MORPH = true; - } - if (analysis_db->keyExists( "tolerance" )){ - tolerance = analysis_db->getScalar( "tolerance" ); - } - if (analysis_db->keyExists( "analysis_interval" )){ - analysis_interval = analysis_db->getScalar( "analysis_interval" ); - } - if (analysis_db->keyExists( "min_steady_timesteps" )){ - MIN_STEADY_TIMESTEPS = analysis_db->getScalar( "min_steady_timesteps" ); - } - if (analysis_db->keyExists( "max_steady_timesteps" )){ - MAX_STEADY_TIMESTEPS = analysis_db->getScalar( "max_steady_timesteps" ); - } - if (analysis_db->keyExists( "max_morph_timesteps" )){ - MAX_MORPH_TIMESTEPS = analysis_db->getScalar( "max_morph_timesteps" ); - } - - - if (rank==0){ - printf("********************************************************\n"); - if (protocol == "image sequence"){ - printf(" using protocol = image sequence \n"); - printf(" min_steady_timesteps = %i \n",MIN_STEADY_TIMESTEPS); - printf(" max_steady_timesteps = %i \n",MAX_STEADY_TIMESTEPS); - printf(" tolerance = %f \n",tolerance); - std::string first_image = ImageList[IMAGE_INDEX]; - printf(" first image in sequence: %s ***\n", first_image.c_str()); - } - else if (protocol == "seed water"){ - printf(" using protocol = seed water \n"); - printf(" min_steady_timesteps = %i \n",MIN_STEADY_TIMESTEPS); - printf(" max_steady_timesteps = %i \n",MAX_STEADY_TIMESTEPS); - printf(" tolerance = %f \n",tolerance); - printf(" morph_delta = %f \n",morph_delta); - printf(" seed_water = %f \n",seed_water); - } - else if (protocol == "open connected oil"){ - printf(" using protocol = open connected oil \n"); - printf(" min_steady_timesteps = %i \n",MIN_STEADY_TIMESTEPS); - printf(" 
max_steady_timesteps = %i \n",MAX_STEADY_TIMESTEPS); - printf(" tolerance = %f \n",tolerance); - printf(" morph_delta = %f \n",morph_delta); - } - else if (protocol == "shell aggregation"){ - printf(" using protocol = shell aggregation \n"); - printf(" min_steady_timesteps = %i \n",MIN_STEADY_TIMESTEPS); - printf(" max_steady_timesteps = %i \n",MAX_STEADY_TIMESTEPS); - printf(" tolerance = %f \n",tolerance); - printf(" morph_delta = %f \n",morph_delta); - } - printf("No. of timesteps: %i \n", timestepMax); - fflush(stdout); - } - - //.......create and start timer............ - double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); - starttime = MPI_Wtime(); - //......................................... - - //************ MAIN ITERATION LOOP ***************************************/ - PROFILE_START("Loop"); - //std::shared_ptr analysis_db; - bool Regular = false; - auto current_db = db->cloneDatabase(); - runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); - //analysis.createThreads( analysis_method, 4 ); - while (timestep < timestepMax ) { - //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } - PROFILE_START("Update"); - // *************ODD TIMESTEP************* - timestep++; - // Compute the Phase indicator field - // Read for Aq, Bq happens in this routine (requires communication) - ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); - - // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - if (BoundaryCondition > 0 && BoundaryCondition < 5){ - ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, 
inletA, inletB); - ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); - } - // Halo exchange for phase field - ScaLBL_Comm_Regular->SendHalo(Phi); - - ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Phi); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - // Set BCs - if (BoundaryCondition == 3){ - ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); - } - if (BoundaryCondition == 4){ - din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); - } - else if (BoundaryCondition == 5){ - ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); - ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); - } - ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - - // *************EVEN TIMESTEP************* - timestep++; - // Compute the Phase indicator field - ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); - - // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - // Halo exchange for phase field - if (BoundaryCondition > 0 && BoundaryCondition < 5){ - ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); - ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); - } - 
ScaLBL_Comm_Regular->SendHalo(Phi); - ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Phi); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - // Set boundary conditions - if (BoundaryCondition == 3){ - ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); - } - else if (BoundaryCondition == 4){ - din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); - } - else if (BoundaryCondition == 5){ - ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); - ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); - } - ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - //************************************************************************ - PROFILE_STOP("Update"); - - if (rank==0 && timestep%analysis_interval == 0 && BoundaryCondition == 4){ - printf("%i %f \n",timestep,din); - } - // Run the analysis - analysis.basic(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den ); - - // allow initial ramp-up to get closer to steady state - if (timestep > RAMP_TIMESTEPS && timestep%analysis_interval == 0 && USE_MORPH){ - analysis.finish(); - CURRENT_STEADY_TIMESTEPS += analysis_interval; - - double volB = Averages->gwb.V; - double volA = Averages->gnb.V; - volA /= Dm->Volume; - volB /= Dm->Volume;; - //initial_volume = volA*Dm->Volume; - double vA_x = Averages->gnb.Px/Averages->gnb.M; - double vA_y = Averages->gnb.Py/Averages->gnb.M; - double vA_z = Averages->gnb.Pz/Averages->gnb.M; - double vB_x = Averages->gwb.Px/Averages->gwb.M; - double vB_y 
= Averages->gwb.Py/Averages->gwb.M; - double vB_z = Averages->gwb.Pz/Averages->gwb.M; - double muA = rhoA*(tauA-0.5)/3.f; - double muB = rhoB*(tauB-0.5)/3.f; - double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz); - double dir_x = Fx/force_mag; - double dir_y = Fy/force_mag; - double dir_z = Fz/force_mag; - if (force_mag == 0.0){ - // default to z direction - dir_x = 0.0; - dir_y = 0.0; - dir_z = 1.0; - force_mag = 1.0; - } - double current_saturation = volB/(volA+volB); - double flow_rate_A = volA*(vA_x*dir_x + vA_y*dir_y + vA_z*dir_z); - double flow_rate_B = volB*(vB_x*dir_x + vB_y*dir_y + vB_z*dir_z); - double Ca = fabs(muA*flow_rate_A + muB*flow_rate_B)/(5.796*alpha); - - if ( morph_timesteps > morph_interval ){ - - bool isSteady = false; - if ( (fabs((Ca - Ca_previous)/Ca) < tolerance && CURRENT_STEADY_TIMESTEPS > MIN_STEADY_TIMESTEPS)) - isSteady = true; - if (CURRENT_STEADY_TIMESTEPS > MAX_STEADY_TIMESTEPS) - isSteady = true; - if (RESCALE_FORCE == true && SET_CAPILLARY_NUMBER == true && CURRENT_STEADY_TIMESTEPS > RESCALE_FORCE_AFTER_TIMESTEP){ - RESCALE_FORCE = false; - double RESCALE_FORCE_FACTOR = capillary_number / Ca; - if (RESCALE_FORCE_FACTOR > 2.0) RESCALE_FORCE_FACTOR = 2.0; - if (RESCALE_FORCE_FACTOR < 0.5) RESCALE_FORCE_FACTOR = 0.5; - Fx *= RESCALE_FORCE_FACTOR; - Fy *= RESCALE_FORCE_FACTOR; - Fz *= RESCALE_FORCE_FACTOR; - force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz); - if (force_mag > 1e-3){ - Fx *= 1e-3/force_mag; // impose ceiling for stability - Fy *= 1e-3/force_mag; - Fz *= 1e-3/force_mag; - } - if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); - Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); - color_db->putVector("F",{Fx,Fy,Fz}); - } - if ( isSteady ){ - MORPH_ADAPT = true; - CURRENT_MORPH_TIMESTEPS=0; - delta_volume_target = Dm->Volume*volA *morph_delta; // set target volume change - //****** ENDPOINT ADAPTATION ********/ - double krA_TMP= fabs(muA*flow_rate_A / force_mag); - double krB_TMP= fabs(muB*flow_rate_B 
/ force_mag); - log_krA = log(krA_TMP); - if (krA_TMP < 0.0){ - // cannot do endpoint adaptation if kr is negative - log_krA = log_krA_prev; - } - else if (krA_TMP < krB_TMP && morph_delta > 0.0){ - /** morphological target based on relative permeability for A **/ - log_krA_target = log(KRA_MORPH_FACTOR*(krA_TMP)); - slope_krA_volume = (log_krA - log_krA_prev)/(Dm->Volume*(volA - volA_prev)); - delta_volume_target=min(delta_volume_target,Dm->Volume*(volA+(log_krA_target - log_krA)/slope_krA_volume)); - if (rank==0){ - printf(" Enabling endpoint adaptation: krA = %f, krB = %f \n",krA_TMP,krB_TMP); - printf(" log(kr)=%f, volume=%f, TARGET log(kr)=%f, volume change=%f \n",log_krA, volA, log_krA_target, delta_volume_target/(volA*Dm->Volume)); - } - } - log_krA_prev = log_krA; - volA_prev = volA; - //******************************** **/ - /** compute averages & write data **/ - Averages->Full(); - Averages->Write(timestep); - analysis.WriteVisData(timestep, current_db, *Averages, Phi, Pressure, Velocity, fq, Den ); - analysis.finish(); - - if (rank==0){ - printf("** WRITE STEADY POINT *** "); - printf("Ca = %f, (previous = %f) \n",Ca,Ca_previous); - double h = Dm->voxel_length; - // pressures - double pA = Averages->gnb.p; - double pB = Averages->gwb.p; - double pAc = Averages->gnc.p; - double pBc = Averages->gwc.p; - double pAB = (pA-pB)/(h*6.0*alpha); - double pAB_connected = (pAc-pBc)/(h*6.0*alpha); - // connected contribution - double Vol_nc = Averages->gnc.V/Dm->Volume; - double Vol_wc = Averages->gwc.V/Dm->Volume; - double Vol_nd = Averages->gnd.V/Dm->Volume; - double Vol_wd = Averages->gwd.V/Dm->Volume; - double Mass_n = Averages->gnc.M + Averages->gnd.M; - double Mass_w = Averages->gwc.M + Averages->gwd.M; - double vAc_x = Averages->gnc.Px/Mass_n; - double vAc_y = Averages->gnc.Py/Mass_n; - double vAc_z = Averages->gnc.Pz/Mass_n; - double vBc_x = Averages->gwc.Px/Mass_w; - double vBc_y = Averages->gwc.Py/Mass_w; - double vBc_z = Averages->gwc.Pz/Mass_w; - // 
disconnected contribution - double vAd_x = Averages->gnd.Px/Mass_n; - double vAd_y = Averages->gnd.Py/Mass_n; - double vAd_z = Averages->gnd.Pz/Mass_n; - double vBd_x = Averages->gwd.Px/Mass_w; - double vBd_y = Averages->gwd.Py/Mass_w; - double vBd_z = Averages->gwd.Pz/Mass_w; - - double flow_rate_A_connected = Vol_nc*(vAc_x*dir_x + vAc_y*dir_y + vAc_z*dir_z); - double flow_rate_B_connected = Vol_wc*(vBc_x*dir_x + vBc_y*dir_y + vBc_z*dir_z); - double flow_rate_A_disconnected = (Vol_nd)*(vAd_x*dir_x + vAd_y*dir_y + vAd_z*dir_z); - double flow_rate_B_disconnected = (Vol_wd)*(vBd_x*dir_x + vBd_y*dir_y + vBd_z*dir_z); - - double kAeff_connected = h*h*muA*flow_rate_A_connected/(force_mag); - double kBeff_connected = h*h*muB*flow_rate_B_connected/(force_mag); - - double kAeff_disconnected = h*h*muA*flow_rate_A_disconnected/(force_mag); - double kBeff_disconnected = h*h*muB*flow_rate_B_disconnected/(force_mag); - - double kAeff = h*h*muA*(flow_rate_A)/(force_mag); - double kBeff = h*h*muB*(flow_rate_B)/(force_mag); - - double viscous_pressure_drop = (rhoA*volA + rhoB*volB)*force_mag; - double Mobility = muA/muB; - - bool WriteHeader=false; - FILE * kr_log_file = fopen("relperm.csv","r"); - if (kr_log_file != NULL) - fclose(kr_log_file); - else - WriteHeader=true; - kr_log_file = fopen("relperm.csv","a"); - if (WriteHeader) - fprintf(kr_log_file,"timesteps sat.water eff.perm.oil eff.perm.water eff.perm.oil.connected eff.perm.water.connected eff.perm.oil.disconnected eff.perm.water.disconnected cap.pressure cap.pressure.connected pressure.drop Ca M\n"); - - fprintf(kr_log_file,"%i %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g %.5g\n",CURRENT_STEADY_TIMESTEPS,current_saturation,kAeff,kBeff,kAeff_connected,kBeff_connected,kAeff_disconnected,kBeff_disconnected,pAB,pAB_connected,viscous_pressure_drop,Ca,Mobility); - fclose(kr_log_file); - - printf(" Measured capillary number %f \n ",Ca); - } - if (SET_CAPILLARY_NUMBER ){ - Fx *= capillary_number / Ca; - Fy *= 
capillary_number / Ca; - Fz *= capillary_number / Ca; - if (force_mag > 1e-3){ - Fx *= 1e-3/force_mag; // impose ceiling for stability - Fy *= 1e-3/force_mag; - Fz *= 1e-3/force_mag; - } - if (rank == 0) printf(" -- adjust force by factor %f \n ",capillary_number / Ca); - Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); - color_db->putVector("F",{Fx,Fy,Fz}); - } - - CURRENT_STEADY_TIMESTEPS = 0; - } - else{ - if (rank==0){ - printf("** Continue to simulate steady *** \n "); - printf("Ca = %f, (previous = %f) \n",Ca,Ca_previous); - } - } - morph_timesteps=0; - Ca_previous = Ca; - } - - if (MORPH_ADAPT ){ - CURRENT_MORPH_TIMESTEPS += analysis_interval; - if (USE_DIRECT){ - // Use image sequence - IMAGE_INDEX++; - MORPH_ADAPT = false; - if (IMAGE_INDEX < IMAGE_COUNT){ - std::string next_image = ImageList[IMAGE_INDEX]; - if (rank==0) printf("***Loading next image in sequence (%i) ***\n",IMAGE_INDEX); - color_db->putScalar("image_index",IMAGE_INDEX); - ImageInit(next_image); - } - else{ - if (rank==0) printf("Finished simulating image sequence \n"); - timestep = timestepMax; - } - } - else if (USE_SEED){ - delta_volume = volA*Dm->Volume - initial_volume; - CURRENT_MORPH_TIMESTEPS += analysis_interval; - double massChange = SeedPhaseField(seed_water); - if (rank==0) printf("***Seed water in oil %f, volume change %f / %f ***\n", massChange, delta_volume, delta_volume_target); - } - else if (USE_MORPHOPEN_OIL){ - delta_volume = volA*Dm->Volume - initial_volume; - if (rank==0) printf("***Morphological opening of connected oil, target volume change %f ***\n", delta_volume_target); - MorphOpenConnected(delta_volume_target); - } - else { - if (rank==0) printf("***Shell aggregation, target volume change %f ***\n", delta_volume_target); - //double delta_volume_target = volB - (volA + volB)*TARGET_SATURATION; // change in volume to A - delta_volume += MorphInit(beta,delta_volume_target-delta_volume); - } - - if ( (delta_volume - 
delta_volume_target)/delta_volume_target > 0.0 ){ - MORPH_ADAPT = false; - CURRENT_STEADY_TIMESTEPS=0; - initial_volume = volA*Dm->Volume; - delta_volume = 0.0; - if (RESCALE_FORCE_AFTER_TIMESTEP > 0) - RESCALE_FORCE = true; - } - else if (!(USE_DIRECT) && CURRENT_MORPH_TIMESTEPS > MAX_MORPH_TIMESTEPS) { - MORPH_ADAPT = false; - CURRENT_STEADY_TIMESTEPS=0; - initial_volume = volA*Dm->Volume; - delta_volume = 0.0; - RESCALE_FORCE = true; - if (RESCALE_FORCE_AFTER_TIMESTEP > 0) - RESCALE_FORCE = true; - } - } - morph_timesteps += analysis_interval; - } - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - } - analysis.finish(); - PROFILE_STOP("Loop"); - PROFILE_SAVE("lbpm_color_simulator",1); - //************************************************************************ - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - stoptime = MPI_Wtime(); - if (rank==0) printf("-------------------------------------------------------------------\n"); - // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; - // Performance obtained from each node - double MLUPS = double(Np)/cputime/1000000; - - if (rank==0) printf("********************************************************\n"); - if (rank==0) printf("CPU time = %f \n", cputime); - if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); - MLUPS *= nprocs; - if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); - if (rank==0) printf("********************************************************\n"); - - // ************************************************************************ -} - -double ScaLBL_ColorModel::ImageInit(std::string Filename){ - - if (rank==0) printf("Re-initializing fluids from file: %s \n", Filename.c_str()); - Mask->Decomp(Filename); - for (int i=0; iid[i]; // save what was read - for (int i=0; iid[i] = Mask->id[i]; // save what was read - - double *PhaseLabel; - PhaseLabel = new double[Nx*Ny*Nz]; - AssignComponentLabels(PhaseLabel); - - double 
Count = 0.0; - double PoreCount = 0.0; - for (int k=1; kComm, Count); - PoreCount=sumReduce( Dm->Comm, PoreCount); - - if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount); - ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double)); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - - ScaLBL_D3Q19_Init(fq, Np); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - - ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double)); - - double saturation = Count/PoreCount; - return saturation; - -} - -double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ - - int nx = Nx; - int ny = Ny; - int nz = Nz; - int n; - int N = nx*ny*nz; - double volume_change=0.0; - - if (target_volume_change < 0.0){ - Array id_solid(nx,ny,nz); - Array phase_label(nx,ny,nz); - DoubleArray distance(Nx,Ny,Nz); - DoubleArray phase(nx,ny,nz); - signed char *id_connected; - id_connected = new signed char [nx*ny*nz]; - - ScaLBL_CopyToHost(phase.data(), Phi, N*sizeof(double)); - - // Extract only the connected part of NWP - BlobIDstruct new_index; - double vF=0.0; double vS=0.0; - ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); - - long long count_connected=0; - long long count_porespace=0; - long long count_water=0; - for (int k=1; k 0){ - count_porespace++; - } - if (id[n] == 2){ - count_water++; - } - } - } - } - count_connected=sumReduce( Dm->Comm, count_connected); - count_porespace=sumReduce( Dm->Comm, count_porespace); - count_water=sumReduce( Dm->Comm, count_water); - - for (int k=0; kSDs(i,j,k) > 0.f){ - if (d < 3.f){ - phase(i,j,k) = (2.f*(exp(-2.f*beta*d))/(1.f+exp(-2.f*beta*d))-1.f); - } - } - } - } - } - - int count_morphopen=0.0; - for (int k=1; kComm, 
count_morphopen); - volume_change = double(count_morphopen - count_connected); - - if (rank==0) printf(" opening of connected oil %f \n",volume_change/count_connected); - - ScaLBL_CopyToDevice(Phi,phase.data(),N*sizeof(double)); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ - if (Dm->kproc()==0){ - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); - } - if (Dm->kproc() == nprocz-1){ - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); - } - } - } - return(volume_change); -} -double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ - srand(time(NULL)); - double mass_loss =0.f; - double count =0.f; - double *Aq_tmp, *Bq_tmp; - - Aq_tmp = new double [7*Np]; - Bq_tmp = new double [7*Np]; - - ScaLBL_CopyToHost(Aq_tmp, Aq, 7*Np*sizeof(double)); - ScaLBL_CopyToHost(Bq_tmp, Bq, 7*Np*sizeof(double)); - - - for (int n=0; n < ScaLBL_Comm->LastExterior(); n++){ - double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; - double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; - double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; - double phase_id = (dA - dB) / (dA + dB); - if (phase_id > 0.0){ - Aq_tmp[n] -= 0.3333333333333333*random_value; - Aq_tmp[n+Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; - 
Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; - - Bq_tmp[n] += 0.3333333333333333*random_value; - Bq_tmp[n+Np] += 0.1111111111111111*random_value; - Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; - } - mass_loss += random_value*seed_water_in_oil; - } - - for (int n=ScaLBL_Comm->FirstInterior(); n < ScaLBL_Comm->LastInterior(); n++){ - double random_value = seed_water_in_oil*double(rand())/ RAND_MAX; - double dA = Aq_tmp[n] + Aq_tmp[n+Np] + Aq_tmp[n+2*Np] + Aq_tmp[n+3*Np] + Aq_tmp[n+4*Np] + Aq_tmp[n+5*Np] + Aq_tmp[n+6*Np]; - double dB = Bq_tmp[n] + Bq_tmp[n+Np] + Bq_tmp[n+2*Np] + Bq_tmp[n+3*Np] + Bq_tmp[n+4*Np] + Bq_tmp[n+5*Np] + Bq_tmp[n+6*Np]; - double phase_id = (dA - dB) / (dA + dB); - if (phase_id > 0.0){ - Aq_tmp[n] -= 0.3333333333333333*random_value; - Aq_tmp[n+Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+2*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+3*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+4*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+5*Np] -= 0.1111111111111111*random_value; - Aq_tmp[n+6*Np] -= 0.1111111111111111*random_value; - - Bq_tmp[n] += 0.3333333333333333*random_value; - Bq_tmp[n+Np] += 0.1111111111111111*random_value; - Bq_tmp[n+2*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+3*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+4*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+5*Np] += 0.1111111111111111*random_value; - Bq_tmp[n+6*Np] += 0.1111111111111111*random_value; - } - mass_loss += random_value*seed_water_in_oil; - } - - count= sumReduce( Dm->Comm, count); - mass_loss= sumReduce( Dm->Comm, mass_loss); - if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); - - // Need to initialize Aq, Bq, Den, Phi directly - 
//ScaLBL_CopyToDevice(Phi,phase.data(),7*Np*sizeof(double)); - ScaLBL_CopyToDevice(Aq, Aq_tmp, 7*Np*sizeof(double)); - ScaLBL_CopyToDevice(Bq, Bq_tmp, 7*Np*sizeof(double)); - - return(mass_loss); -} - -double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta_volume){ - const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - - double vF = 0.f; - double vS = 0.f; - double delta_volume; - double WallFactor = 0.0; - bool USE_CONNECTED_NWP = false; - - DoubleArray phase(Nx,Ny,Nz); - IntArray phase_label(Nx,Ny,Nz);; - DoubleArray phase_distance(Nx,Ny,Nz); - Array phase_id(Nx,Ny,Nz); - fillHalo fillDouble(Dm->Comm,Dm->rank_info,{Nx-2,Ny-2,Nz-2},{1,1,1},0,1); - - - // Basic algorithm to - // 1. Copy phase field to CPU - ScaLBL_CopyToHost(phase.data(), Phi, N*sizeof(double)); - - double count = 0.f; - for (int k=1; k 0.f && Averages->SDs(i,j,k) > 0.f) count+=1.f; - } - } - } - double volume_initial = sumReduce( Dm->Comm, count); - /* - sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank); - FILE *INPUT = fopen(LocalRankFilename,"wb"); - fwrite(phase.data(),8,N,INPUT); - fclose(INPUT); - */ - // 2. 
Identify connected components of phase field -> phase_label - - double volume_connected = 0.0; - double second_biggest = 0.0; - if (USE_CONNECTED_NWP){ - BlobIDstruct new_index; - ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); - MPI_Barrier(Dm->Comm); - - // only operate on component "0" - count = 0.0; - - for (int k=0; kComm, count); - second_biggest = sumReduce( Dm->Comm, second_biggest); - } - else { - // use the whole NWP - for (int k=0; kSDs(i,j,k) > 0.f){ - if (phase(i,j,k) > 0.f ){ - phase_id(i,j,k) = 0; - } - else { - phase_id(i,j,k) = 1; - } - } - else { - phase_id(i,j,k) = 1; - } - } - } - } - } - - /*int reach_x, reach_y, reach_z; - for (int k=0; k phase_distance - CalcDist(phase_distance,phase_id,*Dm); - - double temp,value; - double factor=0.5/beta; - for (int k=0; k 1.f) value=1.f; - if (value < -1.f) value=-1.f; - // temp -- distance based on analytical form McClure, Prins et al, Comp. Phys. Comm. - temp = -factor*log((1.0+value)/(1.0-value)); - /// use this approximation close to the object - if (fabs(value) < 0.8 && Averages->SDs(i,j,k) > 1.f ){ - phase_distance(i,j,k) = temp; - } - // erase the original object - phase(i,j,k) = -1.0; - } - } - } - } - - if (USE_CONNECTED_NWP){ - if (volume_connected - second_biggest < 2.0*fabs(target_delta_volume) && target_delta_volume < 0.0){ - // if connected volume is less than 2% just delete the whole thing - if (rank==0) printf("Connected region has shrunk! 
\n"); - REVERSE_FLOW_DIRECTION = true; - } - -/* else{*/ - if (rank==0) printf("Pathway volume / next largest ganglion %f \n",volume_connected/second_biggest ); - } - if (rank==0) printf("MorphGrow with target volume fraction change %f \n", target_delta_volume/volume_initial); - double target_delta_volume_incremental = target_delta_volume; - if (fabs(target_delta_volume) > 0.01*volume_initial) - target_delta_volume_incremental = 0.01*volume_initial*target_delta_volume/fabs(target_delta_volume); - delta_volume = MorphGrow(Averages->SDs,phase_distance,phase_id,Averages->Dm, target_delta_volume_incremental, WallFactor); - - for (int k=0; kSDs(i,j,k) > 0.f){ - if (d < 3.f){ - //phase(i,j,k) = -1.0; - phase(i,j,k) = (2.f*(exp(-2.f*beta*d))/(1.f+exp(-2.f*beta*d))-1.f); - } - } - } - } - } - fillDouble.fill(phase); - //} - - count = 0.f; - for (int k=1; k 0.f && Averages->SDs(i,j,k) > 0.f){ - count+=1.f; - } - } - } - } - double volume_final= sumReduce( Dm->Comm, count); - - delta_volume = (volume_final-volume_initial); - if (rank == 0) printf("MorphInit: change fluid volume fraction by %f \n", delta_volume/volume_initial); - if (rank == 0) printf(" new saturation = %f \n", volume_final/(0.238323*double((Nx-2)*(Ny-2)*(Nz-2)*nprocs))); - - // 6. copy back to the device - //if (rank==0) printf("MorphInit: copy data back to device\n"); - ScaLBL_CopyToDevice(Phi,phase.data(),N*sizeof(double)); - /* - sprintf(LocalRankFilename,"dist_final.%05i.raw",rank); - FILE *DIST = fopen(LocalRankFilename,"wb"); - fwrite(phase_distance.data(),8,N,DIST); - fclose(DIST); - - sprintf(LocalRankFilename,"phi_final.%05i.raw",rank); - FILE *PHI = fopen(LocalRankFilename,"wb"); - fwrite(phase.data(),8,N,PHI); - fclose(PHI); - */ - // 7. 
Re-initialize phase field and density - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ - if (Dm->kproc()==0){ - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); - ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); - } - if (Dm->kproc() == nprocz-1){ - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); - ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); - } - } - return delta_volume; -} - -void ScaLBL_ColorModel::WriteDebug(){ - // Copy back final phase indicator field and convert to regular layout - DoubleArray PhaseField(Nx,Ny,Nz); - //ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); - ScaLBL_CopyToHost(PhaseField.data(), Phi, sizeof(double)*N); - - FILE *OUTFILE; - sprintf(LocalRankFilename,"Phase.%05i.raw",rank); - OUTFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,OUTFILE); - fclose(OUTFILE); - - ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); - FILE *AFILE; - sprintf(LocalRankFilename,"A.%05i.raw",rank); - AFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,AFILE); - fclose(AFILE); - - ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); - FILE *BFILE; - sprintf(LocalRankFilename,"B.%05i.raw",rank); - BFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,BFILE); - fclose(BFILE); - - ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); - FILE *PFILE; - sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); - PFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,PFILE); - fclose(PFILE); - - ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); - FILE *VELX_FILE; - sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); - VELX_FILE = fopen(LocalRankFilename,"wb"); - 
fwrite(PhaseField.data(),8,N,VELX_FILE); - fclose(VELX_FILE); - - ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); - FILE *VELY_FILE; - sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); - VELY_FILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,VELY_FILE); - fclose(VELY_FILE); - - ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); - FILE *VELZ_FILE; - sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); - VELZ_FILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,VELZ_FILE); - fclose(VELZ_FILE); - -/* ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); - FILE *CGX_FILE; - sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank); - CGX_FILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,CGX_FILE); - fclose(CGX_FILE); - - ScaLBL_Comm->RegularLayout(Map,&ColorGrad[Np],PhaseField); - FILE *CGY_FILE; - sprintf(LocalRankFilename,"Gradient_Y.%05i.raw",rank); - CGY_FILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,CGY_FILE); - fclose(CGY_FILE); - - ScaLBL_Comm->RegularLayout(Map,&ColorGrad[2*Np],PhaseField); - FILE *CGZ_FILE; - sprintf(LocalRankFilename,"Gradient_Z.%05i.raw",rank); - CGZ_FILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,CGZ_FILE); - fclose(CGZ_FILE); -*/ -} diff --git a/models/ElectroKinetic.h b/models/ElectroKinetic.h deleted file mode 100644 index a3b3a124..00000000 --- a/models/ElectroKinetic.h +++ /dev/null @@ -1,88 +0,0 @@ -/* -Implementation of color lattice boltzmann model - */ -#include -#include -#include -#include -#include -#include -#include - -#include "common/Communication.h" -#include "analysis/TwoPhase.h" -#include "analysis/runAnalysis.h" -#include "common/MPI_Helpers.h" -#include "ProfilerApp.h" -#include "threadpool/thread_pool.h" - -class ScaLBL_ColorModel{ -public: - ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM); - ~ScaLBL_ColorModel(); - - // functions in they should be run - void ReadParams(string filename); - void 
ReadParams(std::shared_ptr db0); - void SetDomain(); - void ReadInput(); - void Create(); - void Initialize(); - void Run(); - void WriteDebug(); - - bool Restart,pBC; - bool REVERSE_FLOW_DIRECTION; - int timestep,timestepMax; - int BoundaryCondition; - double tauA,tauB,rhoA,rhoB,alpha,beta; - double Fx,Fy,Fz,flux; - double din,dout,inletA,inletB,outletA,outletB; - - int Nx,Ny,Nz,N,Np; - int rank,nprocx,nprocy,nprocz,nprocs; - double Lx,Ly,Lz; - - std::shared_ptr Dm; // this domain is for analysis - std::shared_ptr Mask; // this domain is for lbm - std::shared_ptr ScaLBL_Comm; - std::shared_ptr ScaLBL_Comm_Regular; - //std::shared_ptr Averages; - std::shared_ptr Averages; - - // input database - std::shared_ptr db; - std::shared_ptr domain_db; - std::shared_ptr color_db; - std::shared_ptr analysis_db; - std::shared_ptr vis_db; - - IntArray Map; - signed char *id; - int *NeighborList; - int *dvcMap; - double *fq, *Aq, *Bq; - double *Den, *Phi; - double *ColorGrad; - double *Velocity; - double *Pressure; - -private: - MPI_Comm comm; - - int dist_mem_size; - int neighborSize; - // filenames - char LocalRankString[8]; - char LocalRankFilename[40]; - char LocalRestartFile[40]; - - //int rank,nprocs; - void LoadParams(std::shared_ptr db0); - void AssignComponentLabels(double *phase); - double ImageInit(std::string filename); - double MorphInit(const double beta, const double morph_delta); - double SeedPhaseField(const double seed_water_in_oil); - double MorphOpenConnected(double target_volume_change); -}; - From fcdb84b2ad735fd51ad6b71901e5b5b774c910b5 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Thu, 6 Aug 2020 16:06:52 -0400 Subject: [PATCH 009/205] electro skeleton compiles --- common/ScaLBL.cpp | 1 - common/ScaLBL.h | 9 ++-- models/IonModel.cpp | 25 ++++------- models/IonModel.h | 2 +- models/PoissonSolver.cpp | 49 +++++---------------- tests/CMakeLists.txt | 1 + tests/lbpm_electrokinetic_dfh_simulator.cpp | 2 +- 7 files changed, 25 insertions(+), 64 
deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index eace3f3f..6ae49cca 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1211,7 +1211,6 @@ void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){ //...Packing for Y face(3,7,10,15,17)................................. ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*N],N); - ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N); MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 23cf6936..e029ddda 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -78,10 +78,10 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Velocity, int start extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Velocity, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); -// ION TRANSPORT MODEL -extern "C" void ScaLBL_D3Q7_AAeven_Poisson(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); +// LBM Poisson solver +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(double *dist, double *ChargeDensity, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); -extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, double *ChargeDensity, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz); // MRT MODEL @@ -92,7 +92,6 @@ extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *d_neighborList, double *dist, int st double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz); // COLOR MODEL - extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double 
*Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); @@ -197,7 +196,7 @@ public: void SendD3Q19AA(double *dist); void RecvD3Q19AA(double *dist); void SendD3Q7AA(double *fq, int Component); - void RecvD3Q7AA(double *fq, int Component) + void RecvD3Q7AA(double *fq, int Component); void BiSendD3Q7AA(double *Aq, double *Bq); void BiRecvD3Q7AA(double *Aq, double *Bq); void TriSendD3Q7AA(double *Aq, double *Bq, double *Cq); diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 38088587..92dcd652 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -1,8 +1,7 @@ /* * Multi-relaxation time LBM Model */ -#include "models/MRT.h" -#include "models/ElectroModel.h" +#include "models/IonModel.h" #include "analysis/distance.h" #include "common/ReadMicroCT.h" @@ -26,15 +25,10 @@ void ScaLBL_IonModel::ReadParams(string filename){ tau = 1.0; timestepMax = 100000; tolerance = 1.0e-8; - Fx = Fy = 0.0; - Fz = 1.0e-5; - - // Color Model parameters + // Model parameters if (ion_db->keyExists( "timestepMax" )){ - timestepMax = mrt_db->getScalar( "timestepMax" ); + timestepMax = ion_db->getScalar( "timestepMax" ); } - - mu=(tau-0.5)/3.0; } void ScaLBL_IonModel::SetDomain(){ Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis @@ -50,9 +44,6 @@ void ScaLBL_IonModel::SetDomain(){ N = Nx*Ny*Nz; Distance.resize(Nx,Ny,Nz); - Velocity_x.resize(Nx,Ny,Nz); - Velocity_y.resize(Nx,Ny,Nz); - Velocity_z.resize(Nx,Ny,Nz); for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object @@ -176,7 +167,7 @@ void ScaLBL_IonModel::Initialize(){ } void ScaLBL_IonModel::Run(double *Velocity){ - + double rlx = 1.0/tau; //.......create and start timer............ 
double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); MPI_Barrier(comm); @@ -190,19 +181,19 @@ void ScaLBL_IonModel::Run(double *Velocity){ //************************************************************************/ timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, Velocity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE // Set boundary conditions /* ... */ - ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, Velocity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - ScaLBL_D3Q7_AAeven_Ion(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_D3Q7_AAeven_Ion(fq, Velocity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE // Set boundary conditions /* ... 
*/ - ScaLBL_D3Q7_AAeven_Ion(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_D3Q7_AAeven_Ion(fq, Velocity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ } diff --git a/models/IonModel.h b/models/IonModel.h index f6ffad5e..0b1d7e38 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -1,5 +1,5 @@ /* - * Multi-relaxation time LBM Model + * Ion transporte LB Model */ #include #include diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index f4f15224..776134cc 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -1,8 +1,7 @@ /* * Multi-relaxation time LBM Model */ -#include "models/MRT.h" -#include "models/ElectroModel.h" +#include "models/PoissonSolver.h" #include "analysis/distance.h" #include "common/ReadMicroCT.h" @@ -21,8 +20,7 @@ void ScaLBL_Poisson::ReadParams(string filename){ // read the input database db = std::make_shared( filename ); domain_db = db->getDatabase( "Domain" ); - mrt_db = db->getDatabase( "MRT" ); - electric_db = db->getDatabase( "Electrochemistry" ); + electric_db = db->getDatabase( "Electric" ); tau = 1.0; timestepMax = 100000; @@ -31,38 +29,14 @@ void ScaLBL_Poisson::ReadParams(string filename){ Fz = 1.0e-5; // Color Model parameters - if (mrt_db->keyExists( "timestepMax" )){ - timestepMax = mrt_db->getScalar( "timestepMax" ); + if (electric_db->keyExists( "timestepMax" )){ + timestepMax = electric_db->getScalar( "timestepMax" ); } - if (mrt_db->keyExists( "tolerance" )){ - tolerance = mrt_db->getScalar( "tolerance" ); - } - if (mrt_db->keyExists( "tau" )){ - tau = mrt_db->getScalar( "tau" ); - } - if (mrt_db->keyExists( "F" )){ - Fx = mrt_db->getVector( "F" )[0]; - Fy = mrt_db->getVector( "F" )[1]; - Fz = mrt_db->getVector( "F" )[2]; - } - if (mrt_db->keyExists( "Restart" )){ - Restart = mrt_db->getScalar( "Restart" ); - } - if 
(mrt_db->keyExists( "din" )){ - din = mrt_db->getScalar( "din" ); - } - if (mrt_db->keyExists( "dout" )){ - dout = mrt_db->getScalar( "dout" ); - } - if (mrt_db->keyExists( "flux" )){ - flux = mrt_db->getScalar( "flux" ); - } // Read domain parameters if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } - mu=(tau-0.5)/3.0; } @@ -80,10 +54,7 @@ void ScaLBL_Poisson::SetDomain(){ N = Nx*Ny*Nz; Distance.resize(Nx,Ny,Nz); - Velocity_x.resize(Nx,Ny,Nz); - Velocity_y.resize(Nx,Ny,Nz); - Velocity_z.resize(Nx,Ny,Nz); - + for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object MPI_Barrier(comm); @@ -205,7 +176,7 @@ void ScaLBL_Poisson::Initialize(){ } void ScaLBL_Poisson::Run(double *ChargeDensity){ - + double rlx = 1.0/tau; //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); MPI_Barrier(comm); @@ -219,19 +190,19 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ //************************************************************************/ timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE // Set boundary conditions /* ... 
*/ - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - ScaLBL_D3Q7_AAeven_Poisson(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE // Set boundary conditions /* ... */ - ScaLBL_D3Q7_AAeven_Poisson(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7e703c9d..8c937107 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,6 +4,7 @@ ADD_LBPM_EXECUTABLE( lbpm_color_simulator ) ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_dfh_simulator.cpp ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index e3765d12..5209565c 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -61,7 +61,7 @@ int main(int argc, char **argv) DFHModel.Run(); // Solve the N-S equations to get velocity IonModel.Run(DFHModel.Velocity); //solve for ion transport and electric potential - IonModel.Run(DFHModel.Velocity, DFHModel.Phi); //solve for ion 
transport and electric potential with multiphase system + //IonModel.Run(DFHModel.Velocity, DFHModel.Phi); //solve for ion transport and electric potential with multiphase system PoissonSolver.Run(IonModel.ChargeDensity); DFHModel.WriteDebug(); From d9cde3c76cb1fd206a7c2002f323ec7de4e480f6 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 6 Aug 2020 16:12:18 -0400 Subject: [PATCH 010/205] use generalized D3Q7 MPI structures for multi-ion --- models/IonModel.cpp | 20 +++++++++++++------- models/PoissonSolver.cpp | 8 ++++---- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 92dcd652..687deb9e 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -25,9 +25,11 @@ void ScaLBL_IonModel::ReadParams(string filename){ tau = 1.0; timestepMax = 100000; tolerance = 1.0e-8; - // Model parameters - if (ion_db->keyExists( "timestepMax" )){ - timestepMax = ion_db->getScalar( "timestepMax" ); + // Model parameters + + number_ion_species = 1; + if (ion_db->keyExists( "number_ion_species" )){ + number_ion_species = ion_db->getScalar( "number_ion_species" ); } } void ScaLBL_IonModel::SetDomain(){ @@ -180,17 +182,21 @@ void ScaLBL_IonModel::Run(double *Velocity){ while (timestep < timestepMax && error > tolerance) { //************************************************************************/ timestep++; - ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + for (int ic=0; icSendD3Q7AA(fq, ic); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, Velocity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + for (int ic=0; icRecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE // Set boundary conditions /* ... 
*/ ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, Velocity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; - ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + for (int ic=0; icSendD3Q7AA(fq, ic); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_Ion(fq, Velocity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + for (int ic=0; icRecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE // Set boundary conditions /* ... */ ScaLBL_D3Q7_AAeven_Ion(fq, Velocity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 776134cc..9668dd0b 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -189,17 +189,17 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ while (timestep < timestepMax && error > tolerance) { //************************************************************************/ timestep++; - ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE // Set boundary conditions /* ... */ ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; - ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE // Set boundary conditions /* ... 
*/ ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); From dff4e3d5362914e36ec2ec38f6de8826ed55c1eb Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 7 Aug 2020 17:44:02 -0400 Subject: [PATCH 011/205] save the work;to be continued --- common/ScaLBL.cpp | 6 +- cpu/Poisson.cpp | 180 +++++++++++--------- models/PoissonSolver.cpp | 110 +++++++++--- models/PoissonSolver.h | 12 +- tests/lbpm_electrokinetic_dfh_simulator.cpp | 2 +- 5 files changed, 200 insertions(+), 110 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 6ae49cca..d9b75c43 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1182,7 +1182,7 @@ void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 if (Lock==true){ - ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); + ERROR("ScaLBL Error (SendD3Q7): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); } else{ Lock=true; @@ -1288,7 +1288,7 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 if (Lock==true){ - ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); + ERROR("ScaLBL Error (BiSendD3Q7): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); } else{ Lock=true; @@ -1415,7 +1415,7 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 if (Lock==true){ - ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?"); + ERROR("ScaLBL Error (TriSendD3Q7): ScaLBL_Communicator is locked -- did you forget to match Send/Recv 
calls?"); } else{ Lock=true; diff --git a/cpu/Poisson.cpp b/cpu/Poisson.cpp index 355e4223..d24bc12f 100644 --- a/cpu/Poisson.cpp +++ b/cpu/Poisson.cpp @@ -1,65 +1,18 @@ -extern "C" void ScaLBL_D3Q7_AAeven_Poisson(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, double *Den_charge, double *Psi, double *ElectricField, double rlx, double epsilon_LB,double deltaT, + int start, int finish, int Np){ int n; - // conserved momemnts - double rho,ux,uy,uz,uu; - // non-conserved moments - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double psi;//electric potential + double Ex,Ey,Ez;//electrical field + double rho_e;//local charge density + double f0,f1,f2,f3,f4,f5,f6; + int nr1,nr2,nr3,nr4,nr5,nr6; for (int n=start; n( filename ); domain_db = db->getDatabase( "Domain" ); - electric_db = db->getDatabase( "Electric" ); + electric_db = db->getDatabase( "Poisson" ); - tau = 1.0; + k2_inv = 4.5;//the inverse of 2nd-rank moment of D3Q7 lattice + deltaT = 0.3;//time step of LB-Poisson equation + tau = 0.5+k2_inv*deltaT; timestepMax = 100000; - tolerance = 1.0e-8; - Fx = Fy = 0.0; - Fz = 1.0e-5; + tolerance = 1.0e-6;//stopping criterion for obtaining steady-state electricla potential + h = 1.0;//resolution; unit: um/lu + epsilon0 = 8.85e-12;//electrical permittivity of vaccum; unit:[C/(V*m)] + epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] + epsilonR = 78.4;//default dielectric constant for water + epsilon_LB = epsilon0_LB*epsilonR;//electrical permittivity + analysis_interval = 1000; - // Color Model parameters + // LB-Poisson Model parameters if (electric_db->keyExists( "timestepMax" )){ timestepMax = electric_db->getScalar( "timestepMax" ); } - + if (electric_db->keyExists( "analysis_interval" )){ + analysis_interval = electric_db->getScalar( "analysis_interval" ); + } + if (electric_db->keyExists( "tolerance" )){ + tolerance 
= electric_db->getScalar( "tolerance" ); + } + if (electric_db->keyExists( "deltaT" )){ + deltaT = electric_db->getScalar( "deltaT" ); + } + if (electric_db->keyExists( "epsilonR" )){ + epsilonR = electric_db->getScalar( "epsilonR" ); + } // Read domain parameters + if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu + h = domain_db->getScalar( "voxel_length" ); + } if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } + //Re-calcualte model parameters if user updates input + epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] + epsilon_LB = epsilon0_LB*epsilonR;//electrical permittivity + tau = 0.5+k2_inv*deltaT; - mu=(tau-0.5)/3.0; } void ScaLBL_Poisson::SetDomain(){ Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis @@ -54,6 +77,7 @@ void ScaLBL_Poisson::SetDomain(){ N = Nx*Ny*Nz; Distance.resize(Nx,Ny,Nz); + Psi_host.resize(Nx,Ny,Nz); for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object @@ -158,6 +182,7 @@ void ScaLBL_Poisson::Create(){ ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &ElectricField, 3*sizeof(double)*Np); //........................................................................... 
// Update GPU data structures if (rank==0) printf ("Setting up device map and neighbor list \n"); @@ -171,54 +196,95 @@ void ScaLBL_Poisson::Initialize(){ /* * This function initializes model */ - if (rank==0) printf ("Initializing distributions \n"); - ScaLBL_D3Q19_Init(fq, Np); + if (rank==0) printf ("Initializing D3Q7 distributions for LB-Poisson solver\n"); + ScaLBL_D3Q7_Poisson_Init(fq, Np); } void ScaLBL_Poisson::Run(double *ChargeDensity){ - double rlx = 1.0/tau; + + //LB-related parameter + double rlx = 1.0/tau; + //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); MPI_Barrier(comm); starttime = MPI_Wtime(); - if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); - if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("***************************************************************************\n"); + if (rank==0) printf("LB-Poisson Solver: timestepMax = %i; steady-state tolerance = %.3g \n", timestepMax,tolerance); + if (rank==0) printf("***************************************************************************\n"); timestep=0; double error = 1.0; - double flow_rate_previous = 0.0; + double psi_avg_previous = 0.0; while (timestep < timestepMax && error > tolerance) { //************************************************************************/ - timestep++; + // *************ODD TIMESTEP*************// + timestep++; ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE // Set boundary conditions /* ... 
*/ - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + + // *************EVEN TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL - ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE // Set boundary conditions /* ... */ - ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); + ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ + + // Check convergence of steady-state solution + if (timestep%analysis_interval==0){ + + ScaLBL_Comm->RegularLayout(Map,&Psi,Psi_host); + double count_loc=0; + double count; + double psi_avg; + double psi_loc=0.f; + + for (int k=1; k 0){ + psi_loc += Psi_host(i,j,k); + count_loc+=1.0; + } + } + } + } + MPI_Allreduce(&psi_loc,&psi_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + psi_avg /= count; + double psi_avg_mag=psi_avg; + if (psi_avg==0.0) psi_avg_mag=1.0; + error = fabs(psi_avg-psi_avg_previous)/fabs(psi_avg_mag); + psi_avg_previous = psi_avg; + } } //************************************************************************/ stoptime = MPI_Wtime(); - if (rank==0) printf("-------------------------------------------------------------------\n"); + if (rank==0) 
printf("LB-Poission Solver: a steady-state solution is obtained\n"); + if (rank==0) printf("---------------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; - if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("******************* LB-Poisson Solver Statistics ********************\n"); if (rank==0) printf("CPU time = %f \n", cputime); if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); MLUPS *= nprocs; if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); - if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("*********************************************************************\n"); } + +//void ScaLBL_Poisson::get_ElectricField(){ +//// ??? +//} diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 63e10df0..7eb7ac83 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -29,17 +29,19 @@ public: void Initialize(); void Run(double *ChargeDensity); - bool Restart,pBC; + //bool Restart,pBC; int timestep,timestepMax; + int analysis_interval; int BoundaryCondition; - double tau,mu; - double Fx,Fy,Fz,flux; - double din,dout; + double tau; double tolerance; + double k2_inv,deltaT; + double epsilon0,epsilon0_LB,epsilonR,epsilon_LB; int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; double Lx,Ly,Lz; + double h;//image resolution std::shared_ptr Dm; // this domain is for analysis std::shared_ptr Mask; // this domain is for lbm @@ -51,9 +53,11 @@ public: IntArray Map; DoubleArray Distance; + DoubleArray Psi_host; int *NeighborList; double *fq; double *Psi; + double *ElectricField; private: MPI_Comm comm; diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index 
5209565c..9725dda4 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -38,7 +38,7 @@ int main(int argc, char **argv) if (rank == 0){ printf("********************************************************\n"); - printf("Running Color LBM \n"); + printf("Running Electrokinetic LBM Simulator \n"); printf("********************************************************\n"); } PROFILE_ENABLE(1); From 28988ef6ba15ed6f4f866bfd9bd583ebda36a930 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 10 Aug 2020 12:03:28 -0400 Subject: [PATCH 012/205] save the work;untested --- cpu/Ion.cpp | 316 ++++++++++++++------ cpu/Poisson.cpp | 4 +- models/IonModel.cpp | 186 ++++++++++-- models/IonModel.h | 10 +- tests/lbpm_electrokinetic_dfh_simulator.cpp | 6 +- 5 files changed, 395 insertions(+), 127 deletions(-) diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp index 569c9a0a..d6874a76 100644 --- a/cpu/Ion.cpp +++ b/cpu/Ion.cpp @@ -1,12 +1,177 @@ -extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Velocity, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - int n; - // conserved momemnts - double rho,ux,uy,uz,uu; - // non-conserved moments - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; - for (int n=start; n 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + // q=2 + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + // q=4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + // q=6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*Ci; + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(ux+uEPx)); + + // q=2 + dist[nr1] = f2*(1.0-rlx) + 
rlx*0.1111111111111111*(1.0-4.5*deltaT*(ux+uEPx)); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(uy+uEPy)); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(uy+uEPy)); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(uz+uEPz)); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(uz+uEPz)); + + + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, double zi, double rlx, double deltaT, double Vt, int start, int finish, int Np){ + int n; + double Ci; + double ux,uy,uz; + double uEPx,uEPy,uEPz;//electrochemical induced velocity + double Ex,Ey,Ez;//electrical field + double f0,f1,f2,f3,f4,f5,f6; + + for (n=start; n 10Np => odd part of dist) - f1 = dist[nr1]; // reading the f1 data into register fq - - nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) - f2 = dist[nr2]; // reading the f2 data into register fq - - // q=3 - nr3 = neighborList[n+2*Np]; // neighbor 4 - f3 = dist[nr3]; - - // q = 4 - nr4 = neighborList[n+3*Np]; // neighbor 3 - f4 = dist[nr4]; - - // q=5 - nr5 = neighborList[n+4*Np]; - f5 = dist[nr5]; - - // q = 6 - nr6 = neighborList[n+5*Np]; - f6 = dist[nr6]; - - rho = f0+f2+f1+f4+f3+f6; - ux = Velocity[n]; - uy = Velocity[n+Np]; - uz = Velocity[n+2*Np]; - uu = 1.5*(ux*ux+uy*uy+uz*uz); - - // q=0 - dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*Ci; // q = 1 - dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(ux+uEPx)); // q=2 - dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(ux+uEPx)); // q = 3 - dist[nr4] = f3*(1.0-rlx) + - 
rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(uy+uEPy)); // q = 4 - dist[nr3] = f4*(1.0-rlx) + - rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(uy+uEPy)); // q = 5 - dist[nr6] = f5*(1.0-rlx) + - rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(uz+uEPz)); // q = 6 - dist[nr5] = f6*(1.0-rlx) + - rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(uz+uEPz)); } -} \ No newline at end of file +} + +extern "C" void ScaLBL_D3Q7_Poisson_Init(double *dist, double *Den, double DenInit, int Np) +{ + int n; + for (n=0; n& IonValence, int number_ion_species, int start, int finish, int Np){ + + int n; + int ic=number_ion_species; + double Ci;//ion concentration of species i + double CD;//charge density + double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant + for (n=start; n0){ + for (n=start; n( filename ); domain_db = db->getDatabase( "Domain" ); ion_db = db->getDatabase( "Ions" ); - tau = 1.0; + // Default model parameters + T = 300.0;//temperature; unit [K] + Vt = kb*T/electron_charge;//thermal voltage; unit [V] + k2_inv = 4.5;//the inverse of 2nd-rank moment of D3Q7 lattice + h = 1.0;//resolution; unit: um/lu timestepMax = 100000; tolerance = 1.0e-8; - // Model parameters - number_ion_species = 1; + IonDiffusivity.push_back(1.0e-9);//User input unit [m^2/sec] + //TODO needs to scale the unit of diffusivity! 
+ IonValence.push_back(1); + IonConcentration.push_back(1.0e-3);//unit [mol/m^3] + // TODO rescale ion concentration unit + deltaT.push_back(1.0); + tau.push_back(0.5+k2_inv*deltaT[0]*IonDiffusivisty[0]); + + // LB-Ion Model parameters + if (ion_db->keyExists( "timestepMax" )){ + timestepMax = ion_db->getScalar( "timestepMax" ); + } + if (ion_db->keyExists( "analysis_interval" )){ + analysis_interval = ion_db->getScalar( "analysis_interval" ); + } + if (ion_db->keyExists( "tolerance" )){ + tolerance = ion_db->getScalar( "tolerance" ); + } + if (ion_db->keyExists( "temperature" )){ + T = ion_db->getScalar( "temperature" ); + } + if (ion_db->keyExists( "epsilonR" )){ + epsilonR = ion_db->getScalar( "epsilonR" ); + } if (ion_db->keyExists( "number_ion_species" )){ number_ion_species = ion_db->getScalar( "number_ion_species" ); } + + //read ion related list + if (ion_db->keyExists( "deltaT" )){ + deltaT.clear(); + deltaT = ion_db->getVector( "deltaT" ); + if (deltaT.size()!=number_ion_species){ + ERROR("Error: number_ion_species and deltaT must be the same length! \n"); + } + } + //NOTE: Ion diffusivity has unit: [m^2/sec] + if (ion_db->keyExists("IonDiffusivityList")){ + IonDiffusivity.clear(); + IonDiffusivity = ion_db->getVector( "IonDiffusivityList" ); + if (IonDiffusivity.size()!=number_ion_species){ + ERROR("Error: number_ion_species and IonDiffusivityList must be the same length! \n"); + } + } + //read ion algebric valence list + if (ion_db->keyExists("IonValenceList")){ + IonValence.clear(); + IonValence = ion_db->getVector( "IonValenceList" ); + if (IonValence.size()!=number_ion_species){ + ERROR("Error: number_ion_species and IonValenceList must be the same length! 
\n"); + } + } + //read initial ion concentration list; unit [mol/m^3] + if (ion_db->keyExists("IonConcentrationList")){ + IonConcentration.clear(); + IonConcentration = ion_db->getVector( "IonConcentrationList" ); + if (IonConcentration.size()!=number_ion_species){ + ERROR("Error: number_ion_species and IonConcentrationList must be the same length! \n"); + } + } + + // Read domain parameters + if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu + h = domain_db->getScalar( "voxel_length" ); + } + if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } + //Re-calcualte model parameters if user updates input + //TODO ion diffusivity needs rescale unit to LB unit + //TODO rescale ion initial concentration unit to LB unit + if (deltaT.size()>1){ + tau.clear(); + for (int i=0;i(new Domain(domain_db,comm)); // full domain for analysis Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases @@ -164,42 +246,104 @@ void ScaLBL_IonModel::Initialize(){ /* * This function initializes model */ - if (rank==0) printf ("Initializing distributions \n"); - ScaLBL_D3Q19_Init(fq, Np); + if (rank==0) printf ("Initializing D3Q7 distributions for ion transport\n"); + for (int ic=0; ic rlx(tau.begin(),tau.end()); + for (double item : rlx){ + item = 1.0/item; + } //.......create and start timer............ 
double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); MPI_Barrier(comm); starttime = MPI_Wtime(); - if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); - if (rank==0) printf("********************************************************\n"); + + if (rank==0) printf("***************************************************\n"); + if (rank==0) printf("LB-Ion Transport: timestepMax = %i\n", timestepMax); + if (rank==0) printf("***************************************************\n"); timestep=0; - double error = 1.0; - double flow_rate_previous = 0.0; - while (timestep < timestepMax && error > tolerance) { + while (timestep < timestepMax) { //************************************************************************/ + // *************ODD TIMESTEP*************// timestep++; - for (int ic=0; icSendD3Q7AA(fq, ic); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, Velocity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); - for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } + ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + + for (int ic=0; icRecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE + } + for (int ic=0; icLastExterior(), Np); + } + ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); + + //LB-Ion collison + for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } + // Set boundary conditions /* ... 
*/ - ScaLBL_D3Q7_AAodd_Ion(NeighborList, fq, Velocity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); + + for (int ic=0; icLastExterior(), Np); + } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + + // *************EVEN TIMESTEP*************// timestep++; - for (int ic=0; icSendD3Q7AA(fq, ic); //READ FORM NORMAL - ScaLBL_D3Q7_AAeven_Ion(fq, Velocity, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz); - for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } + ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + + for (int ic=0; icRecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE + } + for (int ic=0; icLastExterior(), Np); + } + ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); + + //LB-Ion collison + for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } + // Set boundary conditions /* ... 
*/ - ScaLBL_D3Q7_AAeven_Ion(fq, Velocity, 0, ScaLBL_Comm->LastExterior(), Np, rlx, Fx, Fy, Fz); + + for (int ic=0; icLastExterior(), Np); + } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ } diff --git a/models/IonModel.h b/models/IonModel.h index 0b1d7e38..8be11468 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "common/ScaLBL.h" #include "common/Communication.h" @@ -33,12 +34,13 @@ public: bool Restart,pBC; int timestep,timestepMax; int BoundaryCondition; - double tau,mu; - double Fx,Fy,Fz,flux; - double din,dout; - double tolerance; int number_ion_species; + vector IonDiffusivity;//User input unit [m^2/sec] + vector IonValence; + vector IonConcentration;//unit [mol/m^3] + vector deltaT; + vector tau; int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index 9725dda4..0c9da812 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -59,10 +59,10 @@ int main(int argc, char **argv) DFHModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables DFHModel.Initialize(); // initializing the model will set initial conditions for variables - DFHModel.Run(); // Solve the N-S equations to get velocity - IonModel.Run(DFHModel.Velocity); //solve for ion transport and electric potential - //IonModel.Run(DFHModel.Velocity, DFHModel.Phi); //solve for ion transport and electric potential with multiphase system + DFHModel.Run(IonModel.ChargeDensity); // Solve the N-S equations to get velocity PoissonSolver.Run(IonModel.ChargeDensity); + IonModel.Run(DFHModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential + //IonModel.Run(DFHModel.Velocity, DFHModel.Phi,PoissonSolver.ElectricField); 
DFHModel.WriteDebug(); From b4d50ee821d58c1262a83c43aceed791ce809b33 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 10 Aug 2020 12:54:55 -0400 Subject: [PATCH 013/205] example for how to structure ion comm loop --- models/IonModel.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 46859eab..237ebcf1 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -275,17 +275,13 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ //Update ion concentration and charge density for (int ic=0; icSendD3Q7AA(fq, ic); //READ FROM NORMAL - } - for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE + /* Update exterior ion concentration */ } + ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - for (int ic=0; icRecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE - } for (int ic=0; ic Date: Tue, 11 Aug 2020 14:35:46 -0400 Subject: [PATCH 014/205] distinguish parallel comm / bounceback sites at halo --- common/ScaLBL.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index d9b75c43..928e3d5e 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -372,7 +372,11 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis for (k=0;k 0) + Map(i,j,k) = -2; // this label is for parallel communication sites + else + Map(i,j,k) = -1; // this label is for solid bounce-back sites } } } From e0b0e0566495f27bb1cd185f4978b67ff70dde5e Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 11 Aug 2020 15:16:40 -0400 Subject: [PATCH 015/205] added bounce-back interaction capability --- common/ScaLBL.cpp | 215 +++++++++++++++++++++++++++++++++++++++++++++- common/ScaLBL.h | 6 ++ 2 files changed, 220 insertions(+), 1 deletion(-) diff --git 
a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 928e3d5e..dbb75380 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -526,7 +526,7 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis } } } - + //for (idx=0; idx Date: Tue, 11 Aug 2020 15:34:12 -0400 Subject: [PATCH 016/205] fixed bugs in bounceback list --- common/ScaLBL.cpp | 13 +++++++------ common/ScaLBL.h | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index dbb75380..18238a8f 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -854,9 +854,11 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis } -void ScaLBL_Communicator::SetupBounceBackList() +void ScaLBL_Communicator::SetupBounceBackList(IntArray &Map, signed char *id, int Np) { - + + int idx,i,j,k; + int neighbor; // save list of bounce-back distributions and interaction sites n_bb_d3q7 = 0; n_bb_d3q19 = 0; @@ -866,9 +868,8 @@ void ScaLBL_Communicator::SetupBounceBackList() for (i=1;i Date: Tue, 11 Aug 2020 16:52:56 -0400 Subject: [PATCH 017/205] add zeta potential to Poisson --- common/ScaLBL.cpp | 10 ++++++++++ common/ScaLBL.h | 4 ++-- models/PoissonSolver.cpp | 11 +++++++++++ models/PoissonSolver.h | 1 + 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 18238a8f..28459298 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1067,6 +1067,16 @@ void ScaLBL_Communicator::SetupBounceBackList(IntArray &Map, signed char *id, in ScaLBL_CopyToDevice(bb_dist, bb_dist_tmp, local_count*sizeof(int)); } +void ScaLBL_Communicator::SolidDirichletD3Q7(double *fq, double *assignValues){ + // fq is a D3Q7 distribution + // assignValues is a list of values to assign at bounce-back sites + for (int idx=0; idxn_bb_d3q7); //........................................................................... 
+ // initialize the zeta function (example is zeta is constant on solid surface) + double *tmpZeta = new double[ScaLBL_Comm->n_bb_d3q7]; + for int (i=0; in_bb_d3q7; i++){ + tmpZeta[i] = 1.0/k2_inv; // this has to be read from input file + } + ScaLBL_CopyToDevice(zeta, tmpZeta, sizeof(double)*ScaLBL_Comm->n_bb_d3q7); + delete [] tmpZeta; + // Update GPU data structures if (rank==0) printf ("Setting up device map and neighbor list \n"); // copy the neighbor list @@ -225,6 +234,7 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ // Set boundary conditions /* ... */ ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->SolidDirichletD3Q7(fq, zeta); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// @@ -235,6 +245,7 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ // Set boundary conditions /* ... */ ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->SolidDirichletD3Q7(fq, zeta); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 7eb7ac83..625c602f 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -58,6 +58,7 @@ public: double *fq; double *Psi; double *ElectricField; + double *zeta; private: MPI_Comm comm; From 3a162849a03db896cbaa76d1b34117e247870496 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 14 Aug 2020 14:23:22 -0400 Subject: [PATCH 018/205] save the work;untested --- cpu/Ion.cpp | 33 +- cpu/Poisson.cpp | 22 +- cpu/Stokes.cpp | 957 ++++++++++++++++++++ models/IonModel.cpp | 190 ++-- models/IonModel.h | 12 +- models/MultiPhysController.cpp | 57 ++ models/MultiPhysController.h | 47 + models/PoissonSolver.cpp | 81 +- models/StokesModel.cpp | 543 +++++++++++ models/StokesModel.h 
| 75 ++ tests/lbpm_electrokinetic_dfh_simulator.cpp | 137 +-- 11 files changed, 1932 insertions(+), 222 deletions(-) create mode 100644 cpu/Stokes.cpp create mode 100644 models/MultiPhysController.cpp create mode 100644 models/MultiPhysController.h create mode 100644 models/StokesModel.cpp create mode 100644 models/StokesModel.h diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp index d6874a76..4f760e4f 100644 --- a/cpu/Ion.cpp +++ b/cpu/Ion.cpp @@ -80,7 +80,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, i } extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField, - double Di, double zi, double rlx, double deltaT, double Vt, int start, int finish, int Np){ + double Di, double zi, double rlx, double Vt, int start, int finish, int Np){ int n; double Ci; double ux,uy,uz; @@ -128,29 +128,28 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *D dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*Ci; // q = 1 - dist[nr2] = f1*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(ux+uEPx)); + dist[nr2] = f1*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(ux+uEPx)); // q=2 - dist[nr1] = f2*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(ux+uEPx)); + dist[nr1] = f2*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(ux+uEPx)); // q = 3 - dist[nr4] = f3*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(uy+uEPy)); + dist[nr4] = f3*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(uy+uEPy)); // q = 4 - dist[nr3] = f4*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(uy+uEPy)); + dist[nr3] = f4*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(uy+uEPy)); // q = 5 - dist[nr6] = f5*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(uz+uEPz)); + dist[nr6] = f5*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(uz+uEPz)); // q = 6 - dist[nr5] = f6*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(uz+uEPz)); - + dist[nr5] = f6*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(uz+uEPz)); } 
} extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField, - double Di, double zi, double rlx, double deltaT, double Vt, int start, int finish, int Np){ + double Di, double zi, double rlx, double Vt, int start, int finish, int Np){ int n; double Ci; double ux,uy,uz; @@ -184,28 +183,28 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Veloci dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*Ci; // q = 1 - dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(ux+uEPx)); + dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(ux+uEPx)); // q=2 - dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(ux+uEPx)); + dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(ux+uEPx)); // q = 3 - dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(uy+uEPy)); + dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(uy+uEPy)); // q = 4 - dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(uy+uEPy)); + dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(uy+uEPy)); // q = 5 - dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*deltaT*(uz+uEPz)); + dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(uz+uEPz)); // q = 6 - dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*deltaT*(uz+uEPz)); + dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(uz+uEPz)); } } -extern "C" void ScaLBL_D3Q7_Poisson_Init(double *dist, double *Den, double DenInit, int Np) +extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np) { int n; for (n=0; n& IonValence, int number_ion_species, int start, int finish, int Np){ +extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, vector& IonValence, int number_ion_species, int start, int finish, int Np){ int n; int ic=number_ion_species; diff --git a/cpu/Poisson.cpp b/cpu/Poisson.cpp index 
7eac7401..d33ab658 100644 --- a/cpu/Poisson.cpp +++ b/cpu/Poisson.cpp @@ -1,5 +1,5 @@ -extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, double *Den_charge, double *Psi, double *ElectricField, double rlx, double epsilon_LB,double deltaT, +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma, int start, int finish, int Np){ int n; double psi;//electric potential @@ -7,12 +7,13 @@ extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, doubl double rho_e;//local charge density double f0,f1,f2,f3,f4,f5,f6; int nr1,nr2,nr3,nr4,nr5,nr6; + double rlx=1.0/tau; for (n=start; n + +extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, + double Gx, double Gy, double Gz, int start, int finish, int Np) +{ + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + // body force due to electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + constexpr double mrt_V1=0.05263157894736842; + constexpr double mrt_V2=0.012531328320802; + constexpr double mrt_V3=0.04761904761904762; + constexpr double mrt_V4=0.004594820384294068; + constexpr double mrt_V5=0.01587301587301587; + constexpr double mrt_V6=0.0555555555555555555555555; + constexpr double mrt_V7=0.02777777777777778; + constexpr double mrt_V8=0.08333333333333333; + constexpr double mrt_V9=0.003341687552213868; + constexpr double mrt_V10=0.003968253968253968; + constexpr double mrt_V11=0.01388888888888889; + constexpr double mrt_V12=0.04166666666666666; + + for (int n=start; n 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + //fp = dist[10*Np+n]; + rho += fq; + m1 
-= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // f2 = dist[10*Np+n]; + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + //fq = dist[Np+n]; + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + //fq = dist[11*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + //fq = dist[2*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + //fq = dist[12*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + //fq = dist[3*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + //fq = dist[13*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + //fq = dist[4*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + //fq = dist[14*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= 
fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + //fq = dist[5*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + //fq = dist[15*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + //fq = dist[6*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + //fq = dist[16*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + //fq = dist[7*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + 
fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + // write the velocity + ux = jx / rho; + uy = jy / rho; + uz = jz / rho; + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + + //..............incorporate external force................................................ + //..............carry out relaxation process............................................... + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9); + m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + //.................inverse transformation...................................................... 
+ + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + 
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + + } +} + diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 237ebcf1..2e5b4e71 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -16,68 +16,70 @@ ScaLBL_IonModel::~ScaLBL_IonModel(){ } -void ScaLBL_IonModel::ReadParams(string filename){ - - 
//fundamental constant - kb = 1.38e-23;//Boltzmann constant;unit [J/K] - electron_charge = 1.6e-19;//electron charge;unit [C] +void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stokes,double time_conv_Stokes){ // read the input database db = std::make_shared( filename ); domain_db = db->getDatabase( "Domain" ); ion_db = db->getDatabase( "Ions" ); - // Default model parameters + //------ Load number of iteration from multiphysics controller ------// + timestepMax = num_iter; + //compute time conversion factor for ion model + time_conv = num_iter_Stokes*time_conv_Stokes/num_iter; + //-------------------------------------------------------------------// + + // Universal constant + kb = 1.38e-23;//Boltzmann constant;unit [J/K] + electron_charge = 1.6e-19;//electron charge;unit [C] + + //---------------------- Default model parameters --------------------------// T = 300.0;//temperature; unit [K] - Vt = kb*T/electron_charge;//thermal voltage; unit [V] + Vt = kb*T/electron_charge;//thermal voltage; unit [Vy] k2_inv = 4.5;//the inverse of 2nd-rank moment of D3Q7 lattice h = 1.0;//resolution; unit: um/lu - timestepMax = 100000; tolerance = 1.0e-8; number_ion_species = 1; - IonDiffusivity.push_back(1.0e-9);//User input unit [m^2/sec] - //TODO needs to scale the unit of diffusivity! 
- IonValence.push_back(1); - IonConcentration.push_back(1.0e-3);//unit [mol/m^3] - // TODO rescale ion concentration unit - deltaT.push_back(1.0); - tau.push_back(0.5+k2_inv*deltaT[0]*IonDiffusivisty[0]); + IonDiffusivity.push_back(1.0e-9);//user-input diffusivity has physical unit [m^2/sec] + IonValence.push_back(1);//algebraic valence charge + IonConcentration.push_back(1.0e-3);//user-input ion concentration has physical unit [mol/m^3] + //deltaT.push_back(1.0); + //tau.push_back(0.5+k2_inv*deltaT[0]*IonDiffusivisty[0]); + tau.push_back(0.5+k2_inv*time_conv/(h*1.0e-6)/(h*1.0e-6)*IonDiffusivisty[0]); + //--------------------------------------------------------------------------// // LB-Ion Model parameters - if (ion_db->keyExists( "timestepMax" )){ - timestepMax = ion_db->getScalar( "timestepMax" ); - } - if (ion_db->keyExists( "analysis_interval" )){ - analysis_interval = ion_db->getScalar( "analysis_interval" ); - } + //if (ion_db->keyExists( "timestepMax" )){ + // timestepMax = ion_db->getScalar( "timestepMax" ); + //} if (ion_db->keyExists( "tolerance" )){ tolerance = ion_db->getScalar( "tolerance" ); } if (ion_db->keyExists( "temperature" )){ T = ion_db->getScalar( "temperature" ); } - if (ion_db->keyExists( "epsilonR" )){ - epsilonR = ion_db->getScalar( "epsilonR" ); - } if (ion_db->keyExists( "number_ion_species" )){ number_ion_species = ion_db->getScalar( "number_ion_species" ); } - //read ion related list - if (ion_db->keyExists( "deltaT" )){ - deltaT.clear(); - deltaT = ion_db->getVector( "deltaT" ); - if (deltaT.size()!=number_ion_species){ - ERROR("Error: number_ion_species and deltaT must be the same length! 
\n"); - } - } - //NOTE: Ion diffusivity has unit: [m^2/sec] + //NOTE: ion diffusivity has INPUT unit: [m^2/sec] + // it must be converted to LB unit: [lu^2/lt] if (ion_db->keyExists("IonDiffusivityList")){ IonDiffusivity.clear(); IonDiffusivity = ion_db->getVector( "IonDiffusivityList" ); + // time relaxation parameters tau also needs update + tau.clear(); if (IonDiffusivity.size()!=number_ion_species){ ERROR("Error: number_ion_species and IonDiffusivityList must be the same length! \n"); } + else{ + for (int i=0; ikeyExists("IonValenceList")){ @@ -87,14 +89,34 @@ void ScaLBL_IonModel::ReadParams(string filename){ ERROR("Error: number_ion_species and IonValenceList must be the same length! \n"); } } - //read initial ion concentration list; unit [mol/m^3] + //read initial ion concentration list; INPUT unit [mol/m^3] + //it must be converted to LB unit [mol/lu^3] if (ion_db->keyExists("IonConcentrationList")){ IonConcentration.clear(); IonConcentration = ion_db->getVector( "IonConcentrationList" ); if (IonConcentration.size()!=number_ion_species){ ERROR("Error: number_ion_species and IonConcentrationList must be the same length! \n"); } + else{ + for (int i=0; ikeyExists( "deltaT" )){ + // deltaT.clear(); + // tau.clear(); + // deltaT = ion_db->getVector( "deltaT" ); + // if (deltaT.size()!=number_ion_species){ + // ERROR("Error: number_ion_species and deltaT must be the same length! 
\n"); + // } + // else{//update relaxation parameter tau + // for (int i=0;ikeyExists( "voxel_length" )){//default unit: um/lu @@ -103,15 +125,15 @@ void ScaLBL_IonModel::ReadParams(string filename){ if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } - //Re-calcualte model parameters if user updates input - //TODO ion diffusivity needs rescale unit to LB unit - //TODO rescale ion initial concentration unit to LB unit - if (deltaT.size()>1){ - tau.clear(); - for (int i=0;iSDs); - if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); + if (rank==0) printf("LB Ion Solver: Initialized solid phase & converting to Signed Distance function \n"); CalcDist(Distance,id_solid,*Dm); - if (rank == 0) cout << "Domain set." << endl; + if (rank == 0) cout << " Domain set." << endl; } void ScaLBL_IonModel::Create(){ @@ -209,13 +231,13 @@ void ScaLBL_IonModel::Create(){ Mask->CommInit(); Np=Mask->PoreCount(); //........................................................................... - if (rank==0) printf ("Create ScaLBL_Communicator \n"); + if (rank==0) printf ("LB Ion Solver: Create ScaLBL_Communicator \n"); // Create a communicator for the device (will use optimized layout) // ScaLBL_Communicator ScaLBL_Comm(Mask); // original ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); int Npad=(Np/16 + 2)*16; - if (rank==0) printf ("Set up memory efficient layout \n"); + if (rank==0) printf ("LB Ion Solver: Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); @@ -224,7 +246,7 @@ void ScaLBL_IonModel::Create(){ // MAIN VARIABLES ALLOCATED HERE //........................................................................... 
// LBM variables - if (rank==0) printf ("Allocating distributions \n"); + if (rank==0) printf ("LB Ion Solver: Allocating distributions \n"); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); int neighborSize=18*(Np*sizeof(int)); @@ -235,7 +257,7 @@ void ScaLBL_IonModel::Create(){ ScaLBL_AllocateDeviceMemory((void **) &ChargeDensity, sizeof(double)*Np); //........................................................................... // Update GPU data structures - if (rank==0) printf ("Setting up device map and neighbor list \n"); + if (rank==0) printf ("LB Ion Solver: Setting up device map and neighbor list \n"); // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); MPI_Barrier(comm); @@ -246,10 +268,13 @@ void ScaLBL_IonModel::Initialize(){ /* * This function initializes model */ - if (rank==0) printf ("Initializing D3Q7 distributions for ion transport\n"); + if (rank==0) printf ("LB Ion Solver: initializing D3Q7 distributions\n"); for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); } void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ @@ -260,13 +285,10 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ item = 1.0/item; } //.......create and start timer............ 
- double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - starttime = MPI_Wtime(); + //double starttime,stoptime,cputime; + //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //starttime = MPI_Wtime(); - if (rank==0) printf("***************************************************\n"); - if (rank==0) printf("LB-Ion Transport: timestepMax = %i\n", timestepMax); - if (rank==0) printf("***************************************************\n"); timestep=0; while (timestep < timestepMax) { //************************************************************************/ @@ -277,22 +299,15 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_Comm->SendD3Q7AA(fq, ic); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE - /* Update exterior ion concentration */ - } - - ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - - for (int ic=0; icLastExterior(), Np); } - ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); //LB-Ion collison for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + rlx[ic],Vt,ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); } // Set boundary conditions @@ -300,7 +315,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ for (int ic=0; icLastExterior(), Np); + rlx[ic],Vt,0, ScaLBL_Comm->LastExterior(), Np); } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); @@ -309,28 +324,17 @@ void ScaLBL_IonModel::Run(double 
*Velocity, double *ElectricField){ //Update ion concentration and charge density for (int ic=0; icSendD3Q7AA(fq, ic); //READ FORM NORMAL - } - for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); - } - ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - - for (int ic=0; icRecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE - } - for (int ic=0; icLastExterior(), Np); } - ScaLBL_D3Q7_IonChargeDensity(Ci, ChargeDensity, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); //LB-Ion collison for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + rlx[ic],Vt,ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); } // Set boundary conditions @@ -338,25 +342,25 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ for (int ic=0; icLastExterior(), Np); + rlx[ic],Vt,0, ScaLBL_Comm->LastExterior(), Np); } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ } //************************************************************************/ - stoptime = MPI_Wtime(); - if (rank==0) printf("-------------------------------------------------------------------\n"); - // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; - // Performance obtained from each node - double MLUPS = double(Np)/cputime/1000000; + //stoptime = MPI_Wtime(); + //if (rank==0) printf("-------------------------------------------------------------------\n"); + //// Compute the walltime per timestep + //cputime = (stoptime - starttime)/timestep; + //// Performance obtained from each node + //double MLUPS = 
double(Np)/cputime/1000000; - if (rank==0) printf("********************************************************\n"); - if (rank==0) printf("CPU time = %f \n", cputime); - if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); - MLUPS *= nprocs; - if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); - if (rank==0) printf("********************************************************\n"); + //if (rank==0) printf("********************************************************\n"); + //if (rank==0) printf("CPU time = %f \n", cputime); + //if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + //MLUPS *= nprocs; + //if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + //if (rank==0) printf("********************************************************\n"); } diff --git a/models/IonModel.h b/models/IonModel.h index 8be11468..9fe0a146 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -22,24 +22,28 @@ public: ~ScaLBL_IonModel(); // functions in they should be run - void ReadParams(string filename); + void ReadParams(string filename,int num_iter,int num_iter_Stokes,double time_conv_Stokes); void ReadParams(std::shared_ptr db0); void SetDomain(); void ReadInput(); void Create(); void Initialize(); - void Run(double *Velocity); - void VelocityField(); + void Run(double *Velocity, double *ElectricField); bool Restart,pBC; int timestep,timestepMax; int BoundaryCondition; + double h;//domain resolution, unit [um/lu] + double time_conv; + double kb,electron_charge,T,Vt; + double k2_inv; + double tolerance; int number_ion_species; vector IonDiffusivity;//User input unit [m^2/sec] vector IonValence; vector IonConcentration;//unit [mol/m^3] - vector deltaT; + //vector deltaT; vector tau; int Nx,Ny,Nz,N,Np; diff --git a/models/MultiPhysController.cpp b/models/MultiPhysController.cpp new file mode 100644 index 00000000..f79db6b7 --- /dev/null +++ b/models/MultiPhysController.cpp @@ -0,0 +1,57 @@ +#include 
"models/MultiPhysController.h" + +ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM): +rank(RANK),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0),SchmidtNum(0),comm(COMM) +{ + +} +ScaLBL_Multiphys_Controller::~ScaLBL_Multiphys_Controller(){ + +} + +void ScaLBL_Multiphys_Controller::ReadParams(string filename){ + + // read the input database + db = std::make_shared( filename ); + study_db = db->getDatabase( "MultiphysController" ); + + + // Default parameters + timestepMax = 10000; + Restart = false; + SchmidtNum = 1.0; + num_iter_Stokes=1; + num_iter_Ion=1; + + // load input parameters + if (study_db->keyExists( "timestepMax" )){ + timestepMax = study_db->getScalar( "timestepMax" ); + } + if (study_db->keyExists( "Schmidt_Number" )){ + SchmidtNum = study_db->getScalar( "Schmidt_Number" ); + } + // recalculate relevant parameters + if (SchmidtNum>1){ + num_iter_Stokes = int(round(SchmidtNum/2)*2); + num_iter_Ion = 1; + } + else if (SchmidtNum>0 && SchmidtNum<1){ + num_iter_Ion = int(round((1.0/SchmidtNum)/2)*2); + num_iter_Stokes = 1; + } + else{ + ERROR("Error: SchmidtNum (Schmidt number) must be a positive number! 
\n"); + } + + // load input parameters + // in case user wants to have an absolute control over the iternal iteration + if (study_db->keyExists( "num_iter_Ion" )){ + num_iter_Ion = study_db->getScalar( "num_iter_Ion" ); + } + if (study_db->keyExists( "num_iter_Stokes" )){ + num_iter_Stokes = study_db->getScalar( "num_iter_Stokes" ); + } + +} + + diff --git a/models/MultiPhysController.h b/models/MultiPhysController.h new file mode 100644 index 00000000..b108e28a --- /dev/null +++ b/models/MultiPhysController.h @@ -0,0 +1,47 @@ +/* + * Multiphysics controller that coordinates the coupling between different models + */ +#include +#include +#include +#include +#include +#include +#include + +#include "common/ScaLBL.h" +#include "common/Communication.h" +#include "common/MPI_Helpers.h" +#include "analysis/Minkowski.h" +#include "ProfilerApp.h" + +class ScaLBL_Multiphys_Controller{ +public: + ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM); + ~ScaLBL_Multiphys_Controller(); + + void ReadParams(string filename); + void ReadParams(std::shared_ptr db0); + + bool Restart; + //int timestep; + int timestepMax; + int num_iter_Stokes; + int num_iter_Ion; + double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity + + // input database + std::shared_ptr db; + std::shared_ptr study_db; + +private: + MPI_Comm comm; + + // filenames + char LocalRankString[8]; + char LocalRankFilename[40]; + char LocalRestartFile[40]; + + //int rank,nprocs; + void LoadParams(std::shared_ptr db0); +}; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 9b6f3c89..ca251221 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -23,8 +23,8 @@ void ScaLBL_Poisson::ReadParams(string filename){ electric_db = db->getDatabase( "Poisson" ); k2_inv = 4.5;//the inverse of 2nd-rank moment of D3Q7 lattice - deltaT = 0.3;//time step of LB-Poisson equation - tau = 0.5+k2_inv*deltaT; + gamma = 0.3;//time step of LB-Poisson equation + tau = 
0.5+k2_inv*gamma; timestepMax = 100000; tolerance = 1.0e-6;//stopping criterion for obtaining steady-state electricla potential h = 1.0;//resolution; unit: um/lu @@ -44,8 +44,8 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( "tolerance" )){ tolerance = electric_db->getScalar( "tolerance" ); } - if (electric_db->keyExists( "deltaT" )){ - deltaT = electric_db->getScalar( "deltaT" ); + if (electric_db->keyExists( "gamma" )){ + gamma = electric_db->getScalar( "gamma" ); } if (electric_db->keyExists( "epsilonR" )){ epsilonR = electric_db->getScalar( "epsilonR" ); @@ -60,8 +60,12 @@ void ScaLBL_Poisson::ReadParams(string filename){ //Re-calcualte model parameters if user updates input epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] epsilon_LB = epsilon0_LB*epsilonR;//electrical permittivity - tau = 0.5+k2_inv*deltaT; + tau = 0.5+k2_inv*gamma; + if (rank==0) printf("***********************************************************************************\n"); + if (rank==0) printf("LB-Poisson Solver: steady-state MaxTimeStep = %i; steady-state tolerance = %.3g \n", timestepMax,tolerance); + if (rank==0) printf(" LB relaxation tau = %.5g \n", tau); + if (rank==0) printf("***********************************************************************************\n"); } void ScaLBL_Poisson::SetDomain(){ Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis @@ -143,9 +147,9 @@ void ScaLBL_Poisson::ReadInput(){ } } // MeanFilter(Averages->SDs); - if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); + if (rank==0) printf("LB-Poisson Solver: Initialized solid phase & converting to Signed Distance function \n"); CalcDist(Distance,id_solid,*Dm); - if (rank == 0) cout << "Domain set." << endl; + if (rank == 0) cout << " Domain set." 
<< endl; } void ScaLBL_Poisson::Create(){ @@ -159,13 +163,13 @@ void ScaLBL_Poisson::Create(){ Mask->CommInit(); Np=Mask->PoreCount(); //........................................................................... - if (rank==0) printf ("Create ScaLBL_Communicator \n"); + if (rank==0) printf ("LB-Poisson Solver: Create ScaLBL_Communicator \n"); // Create a communicator for the device (will use optimized layout) // ScaLBL_Communicator ScaLBL_Comm(Mask); // original ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); int Npad=(Np/16 + 2)*16; - if (rank==0) printf ("Set up memory efficient layout \n"); + if (rank==0) printf ("LB-Poisson Solver: Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); @@ -174,7 +178,7 @@ void ScaLBL_Poisson::Create(){ // MAIN VARIABLES ALLOCATED HERE //........................................................................... // LBM variables - if (rank==0) printf ("Allocating distributions \n"); + if (rank==0) printf ("LB-Poisson Solver: Allocating distributions \n"); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); int neighborSize=18*(Np*sizeof(int)); @@ -185,7 +189,7 @@ void ScaLBL_Poisson::Create(){ ScaLBL_AllocateDeviceMemory((void **) &ElectricField, 3*sizeof(double)*Np); //........................................................................... 
// Update GPU data structures - if (rank==0) printf ("Setting up device map and neighbor list \n"); + if (rank==0) printf ("LB-Poisson Solver: Setting up device map and neighbor list \n"); // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); MPI_Barrier(comm); @@ -196,22 +200,17 @@ void ScaLBL_Poisson::Initialize(){ /* * This function initializes model */ - if (rank==0) printf ("Initializing D3Q7 distributions for LB-Poisson solver\n"); + if (rank==0) printf ("LB-Poisson Solver: initializing D3Q7 distributions\n"); ScaLBL_D3Q7_Poisson_Init(fq, Np); } void ScaLBL_Poisson::Run(double *ChargeDensity){ - //LB-related parameter - double rlx = 1.0/tau; - //.......create and start timer............ - double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - starttime = MPI_Wtime(); - if (rank==0) printf("***************************************************************************\n"); - if (rank==0) printf("LB-Poisson Solver: timestepMax = %i; steady-state tolerance = %.3g \n", timestepMax,tolerance); - if (rank==0) printf("***************************************************************************\n"); + //double starttime,stoptime,cputime; + //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //starttime = MPI_Wtime(); + timestep=0; double error = 1.0; double psi_avg_previous = 0.0; @@ -220,21 +219,21 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ // *************ODD TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE // Set boundary conditions /* ... 
*/ - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL - ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE // Set boundary conditions /* ... */ - ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, rlx, epsilon_LB, deltaT, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ @@ -267,24 +266,22 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ psi_avg_previous = psi_avg; } } - //************************************************************************/ - stoptime = MPI_Wtime(); - if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n"); - if (rank==0) printf("---------------------------------------------------------------------------\n"); - // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; - // Performance obtained from each node - double MLUPS = double(Np)/cputime/1000000; - if (rank==0) printf("******************* LB-Poisson Solver Statistics ********************\n"); - if (rank==0) printf("CPU time = %f \n", cputime); - if (rank==0) printf("Lattice update rate (per core)= %f MLUPS 
\n", MLUPS); - MLUPS *= nprocs; - if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); - if (rank==0) printf("*********************************************************************\n"); + //************************************************************************/ + //stoptime = MPI_Wtime(); + ////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n"); + ////if (rank==0) printf("---------------------------------------------------------------------------\n"); + //// Compute the walltime per timestep + //cputime = (stoptime - starttime)/timestep; + //// Performance obtained from each node + //double MLUPS = double(Np)/cputime/1000000; + + //if (rank==0) printf("******************* LB-Poisson Solver Statistics ********************\n"); + //if (rank==0) printf("CPU time = %f \n", cputime); + //if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + //MLUPS *= nprocs; + //if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + //if (rank==0) printf("*********************************************************************\n"); } -//void ScaLBL_Poisson::get_ElectricField(){ -//// ??? 
-//} diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp new file mode 100644 index 00000000..8bde5c75 --- /dev/null +++ b/models/StokesModel.cpp @@ -0,0 +1,543 @@ +/* + * Multi-relaxation time LBM Model + */ +#include "models/StokesModel.h" +#include "analysis/distance.h" +#include "common/ReadMicroCT.h" + +ScaLBL_StokesModel::ScaLBL_StokesModel(int RANK, int NP, MPI_Comm COMM): +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), +Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), +Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) +{ + +} +ScaLBL_StokesModel::~ScaLBL_StokesModel(){ + +} + +void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ + // read the input database + db = std::make_shared( filename ); + domain_db = db->getDatabase( "Domain" ); + stokes_db = db->getDatabase( "Stokes" ); + + //------ Load number of iteration from multiphysics controller ------// + timestepMax = num_iter; + //-------------------------------------------------------------------// + + //---------------------- Default model parameters --------------------------// + nu_phys = 1.004e-6;//by default use water kinematic viscosity at 20C; unit [m^2/sec] + h = 1.0;//image resolution;[um] + tau = 1.0; + mu = (tau-0.5)/3.0;//LB kinematic viscosity;unit [lu^2/lt] + time_conv = h*h*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] + tolerance = 1.0e-8; + Fx = Fy = 0.0; + Fz = 1.0e-5; + //--------------------------------------------------------------------------// + + // Single-fluid Navier-Stokes Model parameters + //if (stokes_db->keyExists( "timestepMax" )){ + // timestepMax = stokes_db->getScalar( "timestepMax" ); + //} + if (stokes_db->keyExists( "tolerance" )){ + tolerance = stokes_db->getScalar( "tolerance" ); + } + if (stokes_db->keyExists( "tau" )){ + tau = stokes_db->getScalar( "tau" ); + } + if (stokes_db->keyExists( "nu_phys" )){ + nu_phys = stokes_db->getScalar( 
"nu_phys" ); + } + if (stokes_db->keyExists( "F" )){ + Fx = stokes_db->getVector( "F" )[0]; + Fy = stokes_db->getVector( "F" )[1]; + Fz = stokes_db->getVector( "F" )[2]; + } + if (stokes_db->keyExists( "Restart" )){ + Restart = stokes_db->getScalar( "Restart" ); + } + if (stokes_db->keyExists( "din" )){ + din = stokes_db->getScalar( "din" ); + } + if (stokes_db->keyExists( "dout" )){ + dout = stokes_db->getScalar( "dout" ); + } + if (stokes_db->keyExists( "flux" )){ + flux = stokes_db->getScalar( "flux" ); + } + + // Read domain parameters + if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } + if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu + h = domain_db->getScalar( "voxel_length" ); + } + + // Re-calculate model parameters due to parameter read + mu=(tau-0.5)/3.0; + time_conv = h*h*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] + if (rank==0) printf("*****************************************************\n"); + if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: \n"); + if (rank==0) printf(" Time conversion factor: %.5g [sec/lt]\n", time_conv); + if (rank==0) printf(" Internal iteration: %i [lt]\n", timestepMax); + if (rank==0) printf("*****************************************************\n"); +} +void ScaLBL_StokesModel::SetDomain(){ + Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis + Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases + + // domain parameters + Nx = Dm->Nx; + Ny = Dm->Ny; + Nz = Dm->Nz; + Lx = Dm->Lx; + Ly = Dm->Ly; + Lz = Dm->Lz; + + N = Nx*Ny*Nz; + Distance.resize(Nx,Ny,Nz); + Velocity_x.resize(Nx,Ny,Nz); + Velocity_y.resize(Nx,Ny,Nz); + Velocity_z.resize(Nx,Ny,Nz); + + for (int i=0; iid[i] = 1; // initialize this way + //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object + MPI_Barrier(comm); + Dm->CommInit(); + MPI_Barrier(comm); + + rank = Dm->rank(); + nprocx 
= Dm->nprocx(); + nprocy = Dm->nprocy(); + nprocz = Dm->nprocz(); +} + +void ScaLBL_StokesModel::ReadInput(){ + + sprintf(LocalRankString,"%05d",Dm->rank()); + sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); + sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); + + + if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask->Decomp(Filename); + } + else if (domain_db->keyExists( "GridFile" )){ + // Read the local domain data + auto input_id = readMicroCT( *domain_db, comm ); + // Fill the halo (assuming GCW of 1) + array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; + ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; + ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); + fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + Array id_view; + id_view.viewRaw( size1, Mask->id ); + fill.copy( input_id, id_view ); + fill.fill( id_view ); + } + else{ + Mask->ReadIDs(); + } + + // Generate the signed distance map + // Initialize the domain and communication + Array id_solid(Nx,Ny,Nz); + // Solve for the position of the solid phase + for (int k=0;kid[n] > 0) id_solid(i,j,k) = 1; + else id_solid(i,j,k) = 0; + } + } + } + // Initialize the signed distance function + for (int k=0;kSDs); + if (rank==0) printf("LB Single-Fluid Solver: initialized solid phase & converting to Signed Distance function \n"); + CalcDist(Distance,id_solid,*Dm); + if (rank == 0) cout << " Domain set." << endl; +} + +void ScaLBL_StokesModel::Create(){ + /* + * This function creates the variables needed to run a LBM + */ + int rank=Mask->rank(); + //......................................................... 
+ // Initialize communication structures in averaging domain + for (int i=0; iid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... + if (rank==0) printf ("LB Single-Fluid Solver: Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("LB Single-Fluid Solver: Set up memory efficient layout \n"); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + MPI_Barrier(comm); + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("LB Single-Fluid Solver: Allocating distributions \n"); + //......................device distributions................................. + int dist_mem_size = Np*sizeof(double); + int neighborSize=18*(Np*sizeof(int)); + //........................................................................... + ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + //........................................................................... 
+ // Update GPU data structures + if (rank==0) printf ("LB Single-Fluid Solver: Setting up device map and neighbor list \n"); + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); delete [] neighborList; /* release the host staging array now that it has been copied to NeighborList */ +} + +void ScaLBL_StokesModel::Initialize(){ + /* + * This function initializes the D3Q19 distributions (fq) for the Np sites + */ + if (rank==0) printf("LB Single-Fluid Solver: Initializing distributions \n"); + if (rank==0) printf("****************************************************************\n"); + ScaLBL_D3Q19_Init(fq, Np); +} + +void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ /* runs odd/even AA update pairs until timestep reaches timestepMax; ChargeDensity and ElectricField are passed through to the StokesMRT kernels */ + double rlx_setA=1.0/tau; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + timestep = 0; + while (timestep < timestepMax) { + //************************************************************************/ + timestep++; /* count the odd half-step too, as Run() does, so the BC calls below see an odd timestep */ ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, + ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB,
Fx, Fy, Fz, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //************************************************************************/ + } +} + +//void ScaLBL_StokesModel::computeVelocity_phys(){ +// ScaLBL_D3Q19_Momentum(fq,Velocity, Np); +// ScaLBL_DeviceBarrier(); MPI_Barrier(comm); +//} + +void ScaLBL_StokesModel::Run(){ + double rlx_setA=1.0/tau; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + Minkowski Morphology(Mask); + + if (rank==0){ + bool WriteHeader=false; + FILE *log_file = fopen("Permeability.csv","r"); + if (log_file != NULL) + fclose(log_file); + else + WriteHeader=true; + + if (WriteHeader){ + log_file = fopen("Permeability.csv","a+"); + fprintf(log_file,"time Fx Fy Fz mu Vs As Js Xs vx vy vz k\n"); + fclose(log_file); + } + } + + //.......create and start timer............ 
+ double starttime,stoptime,cputime; + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + starttime = MPI_Wtime(); + if (rank==0) printf("****************************************************************\n"); + if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax); + if (rank==0) printf("****************************************************************\n"); + timestep=0; + double error = 1.0; + double flow_rate_previous = 0.0; + while (timestep < timestepMax && error > tolerance) { + //************************************************************************/ + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + timestep++; + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition ==
4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //************************************************************************/ + + if (timestep%1000==0){ + ScaLBL_D3Q19_Momentum(fq,Velocity, Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); + + double count_loc=0; + double count; + double vax,vay,vaz; + double vax_loc,vay_loc,vaz_loc; + vax_loc = vay_loc = vaz_loc = 0.f; + for (int k=1; k 0){ + vax_loc += Velocity_x(i,j,k); + vay_loc += Velocity_y(i,j,k); + vaz_loc += Velocity_z(i,j,k); + count_loc+=1.0; + } + } + } + } + MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + vax /= count; + vay /= count; + vaz /= count; + + double force_mag = sqrt(Fx*Fx+Fy*Fy+Fz*Fz); + double dir_x = Fx/force_mag; + double dir_y = Fy/force_mag; + double dir_z = Fz/force_mag; + if (force_mag == 0.0){ + // default to z direction + dir_x = 0.0; + dir_y = 0.0; + dir_z = 1.0; + force_mag = 1.0; + } + double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z); + + error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate); + flow_rate_previous = flow_rate; + + //if (rank==0) printf("Computing Minkowski functionals \n"); + Morphology.ComputeScalar(Distance,0.f); + //Morphology.PrintAll(); + double mu = (tau-0.5)/3.f; + double Vs = 
Morphology.V(); + double As = Morphology.A(); + double Hs = Morphology.H(); + double Xs = Morphology.X(); + Vs=sumReduce( Dm->Comm, Vs); + As=sumReduce( Dm->Comm, As); + Hs=sumReduce( Dm->Comm, Hs); + Xs=sumReduce( Dm->Comm, Xs); + double h = Dm->voxel_length; + double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; + if (rank==0) { + printf(" %f\n",absperm); + FILE * log_file = fopen("Permeability.csv","a"); + fprintf(log_file,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",timestep, Fx, Fy, Fz, mu, + h*h*h*Vs,h*h*As,h*Hs,Xs,vax,vay,vaz, absperm); + fclose(log_file); + } + } + } + //************************************************************************/ + stoptime = MPI_Wtime(); + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + cputime = (stoptime - starttime)/timestep; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + +} + +void ScaLBL_StokesModel::VelocityField(){ + +/* Minkowski Morphology(Mask); + int SIZE=Np*sizeof(double); + ScaLBL_D3Q19_Momentum(fq,Velocity, Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE); + + memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double)); + Morphology.Initialize(); + Morphology.UpdateMeshValues(); + Morphology.ComputeLocal(); + Morphology.Reduce(); + + double count_loc=0; + double count; + double vax,vay,vaz; + double vax_loc,vay_loc,vaz_loc; + vax_loc = vay_loc = vaz_loc = 0.f; + for (int n=0; nLastExterior(); n++){ + 
vax_loc += VELOCITY[n]; + vay_loc += VELOCITY[Np+n]; + vaz_loc += VELOCITY[2*Np+n]; + count_loc+=1.0; + } + + for (int n=ScaLBL_Comm->FirstInterior(); nLastInterior(); n++){ + vax_loc += VELOCITY[n]; + vay_loc += VELOCITY[Np+n]; + vaz_loc += VELOCITY[2*Np+n]; + count_loc+=1.0; + } + MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + vax /= count; + vay /= count; + vaz /= count; + + double mu = (tau-0.5)/3.f; + if (rank==0) printf("Fx Fy Fz mu Vs As Js Xs vx vy vz\n"); + if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu, + Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz); + */ + + std::vector visData; + fillHalo fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1); + + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); + auto SignDistVar = std::make_shared(); + + IO::initialize("","silo","false"); + // Create the MeshDataStruct + visData.resize(1); + visData[0].meshName = "domain"; + visData[0].mesh = std::make_shared( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz ); + SignDistVar->name = "SignDist"; + SignDistVar->type = IO::VariableType::VolumeVariable; + SignDistVar->dim = 1; + SignDistVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(SignDistVar); + + VxVar->name = "Velocity_x"; + VxVar->type = IO::VariableType::VolumeVariable; + VxVar->dim = 1; + VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VxVar); + VyVar->name = "Velocity_y"; + VyVar->type = IO::VariableType::VolumeVariable; + VyVar->dim = 1; + VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VyVar); + VzVar->name = "Velocity_z"; + VzVar->type = 
IO::VariableType::VolumeVariable; + VzVar->dim = 1; + VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VzVar); + + Array& SignData = visData[0].vars[0]->data; + Array& VelxData = visData[0].vars[1]->data; + Array& VelyData = visData[0].vars[2]->data; + Array& VelzData = visData[0].vars[3]->data; + + ASSERT(visData[0].vars[0]->name=="SignDist"); + ASSERT(visData[0].vars[1]->name=="Velocity_x"); + ASSERT(visData[0].vars[2]->name=="Velocity_y"); + ASSERT(visData[0].vars[3]->name=="Velocity_z"); + + fillData.copy(Distance,SignData); + fillData.copy(Velocity_x,VelxData); + fillData.copy(Velocity_y,VelyData); + fillData.copy(Velocity_z,VelzData); + + IO::writeData( timestep, visData, Dm->Comm ); + +} diff --git a/models/StokesModel.h b/models/StokesModel.h new file mode 100644 index 00000000..6ff8f6fa --- /dev/null +++ b/models/StokesModel.h @@ -0,0 +1,75 @@ +/* + * Multi-relaxation time LBM Model + */ +#include +#include +#include +#include +#include +#include +#include + +#include "common/ScaLBL.h" +#include "common/Communication.h" +#include "common/MPI_Helpers.h" +#include "analysis/Minkowski.h" +#include "ProfilerApp.h" + +class ScaLBL_StokesModel{ +public: + ScaLBL_StokesModel(int RANK, int NP, MPI_Comm COMM); + ~ScaLBL_StokesModel(); + + // functions, listed in the order in which they should be run + void ReadParams(string filename); + void ReadParams(std::shared_ptr db0); + void SetDomain(); + void ReadInput(); + void Create(); + void Initialize(); + void Run(); + void VelocityField(); + + bool Restart,pBC; + int timestep,timestepMax; + int BoundaryCondition; + double tau,mu; + double Fx,Fy,Fz,flux; + double din,dout; + double tolerance; + + int Nx,Ny,Nz,N,Np; + int rank,nprocx,nprocy,nprocz,nprocs; + double Lx,Ly,Lz; + + std::shared_ptr Dm; // this domain is for analysis + std::shared_ptr Mask; // this domain is for lbm + std::shared_ptr ScaLBL_Comm; + // input database + std::shared_ptr db; + std::shared_ptr domain_db; + std::shared_ptr stokes_db; + + IntArray
Map; + DoubleArray Distance; + int *NeighborList; + double *fq; + double *Velocity; + double *Pressure; + + //Minkowski Morphology; + + DoubleArray Velocity_x; + DoubleArray Velocity_y; + DoubleArray Velocity_z; +private: + MPI_Comm comm; + + // filenames + char LocalRankString[8]; + char LocalRankFilename[40]; + char LocalRestartFile[40]; + + //int rank,nprocs; + void LoadParams(std::shared_ptr db0); +}; diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index 0c9da812..d4a83384 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -5,74 +5,99 @@ #include #include #include +#include -#include "models/DFHModel.h" +#include "models/StokesModel.h" #include "models/IonModel.h" #include "models/PoissonSolver.h" - -//#define WRE_SURFACES - -/* - * Simulator for two-phase flow in porous media - * James E. McClure 2013-2014 - */ +#include "models/MultiPhysController.h" using namespace std; -//************************************************************************* -// Implementation of Two-Phase Immiscible LBM using CUDA -//************************************************************************* +//*************************************************************************** +// Implementation of Multiphysics simulator using lattice-Boltzmann method +//*************************************************************************** int main(int argc, char **argv) { - // Initialize MPI - int provided_thread_support = -1; - MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - if ( rank==0 && provided_thread_support Date: Sun, 16 Aug 2020 11:20:11 -0400 Subject: [PATCH 019/205] save the work;CPU version compiled; to be tested --- common/ScaLBL.cpp | 67 ++++++++------ common/ScaLBL.h | 38 ++++++-- cpu/D3Q7BC.cpp | 32 +++++++ cpu/Ion.cpp 
| 23 ++--- models/IonModel.cpp | 155 ++++++++++++++++++++++++++------- models/IonModel.h | 5 +- models/MultiPhysController.cpp | 2 +- models/MultiPhysController.h | 2 + models/PoissonSolver.cpp | 122 +++++++++++++++++++++++--- models/PoissonSolver.h | 6 +- models/StokesModel.cpp | 2 +- models/StokesModel.h | 6 +- tests/CMakeLists.txt | 2 +- 13 files changed, 361 insertions(+), 101 deletions(-) create mode 100644 cpu/D3Q7BC.cpp diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 28459298..a77afbca 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -857,8 +857,8 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis void ScaLBL_Communicator::SetupBounceBackList(IntArray &Map, signed char *id, int Np) { - int idx,i,j,k; - int neighbor; + int idx,i,j,k; + int neighbor; // save list of bounce-back distributions and interaction sites n_bb_d3q7 = 0; n_bb_d3q19 = 0; @@ -929,8 +929,9 @@ void ScaLBL_Communicator::SetupBounceBackList(IntArray &Map, signed char *id, in } int *bb_dist_tmp = new int [local_count]; - bb_interactions = new int [local_count]; + int *bb_interactions_tmp = new int [local_count]; ScaLBL_AllocateDeviceMemory((void **) &bb_dist, sizeof(int)*local_count); + ScaLBL_AllocateDeviceMemory((void **) &bb_interactions, sizeof(int)*local_count); local_count=0; for (k=1;k& IonValence, int number_ion_species, int start, int finish, int Np){ +extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){ int n; - int ic=number_ion_species; double Ci;//ion concentration of species i double CD;//charge density + double CD_tmp; double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant - for (n=start; n0){ - for (n=start; n0) + CD_tmp; } } + diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 2e5b4e71..8c12b587 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -1,14 +1,14 @@ /* - * 
Multi-relaxation time LBM Model + * Dilute Ion Transport LBM Model */ #include "models/IonModel.h" #include "analysis/distance.h" #include "common/ReadMicroCT.h" ScaLBL_IonModel::ScaLBL_IonModel(int RANK, int NP, MPI_Comm COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), -Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), -Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) +rank(RANK),nprocs(NP),timestep(0),timestepMax(0),time_conv(0),kb(0),electron_charge(0),T(0),Vt(0),k2_inv(0),h(0), +tolerance(0),number_ion_species(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0), +BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) { } @@ -44,8 +44,8 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke IonValence.push_back(1);//algebraic valence charge IonConcentration.push_back(1.0e-3);//user-input ion concentration has physical unit [mol/m^3] //deltaT.push_back(1.0); - //tau.push_back(0.5+k2_inv*deltaT[0]*IonDiffusivisty[0]); - tau.push_back(0.5+k2_inv*time_conv/(h*1.0e-6)/(h*1.0e-6)*IonDiffusivisty[0]); + //tau.push_back(0.5+k2_inv*deltaT[0]*IonDiffusivity[0]); + tau.push_back(0.5+k2_inv*time_conv/(h*1.0e-6)/(h*1.0e-6)*IonDiffusivity[0]); //--------------------------------------------------------------------------// // LB-Ion Model parameters @@ -74,10 +74,10 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke } else{ for (int i=0; ikeyExists( "deltaT" )){ - // deltaT.clear(); - // tau.clear(); - // deltaT = ion_db->getVector( "deltaT" ); - // if (deltaT.size()!=number_ion_species){ - // ERROR("Error: number_ion_species and deltaT must be the same length! 
\n"); - // } - // else{//update relaxation parameter tau - // for (int i=0;ikeyExists( "voxel_length" )){//default unit: um/lu h = domain_db->getScalar( "voxel_length" ); } + BoundaryCondition = 0; if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } + BoundaryConditionSolid = 0; + if (domain_db->keyExists( "BC_Solid" )){ + BoundaryConditionSolid = domain_db->getScalar( "BC_Solid" ); + } if (rank==0) printf("*****************************************************\n"); if (rank==0) printf("LB Ion Transport Solver: \n"); @@ -134,6 +125,18 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke if (rank==0) printf(" Ion %i: LB relaxation tau = %.5g\n", i+1,tau[i]); } if (rank==0) printf("*****************************************************\n"); + + switch (BoundaryConditionSolid){ /* report which solid-surface treatment the BC_Solid key selected; each message ends its own line */ + case 0: + if (rank==0) printf("LB Ion Solver: solid boundary: non-flux boundary is assigned\n"); + break; + case 1: + if (rank==0) printf("LB Ion Solver: solid boundary: Neumann-type surface ion concentration is assigned\n"); + break; + default: + if (rank==0) printf("LB Ion Solver: solid boundary: non-flux boundary is assigned\n"); + break; + } } void ScaLBL_IonModel::SetDomain(){ @@ -220,6 +223,64 @@ void ScaLBL_IonModel::ReadInput(){ if (rank == 0) cout << " Domain set." << endl; } + +void ScaLBL_IonModel::AssignSolidBoundary(double *ion_solid) +{ + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = ion_db->getVector( "SolidLabels" ); + auto AffinityList = ion_db->getVector( "SolidValues" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: LB Ion Solver: SolidLabels and SolidValues must be the same length!
\n"); + } + + double label_count[NLABELS]; + double label_count_global[NLABELS]; + // Assign the labels + + for (size_t idx=0; idxid[n]; + AFFINITY=0.f; + // Assign the affinity from the paired list + for (unsigned int idx=0; idx < NLABELS; idx++){ + //printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]); + if (VALUE == LabelList[idx]){ + AFFINITY=AffinityList[idx]; + //NOTE need to convert the user input phys unit to LB unit + AFFINITY = AFFINITY*(h*h*1.0e-12); + label_count[idx] += 1.0; + idx = NLABELS; + //Mask->id[n] = 0; // set mask to zero since this is an immobile component + } + } + ion_solid[n] = AFFINITY; + } + } + } + + for (size_t idx=0; idxComm, label_count[idx]); + + if (rank==0){ + printf("LB Ion Solver: Ion Solid labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxSetupBounceBackList(Map, Mask->id, Np); + MPI_Barrier(comm); + + double *IonSolid_host; + IonSolid_host = new double[Nx*Ny*Nz]; + AssignSolidBoundary(IonSolid_host); + ScaLBL_CopyToDevice(IonSolid, IonSolid_host, Nx*Ny*Nz*sizeof(double)); + ScaLBL_DeviceBarrier(); + delete [] IonSolid_host; + } + + } void ScaLBL_IonModel::Initialize(){ @@ -273,8 +351,10 @@ void ScaLBL_IonModel::Initialize(){ ScaLBL_D3Q7_Ion_Init(&fq[ic*Np*7],&Ci[ic*Np],IonConcentration[ic],Np); } if (rank==0) printf ("LB Ion Solver: initializing charge density\n"); - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); + for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np); + } } void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ @@ -299,10 +379,11 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_Comm->SendD3Q7AA(fq, ic); //READ FROM 
NORMAL ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np], 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np); } - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, 0, ScaLBL_Comm->LastExterior(), Np); //LB-Ion collison for (int ic=0; icLastExterior(), Np); } - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + if (BoundaryConditionSolid==1){ + for (int ic=0; icSolidNeumannD3Q7(&fq[ic*Np*7], IonSolid); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + } + } // *************EVEN TIMESTEP*************// timestep++; @@ -327,9 +414,9 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np], 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np); } - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence, number_ion_species, 0, 
ScaLBL_Comm->LastExterior(), Np); //LB-Ion collison for (int ic=0; icLastExterior(), Np); } - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + if (BoundaryConditionSolid==1){ + for (int ic=0; icSolidNeumannD3Q7(&fq[ic*Np*7], IonSolid); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + } + } //************************************************************************/ } //************************************************************************/ diff --git a/models/IonModel.h b/models/IonModel.h index 9fe0a146..0edec1b0 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -30,9 +30,10 @@ public: void Initialize(); void Run(double *Velocity, double *ElectricField); - bool Restart,pBC; + //bool Restart,pBC; int timestep,timestepMax; int BoundaryCondition; + int BoundaryConditionSolid; double h;//domain resolution, unit [um/lu] double time_conv; double kb,electron_charge,T,Vt; @@ -64,6 +65,7 @@ public: double *fq; double *Ci; double *ChargeDensity; + double *IonSolid; private: MPI_Comm comm; @@ -75,4 +77,5 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); + void AssignSolidBoundary(double *ion_solid); }; diff --git a/models/MultiPhysController.cpp b/models/MultiPhysController.cpp index f79db6b7..1453067a 100644 --- a/models/MultiPhysController.cpp +++ b/models/MultiPhysController.cpp @@ -1,7 +1,7 @@ #include "models/MultiPhysController.h" ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM): -rank(RANK),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0),SchmidtNum(0),comm(COMM) +rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0),SchmidtNum(0),comm(COMM) { } diff --git a/models/MultiPhysController.h b/models/MultiPhysController.h index b108e28a..e51f2102 100644 --- a/models/MultiPhysController.h +++ b/models/MultiPhysController.h @@ -30,6 +30,8 @@ public: int num_iter_Ion; double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity + int rank,nprocs; + // input 
database std::shared_ptr db; std::shared_ptr study_db; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 466c8841..a4decc24 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -6,9 +6,9 @@ #include "common/ReadMicroCT.h" ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), -Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), -Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) +rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),gamma(0),tolerance(0),h(0), +epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), +nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) { } @@ -54,9 +54,16 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu h = domain_db->getScalar( "voxel_length" ); } + + BoundaryCondition = 0; if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } + BoundaryConditionSolid = 1; + if (domain_db->keyExists( "BC_Solid" )){ + BoundaryConditionSolid = domain_db->getScalar( "BC_Solid" ); + } + //Re-calcualte model parameters if user updates input epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] epsilon_LB = epsilon0_LB*epsilonR;//electrical permittivity @@ -66,6 +73,18 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (rank==0) printf("LB-Poisson Solver: steady-state MaxTimeStep = %i; steady-state tolerance = %.3g \n", timestepMax,tolerance); if (rank==0) printf(" LB relaxation tau = %.5g \n", tau); if (rank==0) printf("***********************************************************************************\n"); + + switch (BoundaryConditionSolid){ + case 1: + if (rank==0) printf("LB-Poisson Solver: solid boundary: Dirichlet-type surfacen potential is assigned"); + 
break; + case 2: + if (rank==0) printf("LB-Poisson Solver: solid boundary: Neumann-type surfacen charge density is assigned"); + break; + default: + if (rank==0) printf("LB-Poisson Solver: solid boundary: Dirichlet-type surfacen potential is assigned"); + break; + } } void ScaLBL_Poisson::SetDomain(){ Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis @@ -152,6 +171,77 @@ void ScaLBL_Poisson::ReadInput(){ if (rank == 0) cout << " Domain set." << endl; } +void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid) +{ + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = electric_db->getVector( "SolidLabels" ); + auto AffinityList = electric_db->getVector( "SolidValues" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: LB-Poisson Solver: SolidLabels and SolidValues must be the same length! \n"); + } + + double label_count[NLABELS]; + double label_count_global[NLABELS]; + // Assign the labels + + for (size_t idx=0; idxid[n]; + AFFINITY=0.f; + // Assign the affinity from the paired list + for (unsigned int idx=0; idx < NLABELS; idx++){ + //printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]); + if (VALUE == LabelList[idx]){ + AFFINITY=AffinityList[idx]; + //NOTE need to convert the user input phys unit to LB unit + if (BoundaryConditionSolid==2){ + //for BCS=1, i.e. Dirichlet-type, no need for unit conversion + AFFINITY = AFFINITY*(h*h*1.0e-12); + } + label_count[idx] += 1.0; + idx = NLABELS; + //Mask->id[n] = 0; // set mask to zero since this is an immobile component + } + } + poisson_solid[n] = AFFINITY; + } + } + } + + for (size_t idx=0; idxComm, label_count[idx]); + + if (rank==0){ + printf("LB-Poisson Solver: Poisson Solid labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxn_bb_d3q7); + ScaLBL_AllocateDeviceMemory((void **) &PoissonSolid, sizeof(double)*Nx*Ny*Nz); //........................................................................... 
- // initialize the zeta function (example is zeta is constant on solid surface) - double *tmpZeta = new double[ScaLBL_Comm->n_bb_d3q7]; - for int (i=0; in_bb_d3q7; i++){ - tmpZeta[i] = 1.0/k2_inv; // this has to be read from input file - } - ScaLBL_CopyToDevice(zeta, tmpZeta, sizeof(double)*ScaLBL_Comm->n_bb_d3q7); - delete [] tmpZeta; // Update GPU data structures if (rank==0) printf ("LB-Poisson Solver: Setting up device map and neighbor list \n"); // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //Initialize solid boundary for electrical potential + ScaLBL_Comm->SetupBounceBackList(Map, Mask->id, Np); + MPI_Barrier(comm); + + double *PoissonSolid_host; + PoissonSolid_host = new double[Nx*Ny*Nz]; + AssignSolidBoundary(PoissonSolid_host); + ScaLBL_CopyToDevice(PoissonSolid, PoissonSolid_host, Nx*Ny*Nz*sizeof(double)); + ScaLBL_DeviceBarrier(); + delete [] PoissonSolid_host; } void ScaLBL_Poisson::Initialize(){ @@ -233,7 +327,7 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ // Set boundary conditions /* ... */ ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_Comm->SolidDirichletD3Q7(fq, zeta); + ScaLBL_Comm->SolidDirichletD3Q7(fq, PoissonSolid); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// @@ -244,14 +338,14 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ // Set boundary conditions /* ... 
*/ ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_Comm->SolidDirichletD3Q7(fq, zeta); + ScaLBL_Comm->SolidDirichletD3Q7(fq, PoissonSolid); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ // Check convergence of steady-state solution if (timestep%analysis_interval==0){ - ScaLBL_Comm->RegularLayout(Map,&Psi,Psi_host); + ScaLBL_Comm->RegularLayout(Map,Psi,Psi_host); double count_loc=0; double count; double psi_avg; diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 625c602f..fe003c18 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -33,9 +33,10 @@ public: int timestep,timestepMax; int analysis_interval; int BoundaryCondition; + int BoundaryConditionSolid; double tau; double tolerance; - double k2_inv,deltaT; + double k2_inv,gamma; double epsilon0,epsilon0_LB,epsilonR,epsilon_LB; int Nx,Ny,Nz,N,Np; @@ -58,7 +59,7 @@ public: double *fq; double *Psi; double *ElectricField; - double *zeta; + double *PoissonSolid; private: MPI_Comm comm; @@ -70,4 +71,5 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); + void AssignSolidBoundary(double *poisson_solid); }; diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 8bde5c75..d0664955 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -7,7 +7,7 @@ ScaLBL_StokesModel::ScaLBL_StokesModel(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), -Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), +Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0),h(0),nu_phys(0),time_conv(0),tolerance(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { diff --git a/models/StokesModel.h b/models/StokesModel.h index 6ff8f6fa..c44cb1c5 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -21,13 +21,14 
@@ public: ~ScaLBL_StokesModel(); // functions in they should be run - void ReadParams(string filename); + void ReadParams(string filename,int num_iter); void ReadParams(std::shared_ptr db0); void SetDomain(); void ReadInput(); void Create(); void Initialize(); void Run(); + void Run_Lite(double *ChargeDensity, double *ElectricField); void VelocityField(); bool Restart,pBC; @@ -37,6 +38,9 @@ public: double Fx,Fy,Fz,flux; double din,dout; double tolerance; + double nu_phys; + double time_conv; + double h;//image resolution int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8c937107..1a8bfac0 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,7 +4,7 @@ ADD_LBPM_EXECUTABLE( lbpm_color_simulator ) ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) -ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_dfh_simulator.cpp ) +ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_dfh_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) From 771f679f5c598438ac31b7798e74128bdb26b0ea Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 17 Aug 2020 09:59:22 -0400 Subject: [PATCH 020/205] add output; CPU version compiled; to be tested --- common/ScaLBL.h | 8 ++- cpu/Ion.cpp | 10 ++++ cpu/Stokes.cpp | 60 ++++++++++++++++++--- models/IonModel.cpp | 19 +++++++ models/IonModel.h | 1 + models/PoissonSolver.cpp | 11 ++++ models/PoissonSolver.h | 3 +- models/StokesModel.cpp | 53 +++++++++++++++--- models/StokesModel.h | 2 + tests/lbpm_electrokinetic_dfh_simulator.cpp | 4 +- 10 files changed, 151 insertions(+), 20 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index dc3831c7..4d8a3dc3 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -49,6 +49,8 @@ extern "C" void ScaLBL_D3Q19_Init(double *Dist, int Np); extern "C" void ScaLBL_D3Q19_Momentum(double *dist, double *vel, int 
Np); +extern "C" void ScaLBL_D3Q19_Momentum_Phys(double *dist, double *vel, double h, double time_conv, int Np); + extern "C" void ScaLBL_D3Q19_Pressure(double *dist, double *press, int Np); // BGK MODEL @@ -89,6 +91,8 @@ extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np); +extern "C" void ScaLBL_IonConcentration_Phys(double *Den, double h, int ion_component, int start, int finish, int Np); + // LBM Poisson solver extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma, @@ -102,10 +106,10 @@ extern "C" void ScaLBL_D3Q7_Poisson_Init(double *dist, int Np); // LBM Stokes Model (adapted from MRT model) extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz, int start, int finish, int Np); + double Gx, double Gy, double Gz, double Ex, double Ey, double Ez, int start, int finish, int Np); extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz, int start, int finish, int Np); + double Gx, double Gy, double Gz, double Ex, double Ey, double Ez, int start, int finish, int Np); // MRT MODEL extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp index 5586aae4..b04b3c9c 100644 --- a/cpu/Ion.cpp +++ b/cpu/Ion.cpp @@ -235,3 +235,13 @@ extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity } } +extern "C" void ScaLBL_IonConcentration_Phys(double *Den, double h, int 
ion_component, int start, int finish, int Np){ + //h: resolution [um/lu] + int n; + double Ci; + + for (n=start; n extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz, int start, int finish, int Np) + double Gx, double Gy, double Gz, double Ex_const, double Ey_const, double Ez_const, int start, int finish, int Np) { double fq; // conserved momemnts @@ -32,9 +32,9 @@ extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, do //Load data rhoE = ChargeDensity[n]; - Ex = ElectricField[n+0*Np]; - Ey = ElectricField[n+1*Np]; - Ez = ElectricField[n+2*Np]; + Ex = ElectricField[n+0*Np]+Ex_const; + Ey = ElectricField[n+1*Np]+Ey_const; + Ez = ElectricField[n+2*Np]+Ez_const; //compute total body force, including input body force (Gx,Gy,Gz) Fx = Gx + rhoE*Ex; Fy = Gy + rhoE*Ey; @@ -455,7 +455,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, do } extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz, int start, int finish, int Np) + double Gx, double Gy, double Gz, double Ex_const, double Ey_const, double Ez_const, int start, int finish, int Np) { double fq; // conserved momemnts @@ -487,9 +487,9 @@ extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, do //Load data rhoE = ChargeDensity[n]; - Ex = ElectricField[n+0*Np]; - Ey = ElectricField[n+1*Np]; - Ez = ElectricField[n+2*Np]; + Ex = ElectricField[n+0*Np]+Ex_const; + Ey = ElectricField[n+1*Np]+Ey_const; + Ez = ElectricField[n+2*Np]+Ez_const; //compute total body force, including input body force (Gx,Gy,Gz) Fx = Gx + rhoE*Ex; Fy = Gy + rhoE*Ey; @@ -955,3 +955,47 @@ extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, do } } +extern 
"C" void ScaLBL_D3Q19_Momentum_Phys(double *dist, double *vel, double h, double time_conv, int Np) +{ + //h: resolution [um/lu] + //time_conv: time conversion factor [sec/lt] + int n; + // distributions + double f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double vx,vy,vz; + + for (n=0; nFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } + + DoubleArray PhaseField(Nx,Ny,Nz); + for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + + FILE *OUTFILE; + sprintf(LocalRankFilename,"Ion%02i.%05i.raw",ic+1,rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); + } + +} + diff --git a/models/IonModel.h b/models/IonModel.h index 0edec1b0..6232d105 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -29,6 +29,7 @@ public: void Create(); void Initialize(); void Run(double *Velocity, double *ElectricField); + void getIonConcentration(); //bool Restart,pBC; int timestep,timestepMax; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index a4decc24..27c0cda5 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -390,3 +390,14 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ } +void ScaLBL_Poisson::getElectricalPotential(){ + + DoubleArray PhaseField(Nx,Ny,Nz); + ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); + //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + FILE *OUTFILE; + sprintf(LocalRankFilename,"Electrical_Potential.%05i.raw",rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); +} diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index fe003c18..9f66c09c 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -28,7 +28,8 @@ public: void Create(); void Initialize(); void Run(double *ChargeDensity); - + void getElectricalPotential(); + //bool Restart,pBC; int timestep,timestepMax; int analysis_interval; diff --git 
a/models/StokesModel.cpp b/models/StokesModel.cpp index d0664955..46f28f45 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -35,6 +35,9 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ tolerance = 1.0e-8; Fx = Fy = 0.0; Fz = 1.0e-5; + //Body electric field [V/lu] + Ex = Ey = 0.0; + Ez = 1.0e-3; //--------------------------------------------------------------------------// // Single-fluid Navier-Stokes Model parameters @@ -55,6 +58,11 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ Fy = stokes_db->getVector( "F" )[1]; Fz = stokes_db->getVector( "F" )[2]; } + if (stokes_db->keyExists( "ElectricField" )){//NOTE user-input has physical unit [V/m] + Ex = stokes_db->getVector( "ElectricField" )[0]; + Ey = stokes_db->getVector( "ElectricField" )[1]; + Ez = stokes_db->getVector( "ElectricField" )[2]; + } if (stokes_db->keyExists( "Restart" )){ Restart = stokes_db->getScalar( "Restart" ); } @@ -79,6 +87,11 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ // Re-calculate model parameters due to parameter read mu=(tau-0.5)/3.0; time_conv = h*h*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] + // convert user-input electric field ([V/m]) from physical unit to LB unit + Ex = Ex*(h*1.0e-6);//LB electric field: V/lu + Ey = Ey*(h*1.0e-6); + Ez = Ez*(h*1.0e-6); + if (rank==0) printf("*****************************************************\n"); if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: \n"); if (rank==0) printf(" Time conversion factor: %.5g [sec/lt]\n", time_conv); @@ -232,7 +245,7 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ while (timestep < timestepMax) { //************************************************************************/ ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, + 
ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, Ex, Ey, Ez, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE // Set boundary conditions @@ -248,12 +261,12 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, Ex, Ey, Ez, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, Ex, Ey, Ez, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE // Set boundary conditions if (BoundaryCondition == 3){ @@ -268,16 +281,40 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, Ex, Ey, Ez, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ } } -//void 
ScaLBL_StokesModel::computeVelocity_phys(){ -// ScaLBL_D3Q19_Momentum(fq,Velocity, Np); -// ScaLBL_DeviceBarrier(); MPI_Barrier(comm); -//} +void ScaLBL_StokesModel::getVelocity(){ + //get velocity in physical unit [m/sec] + ScaLBL_D3Q19_Momentum_Phys(fq, Velocity, h, time_conv, Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + + DoubleArray PhaseField(Nx,Ny,Nz); + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + FILE *VELX_FILE; + sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); + VELX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELX_FILE); + fclose(VELX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + FILE *VELY_FILE; + sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); + VELY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELY_FILE); + fclose(VELY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + FILE *VELZ_FILE; + sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); + VELZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELZ_FILE); + fclose(VELZ_FILE); + +} void ScaLBL_StokesModel::Run(){ double rlx_setA=1.0/tau; diff --git a/models/StokesModel.h b/models/StokesModel.h index c44cb1c5..f0a4de6a 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -30,12 +30,14 @@ public: void Run(); void Run_Lite(double *ChargeDensity, double *ElectricField); void VelocityField(); + void getVelocity(); bool Restart,pBC; int timestep,timestepMax; int BoundaryCondition; double tau,mu; double Fx,Fy,Fz,flux; + double Ex,Ey,Ez; double din,dout; double tolerance; double nu_phys; diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index d4a83384..bdc15ef4 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -89,7 +89,9 @@ int main(int argc, char **argv) //-------------------------------------------- } - //StokesModel.WriteDebug(); + 
StokesModel.getVelocity(); + PoissonSolver.getElectricalPotential(); + IonModel.getIonConcentration(); PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_electrokinetic_simulator",1); From 5756d6f1386291e181a166bc617d1e7e63a2398d Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Tue, 18 Aug 2020 12:40:41 -0400 Subject: [PATCH 021/205] fix a few trivial bugs; add some checkpoint print; still debugging --- common/ScaLBL.cpp | 28 ++++++++++----------- cpu/Ion.cpp | 26 +++++++++---------- models/IonModel.cpp | 28 +++++++++++++++------ models/PoissonSolver.cpp | 21 +++++++++------- models/StokesModel.cpp | 2 +- tests/lbpm_electrokinetic_dfh_simulator.cpp | 5 ++++ 6 files changed, 65 insertions(+), 45 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index a77afbca..854c8b49 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1429,37 +1429,37 @@ void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){ ScaLBL_DeviceBarrier(); // Pack the distributions //...Packing for x face(2,8,10,12,14)................................ - ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*N],N); + ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*7*N],N); MPI_Isend(sendbuf_x, sendCount_x,MPI_DOUBLE,rank_x,sendtag,MPI_COMM_SCALBL,&req1[0]); MPI_Irecv(recvbuf_X, recvCount_X,MPI_DOUBLE,rank_X,recvtag,MPI_COMM_SCALBL,&req2[0]); //...Packing for X face(1,7,9,11,13)................................ - ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*N],N); + ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*7*N],N); MPI_Isend(sendbuf_X, sendCount_X,MPI_DOUBLE,rank_X,sendtag,MPI_COMM_SCALBL,&req1[1]); MPI_Irecv(recvbuf_x, recvCount_x,MPI_DOUBLE,rank_x,recvtag,MPI_COMM_SCALBL,&req2[1]); //...Packing for y face(4,8,9,16,18)................................. 
- ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*N],N); + ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*7*N],N); MPI_Isend(sendbuf_y, 2*sendCount_y,MPI_DOUBLE,rank_y,sendtag,MPI_COMM_SCALBL,&req1[2]); MPI_Irecv(recvbuf_Y, 2*recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,MPI_COMM_SCALBL,&req2[2]); //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*N],N); + ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*7*N],N); MPI_Isend(sendbuf_Y, sendCount_Y,MPI_DOUBLE,rank_Y,sendtag,MPI_COMM_SCALBL,&req1[3]); MPI_Irecv(recvbuf_y, recvCount_y,MPI_DOUBLE,rank_y,recvtag,MPI_COMM_SCALBL,&req2[3]); //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*N],N); + ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*7*N],N); MPI_Isend(sendbuf_z, sendCount_z,MPI_DOUBLE,rank_z,sendtag,MPI_COMM_SCALBL,&req1[4]); MPI_Irecv(recvbuf_Z, recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,MPI_COMM_SCALBL,&req2[4]); //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*N],N); + ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*7*N],N); //................................................................................... // Send all the distributions @@ -1483,33 +1483,33 @@ void ScaLBL_Communicator::RecvD3Q7AA(double *Aq, int Component){ // Unpack the distributions on the device //................................................................................... //...Unpacking for x face(2,8,10,12,14)................................ 
- ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,&Aq[Component*N],N); + ScaLBL_D3Q7_Unpack(2,dvcRecvDist_x,0,recvCount_x,recvbuf_x,&Aq[Component*7*N],N); //................................................................................... //...Packing for X face(1,7,9,11,13)................................ - ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,&Aq[Component*N],N); + ScaLBL_D3Q7_Unpack(1,dvcRecvDist_X,0,recvCount_X,recvbuf_X,&Aq[Component*7*N],N); //................................................................................... //...Packing for y face(4,8,9,16,18)................................. - ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,&Aq[Component*N],N); + ScaLBL_D3Q7_Unpack(4,dvcRecvDist_y,0,recvCount_y,recvbuf_y,&Aq[Component*7*N],N); //................................................................................... //...Packing for Y face(3,7,10,15,17)................................. - ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,&Aq[Component*N],N); + ScaLBL_D3Q7_Unpack(3,dvcRecvDist_Y,0,recvCount_Y,recvbuf_Y,&Aq[Component*7*N],N); //................................................................................... if (BoundaryCondition > 0){ if (kproc != 0){ //...Packing for z face(6,12,13,16,17)................................ - ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*N],N); + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*7*N],N); } if (kproc != nprocz-1){ //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*N],N); + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*7*N],N); } } else { //...Packing for z face(6,12,13,16,17)................................ 
- ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*N],N); + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*7*N],N); //...Packing for Z face(5,11,14,15,18)................................ - ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*N],N); + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*7*N],N); } //................................................................................... diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp index b04b3c9c..2c7e72a9 100644 --- a/cpu/Ion.cpp +++ b/cpu/Ion.cpp @@ -128,22 +128,22 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *D dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*Ci; // q = 1 - dist[nr2] = f1*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(ux+uEPx)); + dist[nr2] = f1*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0+4.5*(ux+uEPx)); // q=2 - dist[nr1] = f2*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(ux+uEPx)); + dist[nr1] = f2*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0-4.5*(ux+uEPx)); // q = 3 - dist[nr4] = f3*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(uy+uEPy)); + dist[nr4] = f3*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0+4.5*(uy+uEPy)); // q = 4 - dist[nr3] = f4*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(uy+uEPy)); + dist[nr3] = f4*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0-4.5*(uy+uEPy)); // q = 5 - dist[nr6] = f5*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(uz+uEPz)); + dist[nr6] = f5*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0+4.5*(uz+uEPz)); // q = 6 - dist[nr5] = f6*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(uz+uEPz)); + dist[nr5] = f6*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0-4.5*(uz+uEPz)); } } @@ -183,22 +183,22 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Veloci dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*Ci; // q = 1 - dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(ux+uEPx)); + dist[1*Np+n] = f1*(1.0-rlx) + 
rlx*0.1111111111111111*Ci*(1.0+4.5*(ux+uEPx)); // q=2 - dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(ux+uEPx)); + dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0-4.5*(ux+uEPx)); // q = 3 - dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(uy+uEPy)); + dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0+4.5*(uy+uEPy)); // q = 4 - dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(uy+uEPy)); + dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0-4.5*(uy+uEPy)); // q = 5 - dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.1111111111111111*(1.0+4.5*(uz+uEPz)); + dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0+4.5*(uz+uEPz)); // q = 6 - dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.1111111111111111*(1.0-4.5*(uz+uEPz)); + dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.1111111111111111*Ci*(1.0-4.5*(uz+uEPz)); } @@ -231,7 +231,7 @@ extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity Ci = Den[n+ion_component*Np]; CD = ChargeDensity[n]; CD_tmp = F*IonValence*Ci; - ChargeDensity[n] = CD*(IonValence>0) + CD_tmp; + ChargeDensity[n] = CD*(ion_component>0) + CD_tmp; } } diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 14e0a64c..fca2871c 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -36,7 +36,7 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke //---------------------- Default model parameters --------------------------// T = 300.0;//temperature; unit [K] Vt = kb*T/electron_charge;//thermal voltage; unit [Vy] - k2_inv = 4.5;//the inverse of 2nd-rank moment of D3Q7 lattice + k2_inv = 4.5;//speed of sound for D3Q7 lattice h = 1.0;//resolution; unit: um/lu tolerance = 1.0e-8; number_ion_species = 1; @@ -57,6 +57,8 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke } if (ion_db->keyExists( "temperature" )){ T = ion_db->getScalar( "temperature" ); + //re-calculate thermal voltage + Vt = 
kb*T/electron_charge;//thermal voltage; unit [Vy] } if (ion_db->keyExists( "number_ion_species" )){ number_ion_species = ion_db->getScalar( "number_ion_species" ); @@ -104,6 +106,12 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke } } + //Read solid boundary condition specific to Ion model + BoundaryConditionSolid = 0; + if (ion_db->keyExists( "BC_Solid" )){ + BoundaryConditionSolid = ion_db->getScalar( "BC_Solid" ); + } + // Read domain parameters if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu h = domain_db->getScalar( "voxel_length" ); @@ -112,10 +120,6 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } - BoundaryConditionSolid = 0; - if (domain_db->keyExists( "BC_Solid" )){ - BoundaryConditionSolid = domain_db->getScalar( "BC_Solid" ); - } if (rank==0) printf("*****************************************************\n"); if (rank==0) printf("LB Ion Transport Solver: \n"); @@ -128,13 +132,13 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke switch (BoundaryConditionSolid){ case 0: - if (rank==0) printf("LB Ion Solver: solid boundary: non-flux boundary is assigned"); + if (rank==0) printf("LB Ion Solver: solid boundary: non-flux boundary is assigned\n"); break; case 1: - if (rank==0) printf("LB Ion Solver: solid boundary: Neumann-type surfacen ion concentration is assigned"); + if (rank==0) printf("LB Ion Solver: solid boundary: Neumann-type surfacen ion concentration is assigned\n"); break; default: - if (rank==0) printf("LB Ion Solver: solid boundary: non-flux boundary is assigned"); + if (rank==0) printf("LB Ion Solver: solid boundary: non-flux boundary is assigned\n"); break; } } @@ -375,6 +379,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ // *************ODD TIMESTEP*************// timestep++; //Update ion concentration and charge 
density + if (rank==0) printf("timestep=%i; updating ion concentration and charge density\n",timestep); for (int ic=0; icSendD3Q7AA(fq, ic); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); @@ -386,7 +391,9 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ } //LB-Ion collison + if (rank==0) printf("timestep=%i; execute collision step 1/2\n",timestep); for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); } @@ -394,7 +401,9 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ // Set boundary conditions /* ... */ + if (rank==0) printf("timestep=%i; execute collision step 2/2\n",timestep); for (int ic=0; icLastExterior(), Np); } @@ -409,6 +418,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ // *************EVEN TIMESTEP*************// timestep++; //Update ion concentration and charge density + if (rank==0) printf("timestep=%i; updating ion concentration and charge density\n",timestep); for (int ic=0; icSendD3Q7AA(fq, ic); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); @@ -419,6 +429,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ } //LB-Ion collison + if (rank==0) printf("timestep=%i; execute collision step 1/2\n",timestep); for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); @@ -427,6 +438,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ // Set boundary conditions /* ... 
*/ + if (rank==0) printf("timestep=%i; execute collision step 2/2\n",timestep); for (int ic=0; icLastExterior(), Np); diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 27c0cda5..3cf8a6d2 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -22,7 +22,7 @@ void ScaLBL_Poisson::ReadParams(string filename){ domain_db = db->getDatabase( "Domain" ); electric_db = db->getDatabase( "Poisson" ); - k2_inv = 4.5;//the inverse of 2nd-rank moment of D3Q7 lattice + k2_inv = 4.5;//speed of sound for D3Q7 lattice gamma = 0.3;//time step of LB-Poisson equation tau = 0.5+k2_inv*gamma; timestepMax = 100000; @@ -30,7 +30,7 @@ void ScaLBL_Poisson::ReadParams(string filename){ h = 1.0;//resolution; unit: um/lu epsilon0 = 8.85e-12;//electrical permittivity of vaccum; unit:[C/(V*m)] epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] - epsilonR = 78.4;//default dielectric constant for water + epsilonR = 78.4;//default dielectric constant of water epsilon_LB = epsilon0_LB*epsilonR;//electrical permittivity analysis_interval = 1000; @@ -50,6 +50,13 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( "epsilonR" )){ epsilonR = electric_db->getScalar( "epsilonR" ); } + + // Read solid boundary condition specific to Poisson equation + BoundaryConditionSolid = 1; + if (electric_db->keyExists( "BC_Solid" )){ + BoundaryConditionSolid = electric_db->getScalar( "BC_Solid" ); + } + // Read domain parameters if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu h = domain_db->getScalar( "voxel_length" ); @@ -59,10 +66,6 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } - BoundaryConditionSolid = 1; - if (domain_db->keyExists( "BC_Solid" )){ - BoundaryConditionSolid = domain_db->getScalar( "BC_Solid" ); - } //Re-calcualte model parameters if user updates input epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] @@ -76,13 +79,13 @@ 
void ScaLBL_Poisson::ReadParams(string filename){ switch (BoundaryConditionSolid){ case 1: - if (rank==0) printf("LB-Poisson Solver: solid boundary: Dirichlet-type surfacen potential is assigned"); + if (rank==0) printf("LB-Poisson Solver: solid boundary: Dirichlet-type surfacen potential is assigned\n"); break; case 2: - if (rank==0) printf("LB-Poisson Solver: solid boundary: Neumann-type surfacen charge density is assigned"); + if (rank==0) printf("LB-Poisson Solver: solid boundary: Neumann-type surfacen charge density is assigned\n"); break; default: - if (rank==0) printf("LB-Poisson Solver: solid boundary: Dirichlet-type surfacen potential is assigned"); + if (rank==0) printf("LB-Poisson Solver: solid boundary: Dirichlet-type surfacen potential is assigned\n"); break; } } diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 46f28f45..1b733745 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -86,7 +86,7 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ // Re-calculate model parameters due to parameter read mu=(tau-0.5)/3.0; - time_conv = h*h*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] + time_conv = (h*h*1.0e-12)*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] // convert user-input electric field ([V/m]) from physical unit to LB unit Ex = Ex*(h*1.0e-6);//LB electric field: V/lu Ey = Ey*(h*1.0e-6); diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index bdc15ef4..7cf835da 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -78,8 +78,13 @@ int main(int argc, char **argv) while (timestep < Study.timestepMax){ timestep++; + if (rank==0) printf("timestep=%i; running Poisson solver\n",timestep); PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + + if (rank==0) printf("timestep=%i; running 
StokesModel\n",timestep); StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity + + if (rank==0) printf("timestep=%i; running Ion model\n",timestep); IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential From 8996b582da861f28ccb95bae3b5c273069ef66b1 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Wed, 19 Aug 2020 13:21:31 -0400 Subject: [PATCH 022/205] still debugging; add a few checkpoint print out --- cpu/Ion.cpp | 14 ++++++++++++++ models/IonModel.cpp | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp index 2c7e72a9..82398e92 100644 --- a/cpu/Ion.cpp +++ b/cpu/Ion.cpp @@ -1,7 +1,9 @@ +#include extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ int n,nread; double fq,Ci; + printf("ScaLBL_D3Q7_AAodd_IonConcentration: entering the kernel successfully\n"); for (n=start; n Date: Wed, 19 Aug 2020 18:56:06 -0400 Subject: [PATCH 023/205] fix trivial bug in tau initialization --- cpu/Ion.cpp | 13 ------------- models/IonModel.cpp | 10 +--------- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp index 82398e92..ea4f39f4 100644 --- a/cpu/Ion.cpp +++ b/cpu/Ion.cpp @@ -3,7 +3,6 @@ extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ int n,nread; double fq,Ci; - printf("ScaLBL_D3Q7_AAodd_IonConcentration: entering the kernel successfully\n"); for (n=start; nSendD3Q7AA(fq, ic); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); @@ -391,9 +390,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ } //LB-Ion collison - if (rank==0) printf("timestep=%i; execute collision step 1/2\n",timestep); for (int 
ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); } @@ -401,9 +398,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ // Set boundary conditions /* ... */ - if (rank==0) printf("timestep=%i; execute collision step 2/2\n",timestep); for (int ic=0; icLastExterior(), Np); } @@ -418,7 +413,6 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ // *************EVEN TIMESTEP*************// timestep++; //Update ion concentration and charge density - if (rank==0) printf("timestep=%i; updating ion concentration and charge density\n",timestep); for (int ic=0; icSendD3Q7AA(fq, ic); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); @@ -429,7 +423,6 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ } //LB-Ion collison - if (rank==0) printf("timestep=%i; execute collision step 1/2\n",timestep); for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); @@ -438,7 +431,6 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ // Set boundary conditions /* ... 
*/ - if (rank==0) printf("timestep=%i; execute collision step 2/2\n",timestep); for (int ic=0; icLastExterior(), Np); From 59ffd7bfd66cbfa609386a069bd45aa34ba943bf Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 20 Aug 2020 22:47:10 -0400 Subject: [PATCH 024/205] fix several miscellaneous bugs --- models/IonModel.cpp | 20 +++++++++++--------- models/PoissonSolver.cpp | 2 +- models/StokesModel.cpp | 17 +++++++++-------- tests/lbpm_electrokinetic_dfh_simulator.cpp | 3 +++ 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/models/IonModel.cpp b/models/IonModel.cpp index d6265abe..55c848b9 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -48,6 +48,15 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke tau.push_back(0.5+k2_inv*time_conv/(h*1.0e-6)/(h*1.0e-6)*IonDiffusivity[0]); //--------------------------------------------------------------------------// + // Read domain parameters + if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu + h = domain_db->getScalar( "voxel_length" ); + } + BoundaryCondition = 0; + if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } + // LB-Ion Model parameters //if (ion_db->keyExists( "timestepMax" )){ // timestepMax = ion_db->getScalar( "timestepMax" ); @@ -112,14 +121,6 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke BoundaryConditionSolid = ion_db->getScalar( "BC_Solid" ); } - // Read domain parameters - if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu - h = domain_db->getScalar( "voxel_length" ); - } - BoundaryCondition = 0; - if (domain_db->keyExists( "BC" )){ - BoundaryCondition = domain_db->getScalar( "BC" ); - } if (rank==0) printf("*****************************************************\n"); if (rank==0) printf("LB Ion Transport Solver: \n"); @@ -275,7 +276,7 @@ void ScaLBL_IonModel::AssignSolidBoundary(double *ion_solid) label_count_global[idx]=sumReduce( Dm->Comm, 
label_count[idx]); if (rank==0){ - printf("LB Ion Solver: Ion Solid labels: %lu \n",NLABELS); + printf("LB Ion Solver: number of ion solid labels: %lu \n",NLABELS); for (unsigned int idx=0; idxFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_IonConcentration_Phys(Ci, h, ic, 0, ScaLBL_Comm->LastExterior(), Np); } DoubleArray PhaseField(Nx,Ny,Nz); diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 3cf8a6d2..701c009c 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -224,7 +224,7 @@ void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid) label_count_global[idx]=sumReduce( Dm->Comm, label_count[idx]); if (rank==0){ - printf("LB-Poisson Solver: Poisson Solid labels: %lu \n",NLABELS); + printf("LB-Poisson Solver: number of Poisson solid labels: %lu \n",NLABELS); for (unsigned int idx=0; idxkeyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } + if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu + h = domain_db->getScalar( "voxel_length" ); + } + // Single-fluid Navier-Stokes Model parameters //if (stokes_db->keyExists( "timestepMax" )){ // timestepMax = stokes_db->getScalar( "timestepMax" ); @@ -75,14 +84,6 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ if (stokes_db->keyExists( "flux" )){ flux = stokes_db->getScalar( "flux" ); } - - // Read domain parameters - if (domain_db->keyExists( "BC" )){ - BoundaryCondition = domain_db->getScalar( "BC" ); - } - if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu - h = domain_db->getScalar( "voxel_length" ); - } // Re-calculate model parameters due to parameter read mu=(tau-0.5)/3.0; diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index 7cf835da..156fbc18 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -98,6 +98,9 @@ int main(int argc, char **argv) 
PoissonSolver.getElectricalPotential(); IonModel.getIonConcentration(); + if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); + if (rank==0) printf("*************************************************************\n"); + PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_electrokinetic_simulator",1); // **************************************************** From aa26fcafdaac09beb2e572efd9c1d98bc20f6131 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 28 Aug 2020 11:15:55 -0400 Subject: [PATCH 025/205] fix miscellaneous bugs and update the data structure of electric potential --- common/ScaLBL.cpp | 33 ++ common/ScaLBL.h | 40 +- cpu/D3Q7BC.cpp | 107 ++++++ cpu/Poisson.cpp | 350 +++++++++++++++--- cpu/Stokes.cpp | 162 ++++----- models/IonModel.cpp | 31 +- models/IonModel.h | 3 +- models/PoissonSolver.cpp | 383 ++++++++++++++++++-- models/PoissonSolver.h | 11 +- models/StokesModel.cpp | 90 +++-- models/StokesModel.h | 7 +- tests/lbpm_electrokinetic_dfh_simulator.cpp | 16 +- 12 files changed, 1027 insertions(+), 206 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 854c8b49..ed7c5e59 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -2053,3 +2053,36 @@ void ScaLBL_Communicator::PrintD3Q19(){ delete [] TempBuffer; } +void ScaLBL_Communicator::D3Q7_Poisson_Potential_BC_z(int *neighborList, double *fq, double Vin, int time){ + if (kproc == 0) { + if (time%2==0){ + ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(dvcSendList_z, fq, Vin, sendCount_z, N); + } + else{ + ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(neighborList, dvcSendList_z, fq, Vin, sendCount_z, N); + } + } +} + +void ScaLBL_Communicator::D3Q7_Poisson_Potential_BC_Z(int *neighborList, double *fq, double Vout, int time){ + if (kproc == nprocz-1){ + if (time%2==0){ + ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(dvcSendList_Z, fq, Vout, sendCount_Z, N); + } + else{ + ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(neighborList, dvcSendList_Z, fq, Vout, sendCount_Z, N); + } + } +} 
+ +void ScaLBL_Communicator::Poisson_D3Q7_BC_z(int *Map, double *Psi, double Vin){ + if (kproc == 0) { + ScaLBL_Poisson_D3Q7_BC_z(dvcSendList_z, Map, Psi, Vin, sendCount_z); + } +} + +void ScaLBL_Communicator::Poisson_D3Q7_BC_Z(int *Map, double *Psi, double Vout){ + if (kproc == nprocz-1){ + ScaLBL_Poisson_D3Q7_BC_Z(dvcSendList_Z, Map, Psi, Vout, sendCount_Z); + } +} diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 4d8a3dc3..414653a8 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -46,11 +46,8 @@ extern "C" void ScaLBL_UnpackDenD3Q7(int *list, int count, double *recvbuf, int extern "C" void ScaLBL_D3Q19_Init(double *Dist, int Np); - extern "C" void ScaLBL_D3Q19_Momentum(double *dist, double *vel, int Np); -extern "C" void ScaLBL_D3Q19_Momentum_Phys(double *dist, double *vel, double h, double time_conv, int Np); - extern "C" void ScaLBL_D3Q19_Pressure(double *dist, double *press, int Np); // BGK MODEL @@ -95,21 +92,30 @@ extern "C" void ScaLBL_IonConcentration_Phys(double *Den, double h, int ion_comp // LBM Poisson solver -extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma, +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList,int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Poisson(double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma, +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_Poisson_Init(double *dist, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int 
finish, int Np); + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np); + +extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC, + int strideY, int strideZ,int start, int finish, int Np); // LBM Stokes Model (adapted from MRT model) extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz, double Ex, double Ey, double Ez, int start, int finish, int Np); + double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz, double Ex, double Ey, double Ez, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, + double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np); // MRT MODEL extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, @@ -190,6 +196,18 @@ extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist,double *BoundaryValue,i extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist,double *BoundaryValue,int *BounceBackDist_list,int *BounceBackSolid_list,int N); +extern "C" void 
ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np); + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np); + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np); + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np); + +extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count); + +extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count); + class ScaLBL_Communicator{ public: //...................................................................................... @@ -249,6 +267,10 @@ public: void D3Q19_Reflection_BC_z(double *fq); void D3Q19_Reflection_BC_Z(double *fq); double D3Q19_Flux_BC_z(int *neighborList, double *fq, double flux, int time); + void D3Q7_Poisson_Potential_BC_z(int *neighborList, double *fq, double Vin, int time); + void D3Q7_Poisson_Potential_BC_Z(int *neighborList, double *fq, double Vout, int time); + void Poisson_D3Q7_BC_z(int *Map, double *Psi, double Vin); + void Poisson_D3Q7_BC_Z(int *Map, double *Psi, double Vout); // Debugging and unit testing functions void PrintD3Q19(); diff --git a/cpu/D3Q7BC.cpp b/cpu/D3Q7BC.cpp index e7bfd3a4..8c2588d8 100644 --- a/cpu/D3Q7BC.cpp +++ b/cpu/D3Q7BC.cpp @@ -30,3 +30,110 @@ extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist,double *BoundaryValue,int } } +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){ + for (int idx=0; idx0)+Psi[ijk]*(id<=0);// get neighbor for phi - 1 + //........................................................................ + nn = ijk+1; // neighbor index (get convention) + id = ID[nn]; + m2 = SolidBC==1 ? 
Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 2 + //........................................................................ + nn = ijk-strideY; // neighbor index (get convention) + id = ID[nn]; + m3 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 3 + //........................................................................ + nn = ijk+strideY; // neighbor index (get convention) + id = ID[nn]; + m4 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 4 + //........................................................................ + nn = ijk-strideZ; // neighbor index (get convention) + id = ID[nn]; + m5 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 5 + //........................................................................ + nn = ijk+strideZ; // neighbor index (get convention) + id = ID[nn]; + m6 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 6 + //........................................................................ + nn = ijk-strideY-1; // neighbor index (get convention) + id = ID[nn]; + m7 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 7 + //........................................................................ + nn = ijk+strideY+1; // neighbor index (get convention) + id = ID[nn]; + m8 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 8 + //........................................................................ + nn = ijk+strideY-1; // neighbor index (get convention) + id = ID[nn]; + m9 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 9 + //........................................................................ + nn = ijk-strideY+1; // neighbor index (get convention) + id = ID[nn]; + m10 = SolidBC==1 ? 
Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 10 + //........................................................................ + nn = ijk-strideZ-1; // neighbor index (get convention) + id = ID[nn]; + m11 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 11 + //........................................................................ + nn = ijk+strideZ+1; // neighbor index (get convention) + id = ID[nn]; + m12 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 12 + //........................................................................ + nn = ijk+strideZ-1; // neighbor index (get convention) + id = ID[nn]; + m13 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 13 + //........................................................................ + nn = ijk-strideZ+1; // neighbor index (get convention) + id = ID[nn]; + m14 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 14 + //........................................................................ + nn = ijk-strideZ-strideY; // neighbor index (get convention) + id = ID[nn]; + m15 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 15 + //........................................................................ + nn = ijk+strideZ+strideY; // neighbor index (get convention) + id = ID[nn]; + m16 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 16 + //........................................................................ + nn = ijk+strideZ-strideY; // neighbor index (get convention) + id = ID[nn]; + m17 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 17 + //........................................................................ + nn = ijk-strideZ+strideY; // neighbor index (get convention) + id = ID[nn]; + m18 = SolidBC==1 ? 
Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 18 + //............Compute the Color Gradient................................... + nx = -1.f/18.f*(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); + ny = -1.f/18.f*(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); + nz = -1.f/18.f*(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); + + ElectricField[n] = nx; + ElectricField[Np+n] = ny; + ElectricField[2*Np+n] = nz; + } +} + diff --git a/cpu/Stokes.cpp b/cpu/Stokes.cpp index a31a8bed..a3842345 100644 --- a/cpu/Stokes.cpp +++ b/cpu/Stokes.cpp @@ -1,7 +1,6 @@ #include -extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, - double Gx, double Gy, double Gz, double Ex_const, double Ey_const, double Ez_const, int start, int finish, int Np) +extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np) { double fq; // conserved momemnts @@ -32,13 +31,13 @@ extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, do //Load data rhoE = ChargeDensity[n]; - Ex = ElectricField[n+0*Np]+Ex_const; - Ey = ElectricField[n+1*Np]+Ey_const; - Ez = ElectricField[n+2*Np]+Ez_const; + Ex = ElectricField[n+0*Np]; + Ey = ElectricField[n+1*Np]; + Ez = ElectricField[n+2*Np]; //compute total body force, including input body force (Gx,Gy,Gz) - Fx = Gx + rhoE*Ex; - Fy = Gy + rhoE*Ey; - Fz = Gz + rhoE*Ez; + Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale;//the extra factors at the end necessarily convert unit from phys to LB + Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; // q=0 fq = dist[n]; @@ -311,9 +310,9 @@ extern "C" void 
ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, do m18 -= fq; // write the velocity - ux = jx / rho; - uy = jy / rho; - uz = jz / rho; + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; Velocity[n] = ux; Velocity[Np+n] = uy; Velocity[2*Np+n] = uz; @@ -326,18 +325,18 @@ extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, do //..............incorporate external force................................................ //..............carry out relaxation process............................................... - m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1); - m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2); + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2); m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); - m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9); m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); - m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11); - m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12); - m13 = m13 + rlx_setA*((jx*jy/rho) - m13); - m14 = m14 + rlx_setA*((jy*jz/rho) - m14); - m15 = m15 + rlx_setA*((jx*jz/rho) - m15); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho0) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho0) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho0) - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); @@ -454,8 +453,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, do } } -extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, 
double rlx_setB, - double Gx, double Gy, double Gz, double Ex_const, double Ey_const, double Ez_const, int start, int finish, int Np) +extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np) { double fq; // conserved momemnts @@ -487,13 +485,13 @@ extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, do //Load data rhoE = ChargeDensity[n]; - Ex = ElectricField[n+0*Np]+Ex_const; - Ey = ElectricField[n+1*Np]+Ey_const; - Ez = ElectricField[n+2*Np]+Ez_const; + Ex = ElectricField[n+0*Np]; + Ey = ElectricField[n+1*Np]; + Ez = ElectricField[n+2*Np]; //compute total body force, including input body force (Gx,Gy,Gz) - Fx = Gx + rhoE*Ex; - Fy = Gy + rhoE*Ey; - Fz = Gz + rhoE*Ez; + Fx = Gx + rhoE*Ex*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + Fy = Gy + rhoE*Ey*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + Fz = Gz + rhoE*Ez*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; // q=0 fq = dist[n]; @@ -803,27 +801,27 @@ extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, do m18 -= fq; // write the velocity - ux = jx / rho; - uy = jy / rho; - uz = jz / rho; + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; Velocity[n] = ux; Velocity[Np+n] = uy; Velocity[2*Np+n] = uz; //..............incorporate external force................................................ //..............carry out relaxation process............................................... 
- m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1); - m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2); + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2); m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); - m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9); m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); - m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11); - m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12); - m13 = m13 + rlx_setA*((jx*jy/rho) - m13); - m14 = m14 + rlx_setA*((jy*jz/rho) - m14); - m15 = m15 + rlx_setA*((jx*jz/rho) - m15); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho0) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho0) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho0) - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); @@ -955,47 +953,47 @@ extern "C" void ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, do } } -extern "C" void ScaLBL_D3Q19_Momentum_Phys(double *dist, double *vel, double h, double time_conv, int Np) -{ - //h: resolution [um/lu] - //time_conv: time conversion factor [sec/lt] - int n; - // distributions - double f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - double vx,vy,vz; - - for (n=0; n rlx(tau.begin(),tau.end()); for (double item : rlx){ item = 1.0/item; } + //.......create and start timer............ 
//double starttime,stoptime,cputime; //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); @@ -462,7 +467,9 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ } -void ScaLBL_IonModel::getIonConcentration(){ +//TODO this ruin the ion concentration on device +//need to do something similar to electric field +void ScaLBL_IonModel::getIonConcentration(int timestep){ for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_IonConcentration_Phys(Ci, h, ic, 0, ScaLBL_Comm->LastExterior(), Np); @@ -474,7 +481,7 @@ void ScaLBL_IonModel::getIonConcentration(){ ScaLBL_DeviceBarrier(); MPI_Barrier(comm); FILE *OUTFILE; - sprintf(LocalRankFilename,"Ion%02i.%05i.raw",ic+1,rank); + sprintf(LocalRankFilename,"Ion%02i_Time_%i.%05i.raw",ic+1,timestep,rank); OUTFILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,OUTFILE); fclose(OUTFILE); @@ -482,3 +489,23 @@ void ScaLBL_IonModel::getIonConcentration(){ } +//void ScaLBL_IonModel::getIonConcentration(){ +// for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); +// ScaLBL_IonConcentration_Phys(Ci, h, ic, 0, ScaLBL_Comm->LastExterior(), Np); +// } +// +// DoubleArray PhaseField(Nx,Ny,Nz); +// for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); +// ScaLBL_DeviceBarrier(); MPI_Barrier(comm); +// +// FILE *OUTFILE; +// sprintf(LocalRankFilename,"Ion%02i.%05i.raw",ic+1,rank); +// OUTFILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,OUTFILE); +// fclose(OUTFILE); +// } +// +//} + diff --git a/models/IonModel.h b/models/IonModel.h index 6232d105..59e5b6e6 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -29,7 +29,7 @@ public: void Create(); void Initialize(); void Run(double *Velocity, double *ElectricField); - void getIonConcentration(); + void getIonConcentration(int timestep); //bool Restart,pBC; int timestep,timestepMax; @@ -40,6 +40,7 @@ public: double kb,electron_charge,T,Vt; double k2_inv; double tolerance; + double Ex,Ey,Ez; int 
number_ion_species; vector IonDiffusivity;//User input unit [m^2/sec] diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 701c009c..c3e5c019 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -7,7 +7,7 @@ ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),gamma(0),tolerance(0),h(0), -epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), +epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -22,17 +22,20 @@ void ScaLBL_Poisson::ReadParams(string filename){ domain_db = db->getDatabase( "Domain" ); electric_db = db->getDatabase( "Poisson" ); - k2_inv = 4.5;//speed of sound for D3Q7 lattice - gamma = 0.3;//time step of LB-Poisson equation + //k2_inv = 4.5;//speed of sound for D3Q7 lattice + k2_inv = 4.0;//speed of sound for D3Q7 lattice + gamma = 1.0;//time step of LB-Poisson equation tau = 0.5+k2_inv*gamma; timestepMax = 100000; tolerance = 1.0e-6;//stopping criterion for obtaining steady-state electricla potential h = 1.0;//resolution; unit: um/lu - epsilon0 = 8.85e-12;//electrical permittivity of vaccum; unit:[C/(V*m)] + epsilon0 = 8.85e-12;//electric permittivity of vaccum; unit:[C/(V*m)] epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] epsilonR = 78.4;//default dielectric constant of water - epsilon_LB = epsilon0_LB*epsilonR;//electrical permittivity + epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity analysis_interval = 1000; + Vin = 1.0; //Boundary-z (inlet) electric potential + Vout = 1.0; //Boundary-Z (outlet) electric potential // LB-Poisson Model parameters if (electric_db->keyExists( "timestepMax" )){ @@ -56,20 +59,23 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( 
"BC_Solid" )){ BoundaryConditionSolid = electric_db->getScalar( "BC_Solid" ); } + // Read boundary condition for electric potentiona + // BC = 0: normal periodic BC + // BC = 1: fixed inlet and outlet potential + BoundaryCondition = 0; + if (electric_db->keyExists( "BC" )){ + BoundaryCondition = electric_db->getScalar( "BC" ); + } // Read domain parameters if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu h = domain_db->getScalar( "voxel_length" ); } - BoundaryCondition = 0; - if (domain_db->keyExists( "BC" )){ - BoundaryCondition = domain_db->getScalar( "BC" ); - } //Re-calcualte model parameters if user updates input epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] - epsilon_LB = epsilon0_LB*epsilonR;//electrical permittivity + epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity tau = 0.5+k2_inv*gamma; if (rank==0) printf("***********************************************************************************\n"); @@ -202,13 +208,13 @@ void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid) AFFINITY=0.f; // Assign the affinity from the paired list for (unsigned int idx=0; idx < NLABELS; idx++){ - //printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]); if (VALUE == LabelList[idx]){ AFFINITY=AffinityList[idx]; //NOTE need to convert the user input phys unit to LB unit if (BoundaryConditionSolid==2){ //for BCS=1, i.e. Dirichlet-type, no need for unit conversion - AFFINITY = AFFINITY*(h*h*1.0e-12); + //TODO maybe there is a factor of gamm missing here ? 
+ AFFINITY = AFFINITY*(h*h*1.0e-12)/epsilon_LB; } label_count[idx] += 1.0; idx = NLABELS; @@ -244,7 +250,6 @@ void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid) } } - void ScaLBL_Poisson::Create(){ /* * This function creates the variables needed to run a LBM @@ -260,6 +265,7 @@ void ScaLBL_Poisson::Create(){ // Create a communicator for the device (will use optimized layout) // ScaLBL_Communicator ScaLBL_Comm(Mask); // original ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); int Npad=(Np/16 + 2)*16; if (rank==0) printf ("LB-Poisson Solver: Set up memory efficient layout \n"); @@ -277,37 +283,125 @@ void ScaLBL_Poisson::Create(){ int neighborSize=18*(Np*sizeof(int)); //........................................................................... ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); + ScaLBL_AllocateDeviceMemory((void **) &dvcID, sizeof(signed char)*Nx*Ny*Nz); ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size); - ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Nx*Ny*Nz); ScaLBL_AllocateDeviceMemory((void **) &ElectricField, 3*sizeof(double)*Np); - ScaLBL_AllocateDeviceMemory((void **) &PoissonSolid, sizeof(double)*Nx*Ny*Nz); + //ScaLBL_AllocateDeviceMemory((void **) &PoissonSolid, sizeof(double)*Nx*Ny*Nz); //........................................................................... // Update GPU data structures if (rank==0) printf ("LB-Poisson Solver: Setting up device map and neighbor list \n"); + fflush(stdout); + int *TmpMap; + TmpMap=new int[Np]; + for (int k=1; kLastExterior(); idx++){ + auto n = TmpMap[idx]; + if (n > Nx*Ny*Nz){ + printf("Bad value! 
idx=%i \n", n); + TmpMap[idx] = Nx*Ny*Nz-1; + } + } + for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ + auto n = TmpMap[idx]; + if ( n > Nx*Ny*Nz ){ + printf("Bad value! idx=%i \n",n); + TmpMap[idx] = Nx*Ny*Nz-1; + } + } + ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); + ScaLBL_DeviceBarrier(); + delete [] TmpMap; // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + delete [] neighborList; + // copy node ID + ScaLBL_CopyToDevice(dvcID, Mask->id, sizeof(signed char)*Nx*Ny*Nz); + ScaLBL_DeviceBarrier(); - //Initialize solid boundary for electrical potential + //Initialize solid boundary for electric potential ScaLBL_Comm->SetupBounceBackList(Map, Mask->id, Np); MPI_Barrier(comm); - double *PoissonSolid_host; - PoissonSolid_host = new double[Nx*Ny*Nz]; - AssignSolidBoundary(PoissonSolid_host); - ScaLBL_CopyToDevice(PoissonSolid, PoissonSolid_host, Nx*Ny*Nz*sizeof(double)); - ScaLBL_DeviceBarrier(); - delete [] PoissonSolid_host; + //double *PoissonSolid_host; + //PoissonSolid_host = new double[Nx*Ny*Nz]; + //AssignSolidBoundary(PoissonSolid_host); + //ScaLBL_CopyToDevice(PoissonSolid, PoissonSolid_host, Nx*Ny*Nz*sizeof(double)); + //ScaLBL_DeviceBarrier(); + //delete [] PoissonSolid_host; } +// Method 1 +// Psi - size N +// ID_dvc - size N +// Method 2 +// Psi - size Np +// PoissonSolid size N + +void ScaLBL_Poisson::Potential_Init(double *psi_init){ + + if (BoundaryCondition==1){ + if (electric_db->keyExists( "Vin" )){ + Vin = electric_db->getScalar( "Vin" ); + } + if (electric_db->keyExists( "Vout" )){ + Vout = electric_db->getScalar( "Vout" ); + } + } + //By default only periodic BC is applied and Vin=Vout=1.0, i.e. 
there is no potential gradient along Z-axis + double slope = (Vout-Vin)/(Nz-2); + double psi_linearized; + for (int k=0;kid[n]>0){ + psi_init[n] = psi_linearized; + } + } + } + } +} + void ScaLBL_Poisson::Initialize(){ /* * This function initializes model */ if (rank==0) printf ("LB-Poisson Solver: initializing D3Q7 distributions\n"); - ScaLBL_D3Q7_Poisson_Init(fq, Np); + //NOTE the initialization involves two steps: + //1. assign solid boundary value (surface potential or surface change density) + //2. Initialize electric potential for pore nodes + double *psi_host; + psi_host = new double [Nx*Ny*Nz]; + AssignSolidBoundary(psi_host);//step1 + Potential_Init(psi_host);//step2 + ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double)); + ScaLBL_DeviceBarrier(); + ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); + delete [] psi_host; } void ScaLBL_Poisson::Run(double *ChargeDensity){ @@ -325,30 +419,83 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ // *************ODD TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); // Set boundary conditions - /* ... 
*/ - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_Comm->SolidDirichletD3Q7(fq, PoissonSolid); + if (BoundaryCondition == 1){ + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + } + //-------------------------// + ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); + + //compute electric field + ScaLBL_Comm_Regular->SendHalo(Psi); + ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, + Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Psi); + ScaLBL_DeviceBarrier(); + if (BoundaryCondition == 1){ + ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin); + ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout); + } + ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + + //perform collision + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); + if (BoundaryConditionSolid==1){ + ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); + } + else if (BoundaryConditionSolid==2){ + ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); + } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// timestep++; ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL - ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + 
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); // Set boundary conditions - /* ... */ - ScaLBL_D3Q7_AAeven_Poisson(fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_Comm->SolidDirichletD3Q7(fq, PoissonSolid); + if (BoundaryCondition == 1){ + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + } + //-------------------------// + ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); + + //compute electric field + ScaLBL_Comm_Regular->SendHalo(Psi); + ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, + Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Psi); + ScaLBL_DeviceBarrier(); + if (BoundaryCondition == 1){ + ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin); + ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout); + } + ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + + //perform collision + ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); + if (BoundaryConditionSolid==1){ + ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); + } + else if (BoundaryConditionSolid==2){ + ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); + } ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ // Check convergence of 
steady-state solution if (timestep%analysis_interval==0){ - ScaLBL_Comm->RegularLayout(Map,Psi,Psi_host); + //ScaLBL_Comm->RegularLayout(Map,Psi,Psi_host); + ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz); double count_loc=0; double count; double psi_avg; @@ -393,14 +540,180 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ } -void ScaLBL_Poisson::getElectricalPotential(){ +void ScaLBL_Poisson::getElectricPotential(int timestep){ DoubleArray PhaseField(Nx,Ny,Nz); - ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); + //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); + ScaLBL_CopyToHost(PhaseField.data(),Psi,sizeof(double)*Nx*Ny*Nz); //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); FILE *OUTFILE; - sprintf(LocalRankFilename,"Electrical_Potential.%05i.raw",rank); + sprintf(LocalRankFilename,"Electric_Potential_Time_%i.%05i.raw",timestep,rank); OUTFILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,OUTFILE); fclose(OUTFILE); } + +void ScaLBL_Poisson::getElectricField(int timestep){ + + //ScaLBL_D3Q7_Poisson_getElectricField(fq,ElectricField,tau,Np); + //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + + DoubleArray PhaseField(Nx,Ny,Nz); + ScaLBL_Comm->RegularLayout(Map,&ElectricField[0*Np],PhaseField); + ElectricField_LB_to_Phys(PhaseField); + FILE *EX; + sprintf(LocalRankFilename,"ElectricField_X_Time_%i.%05i.raw",timestep,rank); + EX = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,EX); + fclose(EX); + + ScaLBL_Comm->RegularLayout(Map,&ElectricField[1*Np],PhaseField); + ElectricField_LB_to_Phys(PhaseField); + FILE *EY; + sprintf(LocalRankFilename,"ElectricField_Y_Time_%i.%05i.raw",timestep,rank); + EY = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,EY); + fclose(EY); + + ScaLBL_Comm->RegularLayout(Map,&ElectricField[2*Np],PhaseField); + ElectricField_LB_to_Phys(PhaseField); + FILE *EZ; + sprintf(LocalRankFilename,"ElectricField_Z_Time_%i.%05i.raw",timestep,rank); + EZ = fopen(LocalRankFilename,"wb"); + 
fwrite(PhaseField.data(),8,N,EZ); + fclose(EZ); +} + + +void ScaLBL_Poisson::ElectricField_LB_to_Phys(DoubleArray &Efield_reg){ + for (int k=0;kRegularLayout(Map,Psi,PhaseField); +// //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); +// FILE *OUTFILE; +// sprintf(LocalRankFilename,"Electric_Potential.%05i.raw",rank); +// OUTFILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,OUTFILE); +// fclose(OUTFILE); +//} + +//old version where Psi is of size Np +//void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid) +//{ +// size_t NLABELS=0; +// signed char VALUE=0; +// double AFFINITY=0.f; +// +// auto LabelList = electric_db->getVector( "SolidLabels" ); +// auto AffinityList = electric_db->getVector( "SolidValues" ); +// +// NLABELS=LabelList.size(); +// if (NLABELS != AffinityList.size()){ +// ERROR("Error: LB-Poisson Solver: SolidLabels and SolidValues must be the same length! \n"); +// } +// +// double label_count[NLABELS]; +// double label_count_global[NLABELS]; +// // Assign the labels +// +// for (size_t idx=0; idxid[n]; +// AFFINITY=0.f; +// // Assign the affinity from the paired list +// for (unsigned int idx=0; idx < NLABELS; idx++){ +// //printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]); +// if (VALUE == LabelList[idx]){ +// AFFINITY=AffinityList[idx]; +// //NOTE need to convert the user input phys unit to LB unit +// if (BoundaryConditionSolid==2){ +// //for BCS=1, i.e. Dirichlet-type, no need for unit conversion +// //TODO maybe there is a factor of gamm missing here ? 
+// AFFINITY = AFFINITY*(h*h*1.0e-12)/epsilon_LB; +// } +// label_count[idx] += 1.0; +// idx = NLABELS; +// //Mask->id[n] = 0; // set mask to zero since this is an immobile component +// } +// } +// poisson_solid[n] = AFFINITY; +// } +// } +// } +// +// for (size_t idx=0; idxComm, label_count[idx]); +// +// if (rank==0){ +// printf("LB-Poisson Solver: number of Poisson solid labels: %lu \n",NLABELS); +// for (unsigned int idx=0; idxkeyExists( "Vin" )){ +// Vin = electric_db->getScalar( "Vin" ); +// } +// if (electric_db->keyExists( "Vout" )){ +// Vout = electric_db->getScalar( "Vout" ); +// } +// } +// //By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis +// double slope = (Vout-Vin)/(Nz-2); +// double psi_linearized; +// for (int k=0;k Dm; // this domain is for analysis std::shared_ptr Mask; // this domain is for lbm std::shared_ptr ScaLBL_Comm; + std::shared_ptr ScaLBL_Comm_Regular; // input database std::shared_ptr db; std::shared_ptr domain_db; @@ -57,10 +60,12 @@ public: DoubleArray Distance; DoubleArray Psi_host; int *NeighborList; + int *dvcMap; + signed char *dvcID; double *fq; double *Psi; double *ElectricField; - double *PoissonSolid; + //double *PoissonSolid; private: MPI_Comm comm; @@ -73,4 +78,6 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); void AssignSolidBoundary(double *poisson_solid); + void Potential_Init(double *psi_init); + void ElectricField_LB_to_Phys(DoubleArray &Efield_reg); }; diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 41520253..caaf2877 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -7,7 +7,7 @@ ScaLBL_StokesModel::ScaLBL_StokesModel(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), -Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0),h(0),nu_phys(0),time_conv(0),tolerance(0), 
+Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0),h(0),nu_phys(0),rho_phys(0),rho0(0),den_scale(0),time_conv(0),tolerance(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -27,17 +27,17 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ //-------------------------------------------------------------------// //---------------------- Default model parameters --------------------------// + rho_phys = 1000.0; //by default use water density; unit [kg/m^3] nu_phys = 1.004e-6;//by default use water kinematic viscosity at 20C; unit [m^2/sec] h = 1.0;//image resolution;[um] tau = 1.0; mu = (tau-0.5)/3.0;//LB kinematic viscosity;unit [lu^2/lt] time_conv = h*h*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] + rho0 = 1.0;//LB density + den_scale = rho_phys/rho0*(h*h*h*1.0e-18);//scale factor for density tolerance = 1.0e-8; Fx = Fy = 0.0; Fz = 1.0e-5; - //Body electric field [V/lu] - Ex = Ey = 0.0; - Ez = 1.0e-3; //--------------------------------------------------------------------------// // Read domain parameters @@ -59,19 +59,20 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ if (stokes_db->keyExists( "tau" )){ tau = stokes_db->getScalar( "tau" ); } + if (stokes_db->keyExists( "rho0" )){ + rho0 = stokes_db->getScalar( "rho0" ); + } if (stokes_db->keyExists( "nu_phys" )){ nu_phys = stokes_db->getScalar( "nu_phys" ); } + if (stokes_db->keyExists( "rho_phys" )){ + rho_phys = stokes_db->getScalar( "rho_phys" ); + } if (stokes_db->keyExists( "F" )){ Fx = stokes_db->getVector( "F" )[0]; Fy = stokes_db->getVector( "F" )[1]; Fz = stokes_db->getVector( "F" )[2]; } - if (stokes_db->keyExists( "ElectricField" )){//NOTE user-input has physical unit [V/m] - Ex = stokes_db->getVector( "ElectricField" )[0]; - Ey = stokes_db->getVector( "ElectricField" )[1]; - Ez = stokes_db->getVector( "ElectricField" )[2]; - } if (stokes_db->keyExists( "Restart" )){ Restart = 
stokes_db->getScalar( "Restart" ); } @@ -88,10 +89,7 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ // Re-calculate model parameters due to parameter read mu=(tau-0.5)/3.0; time_conv = (h*h*1.0e-12)*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] - // convert user-input electric field ([V/m]) from physical unit to LB unit - Ex = Ex*(h*1.0e-6);//LB electric field: V/lu - Ey = Ey*(h*1.0e-6); - Ez = Ez*(h*1.0e-6); + den_scale = rho_phys/rho0*(h*h*h*1.0e-18);//scale factor for density if (rank==0) printf("*****************************************************\n"); if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: \n"); @@ -246,7 +244,7 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ while (timestep < timestepMax) { //************************************************************************/ ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, Ex, Ey, Ez, + ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE // Set boundary conditions @@ -262,12 +260,14 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, Ex, Ey, Ez, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, + 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM 
NORMAL - ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, Ex, Ey, Ez, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, + ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE // Set boundary conditions if (BoundaryCondition == 3){ @@ -282,41 +282,87 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz, Ex, Ey, Ez, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, + 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ } } -void ScaLBL_StokesModel::getVelocity(){ +void ScaLBL_StokesModel::getVelocity(int timestep){ //get velocity in physical unit [m/sec] - ScaLBL_D3Q19_Momentum_Phys(fq, Velocity, h, time_conv, Np); + ScaLBL_D3Q19_Momentum(fq, Velocity, Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); DoubleArray PhaseField(Nx,Ny,Nz); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + Velocity_LB_to_Phys(PhaseField); FILE *VELX_FILE; - sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); + sprintf(LocalRankFilename,"Velocity_X_Time_%i.%05i.raw",timestep,rank); VELX_FILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,VELX_FILE); fclose(VELX_FILE); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + Velocity_LB_to_Phys(PhaseField); FILE *VELY_FILE; - sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); + 
sprintf(LocalRankFilename,"Velocity_Y_Time_%i.%05i.raw",timestep,rank); VELY_FILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,VELY_FILE); fclose(VELY_FILE); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + Velocity_LB_to_Phys(PhaseField); FILE *VELZ_FILE; - sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); + sprintf(LocalRankFilename,"Velocity_Z_Time_%i.%05i.raw",timestep,rank); VELZ_FILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,VELZ_FILE); fclose(VELZ_FILE); } +void ScaLBL_StokesModel::Velocity_LB_to_Phys(DoubleArray &Vel_reg){ + for (int k=0;kRegularLayout(Map,&Velocity[0],PhaseField); +// FILE *VELX_FILE; +// sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); +// VELX_FILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,VELX_FILE); +// fclose(VELX_FILE); +// +// ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); +// FILE *VELY_FILE; +// sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); +// VELY_FILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,VELY_FILE); +// fclose(VELY_FILE); +// +// ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); +// FILE *VELZ_FILE; +// sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); +// VELZ_FILE = fopen(LocalRankFilename,"wb"); +// fwrite(PhaseField.data(),8,N,VELZ_FILE); +// fclose(VELZ_FILE); +// +//} + void ScaLBL_StokesModel::Run(){ double rlx_setA=1.0/tau; double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); diff --git a/models/StokesModel.h b/models/StokesModel.h index f0a4de6a..d40df415 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -30,19 +30,21 @@ public: void Run(); void Run_Lite(double *ChargeDensity, double *ElectricField); void VelocityField(); - void getVelocity(); + void getVelocity(int timestep); bool Restart,pBC; int timestep,timestepMax; int BoundaryCondition; double tau,mu; + double rho0; double Fx,Fy,Fz,flux; - double Ex,Ey,Ez; double din,dout; double tolerance; double 
nu_phys; + double rho_phys; double time_conv; double h;//image resolution + double den_scale;//scale factor for density int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; @@ -78,4 +80,5 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); + void Velocity_LB_to_Phys(DoubleArray &Vel_reg); }; diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index 156fbc18..1df5c5e1 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -78,14 +78,17 @@ int main(int argc, char **argv) while (timestep < Study.timestepMax){ timestep++; - if (rank==0) printf("timestep=%i; running Poisson solver\n",timestep); + //if (rank==0) printf("timestep=%i; running Poisson solver\n",timestep); PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + //PoissonSolver.getElectricPotential(timestep); - if (rank==0) printf("timestep=%i; running StokesModel\n",timestep); + //if (rank==0) printf("timestep=%i; running StokesModel\n",timestep); StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity + //StokesModel.getVelocity(timestep); - if (rank==0) printf("timestep=%i; running Ion model\n",timestep); + //if (rank==0) printf("timestep=%i; running Ion model\n",timestep); IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential + //IonModel.getIonConcentration(timestep); timestep++;//AA operations @@ -94,9 +97,10 @@ int main(int argc, char **argv) //-------------------------------------------- } - StokesModel.getVelocity(); - PoissonSolver.getElectricalPotential(); - IonModel.getIonConcentration(); + StokesModel.getVelocity(timestep); + PoissonSolver.getElectricPotential(timestep); + PoissonSolver.getElectricField(timestep); + IonModel.getIonConcentration(timestep); if (rank==0) printf("Maximum timestep is 
reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); From 75e8647051f71acf3a1275fdd406abaeeb8d5a5f Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 28 Aug 2020 13:31:43 -0400 Subject: [PATCH 026/205] re-factor electric solvers for unit testing --- models/PoissonSolver.cpp | 129 ++++++++++++++++++++------------------- models/PoissonSolver.h | 6 ++ 2 files changed, 73 insertions(+), 62 deletions(-) diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index c3e5c019..8b6ca850 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -418,76 +418,20 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ //************************************************************************/ // *************ODD TIMESTEP*************// timestep++; - ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); - } - //-------------------------// - ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); - + SolveElectricPotentialAAodd(); //compute electric field - ScaLBL_Comm_Regular->SendHalo(Psi); - ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, - Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Psi); - ScaLBL_DeviceBarrier(); - if (BoundaryCondition == 1){ - ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin); - ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout); - } - 
ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - + SolveElectricField(); //perform collision - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); - if (BoundaryConditionSolid==1){ - ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); - } - else if (BoundaryConditionSolid==2){ - ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); - } + SolvePoissonAAodd(); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// timestep++; - ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL - ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); - } - //-------------------------// - ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); - + SolveElectricPotentialAAeven(); //compute electric field - ScaLBL_Comm_Regular->SendHalo(Psi); - ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, - Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Psi); - ScaLBL_DeviceBarrier(); - if (BoundaryCondition == 1){ - ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin); - ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout); - } - ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, 
ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - + SolveElectricField(); //perform collision - ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); - if (BoundaryConditionSolid==1){ - ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); - } - else if (BoundaryConditionSolid==2){ - ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); - } + SolvePoissonAAeven() ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ @@ -584,6 +528,67 @@ void ScaLBL_Poisson::getElectricField(int timestep){ fclose(EZ); } +void ScaLBL_Poisson::SolveElectricPotentialAAodd(){ + ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set boundary conditions + if (BoundaryCondition == 1){ + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + } + //-------------------------// + ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); +} +void ScaLBL_Poisson::SolveElectricField(){ + ScaLBL_Comm_Regular->SendHalo(Psi); + ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, + Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Psi); + ScaLBL_DeviceBarrier(); + if (BoundaryCondition == 1){ + ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin); + ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout); + } + 
ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + +} +void ScaLBL_Poisson::SolvePoissonAAodd(){ + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); + if (BoundaryConditionSolid==1){ + ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); + } + else if (BoundaryConditionSolid==2){ + ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); + } +} + +void ScaLBL_Poisson::SolveElectricPotentialAAeven(){ + ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL + ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set boundary conditions + if (BoundaryCondition == 1){ + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + } + //-------------------------// + ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); +} +void ScaLBL_Poisson::SolvePoissonAAeven(){ + ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); + if (BoundaryConditionSolid==1){ + ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); + } + else if (BoundaryConditionSolid==2){ + ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); + } +} + void ScaLBL_Poisson::ElectricField_LB_to_Phys(DoubleArray &Efield_reg){ for (int k=0;k Date: Fri, 28 Aug 
2020 13:37:36 -0400 Subject: [PATCH 027/205] fix poisson solver for unit test --- models/PoissonSolver.cpp | 8 ++++---- models/PoissonSolver.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 8b6ca850..605ad3a2 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -422,7 +422,7 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ //compute electric field SolveElectricField(); //perform collision - SolvePoissonAAodd(); + SolvePoissonAAodd(ChargeDensity); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// @@ -431,7 +431,7 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ //compute electric field SolveElectricField(); //perform collision - SolvePoissonAAeven() + SolvePoissonAAeven(ChargeDensity); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ @@ -554,7 +554,7 @@ void ScaLBL_Poisson::SolveElectricField(){ ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); } -void ScaLBL_Poisson::SolvePoissonAAodd(){ +void ScaLBL_Poisson::SolvePoissonAAodd(double *ChargeDensity){ ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); if (BoundaryConditionSolid==1){ @@ -578,7 +578,7 @@ void ScaLBL_Poisson::SolveElectricPotentialAAeven(){ //-------------------------// ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); } -void ScaLBL_Poisson::SolvePoissonAAeven(){ +void ScaLBL_Poisson::SolvePoissonAAeven(double *ChargeDensity){ ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, 
ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); if (BoundaryConditionSolid==1){ diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index bdb90e53..42c47afb 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -31,8 +31,8 @@ public: void SolveElectricPotentialAAodd(); void SolveElectricPotentialAAeven(); void SolveElectricField(); - void SolvePoissonAAodd(); - void SolvePoissonAAeven(); + void SolvePoissonAAodd(double *ChargeDensity); + void SolvePoissonAAeven(double *ChargeDensity); void getElectricPotential(int timestep); void getElectricField(int timestep); From 0a7b1c331b510a80e0194d507749567f8cd52d6c Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 28 Aug 2020 21:53:41 -0400 Subject: [PATCH 028/205] further clean up the code --- common/ScaLBL.h | 6 +- cpu/Poisson.cpp | 148 ++++++++++++++++++++------------------- models/PoissonSolver.cpp | 12 ++-- 3 files changed, 85 insertions(+), 81 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 414653a8..14010169 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -92,10 +92,10 @@ extern "C" void ScaLBL_IonConcentration_Phys(double *Den, double h, int ion_comp // LBM Poisson solver -extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList,int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma, +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList,int *Map, double *dist, double *Den_charge, double *Psi, double tau, double epsilon_LB,double gamma, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma, +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, 
double *dist, double *Den_charge, double *Psi, double tau, double epsilon_LB,double gamma, int start, int finish, int Np); extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np); @@ -104,8 +104,6 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *d extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np); - extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC, int strideY, int strideZ,int start, int finish, int Np); diff --git a/cpu/Poisson.cpp b/cpu/Poisson.cpp index 8deaac8e..583f9c1d 100644 --- a/cpu/Poisson.cpp +++ b/cpu/Poisson.cpp @@ -88,10 +88,10 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *d } } -extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma,int start, int finish, int Np){ +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double tau, double epsilon_LB,double gamma,int start, int finish, int Np){ int n; double psi;//electric potential - double Ex,Ey,Ez;//electric field + //double Ex,Ey,Ez;//electric field double rho_e;//local charge density double f0,f1,f2,f3,f4,f5,f6; int nr1,nr2,nr3,nr4,nr5,nr6; @@ -142,40 +142,40 @@ extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *d //ElectricField[n+2*Np] = Ez; // q = 0 - //dist[n] = f0*(1.0-rlx) + 0.3333333333333333*(rlx*psi+rho_e); - dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e); + dist[n] = f0*(1.0-rlx) + 0.3333333333333333*(rlx*psi+rho_e); + //dist[n] = f0*(1.0-rlx) + 
0.25*(rlx*psi+rho_e); // q = 1 - //dist[nr2] = f1*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[nr2] = f1*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 2 - //dist[nr1] = f2*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[nr1] = f2*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 3 - //dist[nr4] = f3*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[nr4] = f3*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 4 - //dist[nr3] = f4*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[nr3] = f4*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 5 - //dist[nr6] = f5*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[nr6] = f5*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 6 - //dist[nr5] = f6*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[nr5] = f6*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e); //........................................................................ 
} } -extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,double gamma,int start, int finish, int Np){ +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double tau, double epsilon_LB,double gamma,int start, int finish, int Np){ int n; double psi;//electric potential - double Ex,Ey,Ez;//electric field + //double Ex,Ey,Ez;//electric field double rho_e;//local charge density double f0,f1,f2,f3,f4,f5,f6; double rlx=1.0/tau; @@ -209,32 +209,32 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_c //ElectricField[n+2*Np] = Ez; // q = 0 - //dist[n] = f0*(1.0-rlx) + 0.3333333333333333*(rlx*psi+rho_e); - dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e); + dist[n] = f0*(1.0-rlx) + 0.3333333333333333*(rlx*psi+rho_e); + //dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e); // q = 1 - //dist[1*Np+n] = f1*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[1*Np+n] = f1*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 2 - //dist[2*Np+n] = f2*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[2*Np+n] = f2*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 3 - //dist[3*Np+n] = f3*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[3*Np+n] = f3*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 4 - //dist[4*Np+n] = f4*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[4*Np+n] = f4*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 5 - 
//dist[5*Np+n] = f5*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[5*Np+n] = f5*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e); // q = 6 - //dist[6*Np+n] = f6*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); - dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + dist[6*Np+n] = f6*(1.0-rlx) + 0.1111111111111111*(rlx*psi+rho_e); + //dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e); //........................................................................ } } @@ -260,45 +260,20 @@ extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, in //dist[5*Np+n] = 0.125*Psi[n]; //dist[6*Np+n] = 0.125*Psi[n]; - dist[0*Np+n] = 0.25*Psi[ijk]; - dist[1*Np+n] = 0.125*Psi[ijk]; - dist[2*Np+n] = 0.125*Psi[ijk]; - dist[3*Np+n] = 0.125*Psi[ijk]; - dist[4*Np+n] = 0.125*Psi[ijk]; - dist[5*Np+n] = 0.125*Psi[ijk]; - dist[6*Np+n] = 0.125*Psi[ijk]; - } -} - -extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){ - int n; - // distributions - double f1,f2,f3,f4,f5,f6; - double Ex,Ey,Ez; - double rlx=1.0/tau; - - for (n=0; ngetDatabase( "Domain" ); electric_db = db->getDatabase( "Poisson" ); - //k2_inv = 4.5;//speed of sound for D3Q7 lattice - k2_inv = 4.0;//speed of sound for D3Q7 lattice + k2_inv = 4.5;//speed of sound for D3Q7 lattice + //k2_inv = 4.0;//speed of sound for D3Q7 lattice gamma = 1.0;//time step of LB-Poisson equation tau = 0.5+k2_inv*gamma; timestepMax = 100000; @@ -443,8 +443,8 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); //perform collision - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - 
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); if (BoundaryConditionSolid==1){ ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); } @@ -480,8 +480,8 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); //perform collision - ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); if (BoundaryConditionSolid==1){ ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); } From 20c8cc9c3b307eb756f7ea8c0cfa9953dded19a5 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Wed, 2 Sep 2020 11:37:23 -0400 Subject: [PATCH 029/205] update PoissonSolver and fix numerous bugs --- common/Domain.cpp | 64 +++++------ common/ScaLBL.cpp | 4 +- common/ScaLBL.h | 4 +- cpu/D3Q7BC.cpp | 2 +- cpu/Poisson.cpp | 129 ++++++++-------------- models/PoissonSolver.cpp | 209 +++++++++++++++++++----------------- models/PoissonSolver.h | 18 ++-- tests/CMakeLists.txt | 1 + tests/TestPoissonSolver.cpp | 102 ++++++++++++++++++ 9 files changed, 305 insertions(+), 228 deletions(-) 
create mode 100644 tests/TestPoissonSolver.cpp diff --git a/common/Domain.cpp b/common/Domain.cpp index eadc4592..d75ec4a5 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -626,38 +626,38 @@ void Domain::Decomp( const std::string& Filename ) if (BoundaryCondition > 0 && BoundaryCondition !=5) iVol_global = 1.0/(1.0*(Nx-2)*nprocx*(Ny-2)*nprocy*((Nz-2)*nprocz-6)); //......................................................... // If external boundary conditions are applied remove solid - if (BoundaryCondition > 0 && BoundaryCondition !=5 && kproc() == 0){ - if (inlet_layers_z < 4){ - inlet_layers_z=4; - if(RANK==0){ - printf("NOTE:Non-periodic BC is applied, but the number of Z-inlet layers is not specified (or is smaller than 3 voxels) \n the number of Z-inlet layer is reset to %i voxels, saturated with phase label=%i \n",inlet_layers_z-1,inlet_layers_phase); - } - } - for (int k=0; k 0 && BoundaryCondition !=5 && kproc() == nprocz-1){ - if (outlet_layers_z < 4){ - outlet_layers_z=4; - if(RANK==nprocs-1){ - printf("NOTE:Non-periodic BC is applied, but the number of Z-outlet layers is not specified (or is smaller than 3 voxels) \n the number of Z-outlet layer is reset to %i voxels, saturated with phase label=%i \n",outlet_layers_z-1,outlet_layers_phase); - } - } - for (int k=Nz-outlet_layers_z; k 0 && BoundaryCondition !=5 && kproc() == 0){ +// if (inlet_layers_z < 4){ +// inlet_layers_z=4; +// if(RANK==0){ +// printf("NOTE:Non-periodic BC is applied, but the number of Z-inlet layers is not specified (or is smaller than 3 voxels) \n the number of Z-inlet layer is reset to %i voxels, saturated with phase label=%i \n",inlet_layers_z-1,inlet_layers_phase); +// } +// } +// for (int k=0; k 0 && BoundaryCondition !=5 && kproc() == nprocz-1){ +// if (outlet_layers_z < 4){ +// outlet_layers_z=4; +// if(RANK==nprocs-1){ +// printf("NOTE:Non-periodic BC is applied, but the number of Z-outlet layers is not specified (or is smaller than 3 voxels) \n the number of Z-outlet 
layer is reset to %i voxels, saturated with phase label=%i \n",outlet_layers_z-1,outlet_layers_phase); +// } +// } +// for (int k=Nz-outlet_layers_z; k0)+Psi[ijk]*(id<=0);// get neighbor for phi - 18 //............Compute the Color Gradient................................... - nx = -1.f/18.f*(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); - ny = -1.f/18.f*(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); - nz = -1.f/18.f*(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); + //nx = 1.f/6.f*(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); + //ny = 1.f/6.f*(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); + //nz = 1.f/6.f*(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); + nx = 1.f/6.f*(m1-m2);//but looks like it needs to multiply another factor of 3 + ny = 1.f/6.f*(m3-m4); + nz = 1.f/6.f*(m5-m6); ElectricField[n] = nx; ElectricField[Np+n] = ny; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index d400df05..89a1c42a 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -6,8 +6,9 @@ #include "common/ReadMicroCT.h" ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM): -rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),gamma(0),tolerance(0),h(0), +rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),tolerance(0),h(0), epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), +chargeDen_dummy(0), nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -22,10 +23,8 @@ void ScaLBL_Poisson::ReadParams(string filename){ domain_db = db->getDatabase( "Domain" ); electric_db = db->getDatabase( "Poisson" ); - k2_inv = 4.5;//speed of sound for D3Q7 lattice - //k2_inv = 4.0;//speed of sound for D3Q7 lattice - gamma = 1.0;//time step of LB-Poisson equation - tau = 0.5+k2_inv*gamma; + k2_inv = 4.0;//speed of sound for D3Q7 lattice + tau = 0.5+k2_inv; timestepMax = 100000; tolerance = 1.0e-6;//stopping criterion for 
obtaining steady-state electricla potential h = 1.0;//resolution; unit: um/lu @@ -36,6 +35,7 @@ void ScaLBL_Poisson::ReadParams(string filename){ analysis_interval = 1000; Vin = 1.0; //Boundary-z (inlet) electric potential Vout = 1.0; //Boundary-Z (outlet) electric potential + chargeDen_dummy = 1.0e-3;//For debugging;unit=[C/m^3] // LB-Poisson Model parameters if (electric_db->keyExists( "timestepMax" )){ @@ -47,12 +47,12 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( "tolerance" )){ tolerance = electric_db->getScalar( "tolerance" ); } - if (electric_db->keyExists( "gamma" )){ - gamma = electric_db->getScalar( "gamma" ); - } if (electric_db->keyExists( "epsilonR" )){ epsilonR = electric_db->getScalar( "epsilonR" ); } + if (electric_db->keyExists( "DummyChargeDen" )){ + chargeDen_dummy = electric_db->getScalar( "DummyChargeDen" ); + } // Read solid boundary condition specific to Poisson equation BoundaryConditionSolid = 1; @@ -76,7 +76,6 @@ void ScaLBL_Poisson::ReadParams(string filename){ //Re-calcualte model parameters if user updates input epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)] epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity - tau = 0.5+k2_inv*gamma; if (rank==0) printf("***********************************************************************************\n"); if (rank==0) printf("LB-Poisson Solver: steady-state MaxTimeStep = %i; steady-state tolerance = %.3g \n", timestepMax,tolerance); @@ -213,7 +212,6 @@ void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid) //NOTE need to convert the user input phys unit to LB unit if (BoundaryConditionSolid==2){ //for BCS=1, i.e. Dirichlet-type, no need for unit conversion - //TODO maybe there is a factor of gamm missing here ? AFFINITY = AFFINITY*(h*h*1.0e-12)/epsilon_LB; } label_count[idx] += 1.0; @@ -284,11 +282,10 @@ void ScaLBL_Poisson::Create(){ //........................................................................... 
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); - ScaLBL_AllocateDeviceMemory((void **) &dvcID, sizeof(signed char)*Nx*Ny*Nz); + //ScaLBL_AllocateDeviceMemory((void **) &dvcID, sizeof(signed char)*Nx*Ny*Nz); ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Nx*Ny*Nz); ScaLBL_AllocateDeviceMemory((void **) &ElectricField, 3*sizeof(double)*Np); - //ScaLBL_AllocateDeviceMemory((void **) &PoissonSolid, sizeof(double)*Nx*Ny*Nz); //........................................................................... // Update GPU data structures @@ -329,26 +326,13 @@ void ScaLBL_Poisson::Create(){ MPI_Barrier(comm); delete [] neighborList; // copy node ID - ScaLBL_CopyToDevice(dvcID, Mask->id, sizeof(signed char)*Nx*Ny*Nz); - ScaLBL_DeviceBarrier(); + //ScaLBL_CopyToDevice(dvcID, Mask->id, sizeof(signed char)*Nx*Ny*Nz); + //ScaLBL_DeviceBarrier(); //Initialize solid boundary for electric potential ScaLBL_Comm->SetupBounceBackList(Map, Mask->id, Np); MPI_Barrier(comm); - - //double *PoissonSolid_host; - //PoissonSolid_host = new double[Nx*Ny*Nz]; - //AssignSolidBoundary(PoissonSolid_host); - //ScaLBL_CopyToDevice(PoissonSolid, PoissonSolid_host, Nx*Ny*Nz*sizeof(double)); - //ScaLBL_DeviceBarrier(); - //delete [] PoissonSolid_host; } -// Method 1 -// Psi - size N -// ID_dvc - size N -// Method 2 -// Psi - size Np -// PoissonSolid size N void ScaLBL_Poisson::Potential_Init(double *psi_init){ @@ -402,6 +386,16 @@ void ScaLBL_Poisson::Initialize(){ ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); delete [] psi_host; + + //extra treatment for halo layer + if (BoundaryCondition==1){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Psi,Vin,Nx,Ny,Nz,0); + } + if (Dm->kproc() == nprocz-1){ + 
ScaLBL_SetSlice_z(Psi,Vout,Nx,Ny,Nz,Nz-1); + } + } } void ScaLBL_Poisson::Run(double *ChargeDensity){ @@ -418,20 +412,17 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ //************************************************************************/ // *************ODD TIMESTEP*************// timestep++; - SolveElectricPotentialAAodd(); - //compute electric field - SolveElectricField(); - //perform collision - SolvePoissonAAodd(ChargeDensity); + + SolveElectricPotentialAAodd();//update electric potential + //SolveElectricField(); //deprecated - compute electric field + SolvePoissonAAodd(ChargeDensity);//perform collision ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// timestep++; - SolveElectricPotentialAAeven(); - //compute electric field - SolveElectricField(); - //perform collision - SolvePoissonAAeven(ChargeDensity); + SolveElectricPotentialAAeven();//update electric potential + //SolveElectricField();//deprecated - compute electric field + SolvePoissonAAeven(ChargeDensity);//perform collision ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ @@ -484,6 +475,75 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ } +void ScaLBL_Poisson::SolveElectricPotentialAAodd(){ + ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set boundary conditions + if (BoundaryCondition == 1){ + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + } + //-------------------------// + ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); +} + +void ScaLBL_Poisson::SolveElectricPotentialAAeven(){ + 
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL + ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set boundary conditions + if (BoundaryCondition == 1){ + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + } + //-------------------------// + ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); +} + +void ScaLBL_Poisson::SolvePoissonAAodd(double *ChargeDensity){ + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, 0, ScaLBL_Comm->LastExterior(), Np); + if (BoundaryConditionSolid==1){ + ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); + } + else if (BoundaryConditionSolid==2){ + ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); + } +} + +void ScaLBL_Poisson::SolvePoissonAAeven(double *ChargeDensity){ + ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, 0, ScaLBL_Comm->LastExterior(), Np); + if (BoundaryConditionSolid==1){ + ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); + } + else if (BoundaryConditionSolid==2){ + ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); + } +} + +void ScaLBL_Poisson::DummyChargeDensity(){ + double *ChargeDensity_host; + ChargeDensity_host = new double[Np]; + + for (int k=0; kSendD3Q7AA(fq, 0); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), 
Np); - ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); - } - //-------------------------// - ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); -} -void ScaLBL_Poisson::SolveElectricField(){ - ScaLBL_Comm_Regular->SendHalo(Psi); - ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, - Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Psi); - ScaLBL_DeviceBarrier(); - if (BoundaryCondition == 1){ - ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin); - ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout); - } - ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - -} -void ScaLBL_Poisson::SolvePoissonAAodd(double *ChargeDensity){ - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); - if (BoundaryConditionSolid==1){ - ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); - } - else if (BoundaryConditionSolid==2){ - ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); - } -} - -void ScaLBL_Poisson::SolveElectricPotentialAAeven(){ - ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL - ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - // Set boundary conditions - if (BoundaryCondition == 1){ - 
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); - } - //-------------------------// - ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); -} -void ScaLBL_Poisson::SolvePoissonAAeven(double *ChargeDensity){ - ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, tau, epsilon_LB, gamma, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, tau, epsilon_LB, gamma, 0, ScaLBL_Comm->LastExterior(), Np); - if (BoundaryConditionSolid==1){ - ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi); - } - else if (BoundaryConditionSolid==2){ - ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi); - } -} - - void ScaLBL_Poisson::ElectricField_LB_to_Phys(DoubleArray &Efield_reg){ for (int k=0;kSendHalo(Psi); +// ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, +// Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); +// ScaLBL_Comm_Regular->RecvHalo(Psi); +// ScaLBL_DeviceBarrier(); +// if (BoundaryCondition == 1){ +// ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin); +// ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout); +// } +// ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); +// +//} //void ScaLBL_Poisson::getElectricPotential(){ // diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 42c47afb..921963ed 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -28,13 +28,9 @@ public: void Create(); void Initialize(); void Run(double *ChargeDensity); - void SolveElectricPotentialAAodd(); - void SolveElectricPotentialAAeven(); - void SolveElectricField(); - void SolvePoissonAAodd(double *ChargeDensity); - void SolvePoissonAAeven(double *ChargeDensity); void 
getElectricPotential(int timestep); void getElectricField(int timestep); + void DummyChargeDensity();//for debugging //bool Restart,pBC; int timestep,timestepMax; @@ -43,9 +39,10 @@ public: int BoundaryConditionSolid; double tau; double tolerance; - double k2_inv,gamma; + double k2_inv; double epsilon0,epsilon0_LB,epsilonR,epsilon_LB; double Vin, Vout; + double chargeDen_dummy;//for debugging int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; @@ -66,11 +63,11 @@ public: DoubleArray Psi_host; int *NeighborList; int *dvcMap; - signed char *dvcID; + //signed char *dvcID; double *fq; double *Psi; double *ElectricField; - //double *PoissonSolid; + double *ChargeDensityDummy;// for debugging private: MPI_Comm comm; @@ -85,5 +82,10 @@ private: void AssignSolidBoundary(double *poisson_solid); void Potential_Init(double *psi_init); void ElectricField_LB_to_Phys(DoubleArray &Efield_reg); + void SolveElectricPotentialAAodd(); + void SolveElectricPotentialAAeven(); + //void SolveElectricField(); + void SolvePoissonAAodd(double *ChargeDensity); + void SolvePoissonAAeven(double *ChargeDensity); }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1a8bfac0..43167b3f 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -47,6 +47,7 @@ ADD_LBPM_TEST( TestTorusEvolve ) ADD_LBPM_TEST( TestTopo3D ) ADD_LBPM_TEST( TestFluxBC ) ADD_LBPM_TEST( TestMap ) +ADD_LBPM_TEST( TestPoissonSolver ) #ADD_LBPM_TEST( TestMRT ) #ADD_LBPM_TEST( TestColorGrad ) #ADD_LBPM_TEST( TestColorGradDFH ) diff --git a/tests/TestPoissonSolver.cpp b/tests/TestPoissonSolver.cpp new file mode 100644 index 00000000..309a03c7 --- /dev/null +++ b/tests/TestPoissonSolver.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "models/PoissonSolver.h" + +using namespace std; + +//******************************************************** +// Test lattice-Boltzmann solver of Poisson equation 
+//******************************************************** + +int main(int argc, char **argv) +{ + // Initialize MPI + int provided_thread_support = -1; + MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); + if ( rank==0 && provided_thread_support Date: Fri, 11 Sep 2020 22:56:00 -0400 Subject: [PATCH 030/205] CPU only;finish preliminary work on testing Poisson and Ion models and their coupling --- common/ScaLBL.cpp | 22 + common/ScaLBL.h | 16 +- cpu/D3Q7BC.cpp | 86 ++ cpu/Ion.cpp | 56 +- gpu/D3Q7BC.cu | 1203 +++++++++++++++++++ models/IonModel.cpp | 557 +++++++-- models/IonModel.h | 22 +- models/MultiPhysController.cpp | 119 +- models/MultiPhysController.h | 12 +- models/PoissonSolver.cpp | 2 +- models/StokesModel.cpp | 234 +++- models/StokesModel.h | 3 + tests/CMakeLists.txt | 3 + tests/TestIonModel.cpp | 89 ++ tests/TestNernstPlanck.cpp | 101 ++ tests/TestPNP_Stokes.cpp | 123 ++ tests/TestPoissonSolver.cpp | 29 - tests/lbpm_electrokinetic_dfh_simulator.cpp | 2 +- 18 files changed, 2453 insertions(+), 226 deletions(-) create mode 100644 gpu/D3Q7BC.cu create mode 100644 tests/TestIonModel.cpp create mode 100644 tests/TestNernstPlanck.cpp create mode 100644 tests/TestPNP_Stokes.cpp diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index b621a517..8db59597 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -2086,3 +2086,25 @@ void ScaLBL_Communicator::Poisson_D3Q7_BC_Z(int *Map, double *Psi, double Vout){ ScaLBL_Poisson_D3Q7_BC_Z(dvcSendList_Z, Map, Psi, Vout, sendCount_Z); } } + +void ScaLBL_Communicator::D3Q7_Ion_Concentration_BC_z(int *neighborList, double *fq, double Cin, int time){ + if (kproc == 0) { + if (time%2==0){ + ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(dvcSendList_z, fq, Cin, sendCount_z, N); + } + else{ + ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(neighborList, dvcSendList_z, fq, Cin, sendCount_z, N); + } 
+ } +} + +void ScaLBL_Communicator::D3Q7_Ion_Concentration_BC_Z(int *neighborList, double *fq, double Cout, int time){ + if (kproc == nprocz-1){ + if (time%2==0){ + ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(dvcSendList_Z, fq, Cout, sendCount_Z, N); + } + else{ + ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(neighborList, dvcSendList_Z, fq, Cout, sendCount_Z, N); + } + } +} diff --git a/common/ScaLBL.h b/common/ScaLBL.h index ca5610d3..a4a5fad3 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -79,17 +79,15 @@ extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, i extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField, - double Di, double zi, double rlx, double Vt, int start, int finish, int Np); + double Di, int zi, double rlx, double Vt, int start, int finish, int Np); extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField, - double Di, double zi, double rlx, double Vt, int start, int finish, int Np); + double Di, int zi, double rlx, double Vt, int start, int finish, int Np); extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np); extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np); -extern "C" void ScaLBL_IonConcentration_Phys(double *Den, double h, int ion_component, int start, int finish, int Np); - // LBM Poisson solver extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList,int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB, @@ -206,6 +204,14 @@ extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, doubl extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, 
int Np); + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np); + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np); + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np); + class ScaLBL_Communicator{ public: //...................................................................................... @@ -269,6 +275,8 @@ public: void D3Q7_Poisson_Potential_BC_Z(int *neighborList, double *fq, double Vout, int time); void Poisson_D3Q7_BC_z(int *Map, double *Psi, double Vin); void Poisson_D3Q7_BC_Z(int *Map, double *Psi, double Vout); + void D3Q7_Ion_Concentration_BC_z(int *neighborList, double *fq, double Cin, int time); + void D3Q7_Ion_Concentration_BC_Z(int *neighborList, double *fq, double Cout, int time); // Debugging and unit testing functions void PrintD3Q19(); diff --git a/cpu/D3Q7BC.cpp b/cpu/D3Q7BC.cpp index ee47fed4..161e6a5c 100644 --- a/cpu/D3Q7BC.cpp +++ b/cpu/D3Q7BC.cpp @@ -137,3 +137,89 @@ extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, doubl Psi[nm] = Vout; } } + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){ + for (int idx=0; idx +#include +#include + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count) +{ + + int idx; + int iq,ib; + double value_b,value_q; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + iq = BounceBackDist_list[idx]; + ib = BounceBackSolid_list[idx]; + value_b = BoundaryValue[ib];//get boundary value from a solid site + value_q = dist[iq]; + dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice + } +} + +__global__ 
void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count) +{ + + int idx; + int iq,ib; + double value_b,value_q; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + iq = BounceBackDist_list[idx]; + ib = BounceBackSolid_list[idx]; + value_b = BoundaryValue[ib];//get boundary value from a solid site + value_q = dist[iq]; + dist[iq] = value_q + value_b; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + //................................................... + f5 = Vin - (f0+f1+f2+f3+f4+f6); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + //................................................... 
+ f6 = Vout - (f0+f1+f2+f3+f4+f5); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np) +{ + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + f5 = Vin - (f0+f1+f2+f3+f4+f6); + dist[nr5] = f5; + } +} + +__global__ void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) +{ + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + f6 = Vout - (f0+f1+f2+f3+f4+f5); + dist[nr6] = f6; + } +} + +__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count) +{ + int idx,n,nm; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + nm = Map[n]; + Psi[nm] = Vin; + } +} + + +__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count) +{ + int idx,n,nm; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + nm = Map[n]; + Psi[nm] = Vout; + } +} + +__global__ void 
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + //................................................... + f5 = Cin - (f0+f1+f2+f3+f4+f6); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + //................................................... + f6 = Cout - (f0+f1+f2+f3+f4+f5); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np) +{ + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + f5 = Cin - (f0+f1+f2+f3+f4+f6); + dist[nr5] = f5; + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np) +{ + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread 
= d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + f6 = Cout - (f0+f1+f2+f3+f4+f5); + dist[nr6] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np) +{ + int idx, n; + int nread; + int nr5,nr11,nr14,nr15,nr18; + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double ux,uy,uz,Cyz,Cxz; + ux = uy = 0.0; + + idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx < count){ + + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+6*Np]; + f7 = dist[nread]; + + nread = d_neighborList[n+8*Np]; + f9 = dist[nread]; + + nread = d_neighborList[n+12*Np]; + f13 = dist[nread]; + + nread = d_neighborList[n+16*Np]; + f17 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + nread = d_neighborList[n+7*Np]; + f8 = dist[nread]; + + nread = d_neighborList[n+9*Np]; + f10 = dist[nread]; + + nread = d_neighborList[n+11*Np]; + f12 = dist[nread]; + + nread = d_neighborList[n+15*Np]; + f16 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + nr11 = d_neighborList[n+10*Np]; + nr15 = d_neighborList[n+14*Np]; + nr14 = d_neighborList[n+13*Np]; + nr18 = d_neighborList[n+17*Np]; + + //................................................... + //........Determine the inlet flow velocity......... 
+ //ux = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14); + //uy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18); + uz = din - (f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f6+f12+f13+f16+f17)); + + Cxz = 0.5*(f1+f7+f9-f2-f10-f8) - 0.3333333333333333*ux; + Cyz = 0.5*(f3+f7+f10-f4-f9-f8) - 0.3333333333333333*uy; + + f5 = f6 + 0.33333333333333338*uz; + f11 = f12 + 0.16666666666666678*(uz+ux)-Cxz; + f14 = f13 + 0.16666666666666678*(uz-ux)+Cxz; + f15 = f16 + 0.16666666666666678*(uy+uz)-Cyz; + f18 = f17 + 0.16666666666666678*(uz-uy)+Cyz; + //........Store in "opposite" memory location.......... + dist[nr5] = f5; + dist[nr11] = f11; + dist[nr14] = f14; + dist[nr15] = f15; + dist[nr18] = f18; + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *d_neighborList, int *list, double *dist, double dout, int count, int Np) +{ + int idx,n,nread; + int nr6,nr12,nr13,nr16,nr17; + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double ux,uy,uz,Cyz,Cxz; + ux = uy = 0.0; + + idx = blockIdx.x*blockDim.x + threadIdx.x; + + // Loop over the boundary - threadblocks delineated by start...finish + if ( idx < count ){ + + n = list[idx]; + //........................................................................ + // Read distributions + //........................................................................ 
+ f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+6*Np]; + f7 = dist[nread]; + + nread = d_neighborList[n+8*Np]; + f9 = dist[nread]; + + nread = d_neighborList[n+10*Np]; + f11 = dist[nread]; + + nread = d_neighborList[n+14*Np]; + f15 = dist[nread]; + + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+7*Np]; + f8 = dist[nread]; + + nread = d_neighborList[n+9*Np]; + f10 = dist[nread]; + + nread = d_neighborList[n+13*Np]; + f14 = dist[nread]; + + nread = d_neighborList[n+17*Np]; + f18 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + nr12 = d_neighborList[n+11*Np]; + nr16 = d_neighborList[n+15*Np]; + nr17 = d_neighborList[n+16*Np]; + nr13 = d_neighborList[n+12*Np]; + + + //........Determine the outlet flow velocity......... + //ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + //uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + uz = -dout + (f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f5+f11+f14+f15+f18)); + + Cxz = 0.5*(f1+f7+f9-f2-f10-f8) - 0.3333333333333333*ux; + Cyz = 0.5*(f3+f7+f10-f4-f9-f8) - 0.3333333333333333*uy; + + f6 = f5 - 0.33333333333333338*uz; + f12 = f11 - 0.16666666666666678*(uz+ux)+Cxz; + f13 = f14 - 0.16666666666666678*(uz-ux)-Cxz; + f16 = f15 - 0.16666666666666678*(uy+uz)+Cyz; + f17 = f18 - 0.16666666666666678*(uz-uy)-Cyz; + + //........Store in "opposite" memory location.......... + dist[nr6] = f6; + dist[nr12] = f12; + dist[nr13] = f13; + dist[nr16] = f16; + dist[nr17] = f17; + //................................................... 
+ } +} + + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Flux_BC_z(int *list, double *dist, double flux, double Area, + double *dvcsum, int count, int Np) +{ + int idx, n; + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double factor = 1.f/(Area); + double sum = 0.f; + + idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx < count){ + + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + f7 = dist[8*Np+n]; + f8 = dist[7*Np+n]; + f9 = dist[10*Np+n]; + f10 = dist[9*Np+n]; + f12 = dist[11*Np+n]; + f13 = dist[14*Np+n]; + f16 = dist[15*Np+n]; + f17 = dist[18*Np+n]; + sum = factor*(f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f6+f12+f13+f16+f17)); + } + + //sum = blockReduceSum(sum); + //if (threadIdx.x==0) + // atomicAdd(dvcsum, sum); + + extern __shared__ double temp[]; + thread_group g = this_thread_block(); + double block_sum = reduce_sum(g, temp, sum); + + if (g.thread_rank() == 0) atomicAdd(dvcsum, block_sum); +} + + +__global__ void dvc_ScaLBL_D3Q19_AAodd_Flux_BC_z(int *d_neighborList, int *list, double *dist, double flux, + double Area, double *dvcsum, int count, int Np) +{ + int idx, n; + int nread; + + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double factor = 1.f/(Area); + double sum = 0.f; + + idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx < count){ + + n = list[idx]; + + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+6*Np]; + f7 = dist[nread]; + + nread = d_neighborList[n+8*Np]; + f9 = dist[nread]; + + nread = d_neighborList[n+12*Np]; + f13 = dist[nread]; + + nread = d_neighborList[n+16*Np]; + f17 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 
= dist[nread]; + + nread = d_neighborList[n+7*Np]; + f8 = dist[nread]; + + nread = d_neighborList[n+9*Np]; + f10 = dist[nread]; + + nread = d_neighborList[n+11*Np]; + f12 = dist[nread]; + + nread = d_neighborList[n+15*Np]; + f16 = dist[nread]; + + sum = factor*(f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f6+f12+f13+f16+f17)); + + } + + //sum = blockReduceSum(sum); + //if (threadIdx.x==0) + // atomicAdd(dvcsum, sum); + + extern __shared__ double temp[]; + thread_group g = this_thread_block(); + double block_sum = reduce_sum(g, temp, sum); + + if (g.thread_rank() == 0) atomicAdd(dvcsum, block_sum); +} + + +__global__ void dvc_D3Q19_Velocity_BC_z(double *disteven, double *distodd, double uz, + int Nx, int Ny, int Nz) +{ + int n,N; + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double din; + + N = Nx*Ny*Nz; + n = Nx*Ny + blockIdx.x*blockDim.x + threadIdx.x; + + if (n < 2*Nx*Ny){ + //........................................................................ + // Read distributions from "opposite" memory convention + //........................................................................ + //........................................................................ + f1 = distodd[n]; + f3 = distodd[N+n]; + f5 = distodd[2*N+n]; + f7 = distodd[3*N+n]; + f9 = distodd[4*N+n]; + f11 = distodd[5*N+n]; + f13 = distodd[6*N+n]; + f15 = distodd[7*N+n]; + f17 = distodd[8*N+n]; + //........................................................................ + f0 = disteven[n]; + f2 = disteven[N+n]; + f4 = disteven[2*N+n]; + f6 = disteven[3*N+n]; + f8 = disteven[4*N+n]; + f10 = disteven[5*N+n]; + f12 = disteven[6*N+n]; + f14 = disteven[7*N+n]; + f16 = disteven[8*N+n]; + f18 = disteven[9*N+n]; + //................................................... 
+ + // Determine the outlet flow velocity + // uz = 1.0 - (f0+f4+f3+f2+f1+f8+f7+f9+f10 + + // 2*(f5+f15+f18+f11+f14))/din; + din = (f0+f4+f3+f2+f1+f8+f7+f9+f10+2*(f5+f15+f18+f11+f14))/(1.0-uz); + // Set the unknown distributions: + f6 = f5 + 0.3333333333333333*din*uz; + f16 = f15 + 0.1666666666666667*din*uz; + f17 = f16 + f4 - f3-f15+f18+f8-f7 +f9-f10; + f12= (din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f2+f1-f14+f11-f8+f7+f9-f10)*0.5; + f13= din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f12; + + //........Store in "opposite" memory location.......... + disteven[3*N+n] = f6; + disteven[6*N+n] = f12; + distodd[6*N+n] = f13; + disteven[8*N+n] = f16; + distodd[8*N+n] = f17; + //................................................... + } +} + +__global__ void dvc_D3Q19_Velocity_BC_Z(double *disteven, double *distodd, double uz, + int Nx, int Ny, int Nz, int outlet){ + int n,N; + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double dout; + + N = Nx*Ny*Nz; + n = outlet + blockIdx.x*blockDim.x + threadIdx.x; + + // Loop over the boundary - threadblocks delineated by start...finish + if ( n 0 ){ + f_even[n] = 0 + 0.01*0; + f_odd[n] = 0+ 0.01*1; //double(100*n)+1.f; + f_even[N+n] = 1+ 0.01*2; //double(100*n)+2.f; + f_odd[N+n] = 1+ 0.01*3; //double(100*n)+3.f; + f_even[2*N+n] = 2+ 0.01*4; //double(100*n)+4.f; + f_odd[2*N+n] = 2+ 0.01*5; //double(100*n)+5.f; + f_even[3*N+n] = 3+ 0.01*6; //double(100*n)+6.f; + f_odd[3*N+n] = 3+ 0.01*7; //double(100*n)+7.f; + f_even[4*N+n] = 4+ 0.01*8; //double(100*n)+8.f; + f_odd[4*N+n] = 4+ 0.01*9; //double(100*n)+9.f; + f_even[5*N+n] = 5+ 0.01*10; //double(100*n)+10.f; + f_odd[5*N+n] = 5+ 0.01*11; //double(100*n)+11.f; + f_even[6*N+n] = 6+ 0.01*12; //double(100*n)+12.f; + f_odd[6*N+n] = 6+ 0.01*13; //double(100*n)+13.f; + f_even[7*N+n] = 7+ 0.01*14; //double(100*n)+14.f; + f_odd[7*N+n] = 7+ 0.01*15; //double(100*n)+15.f; + f_even[8*N+n] = 8+ 0.01*16; //double(100*n)+16.f; + f_odd[8*N+n] = 8+ 0.01*17; 
//double(100*n)+17.f; + f_even[9*N+n] = 9+ 0.01*18; //double(100*n)+18.f; + } + else{ + for(int q=0; q<9; q++){ + f_even[q*N+n] = -1.0; + f_odd[q*N+n] = -1.0; + } + f_even[9*N+n] = -1.0; + } + } + } +} + + +//************************************************************************* + +//extern "C" void ScaLBL_D3Q19_MapRecv(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count, +// int *d3q19_recvlist, int Nx, int Ny, int Nz){ +// int GRID = count / 512 + 1; +// dvc_ScaLBL_D3Q19_Unpack <<>>(q, Cqx, Cqy, Cqz, list, start, count, d3q19_recvlist, Nx, Ny, Nz); +//} + +extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Pack <<>>(q, list, start, count, sendbuf, dist, N); +} + +extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Unpack <<>>(q, list, start, count, recvbuf, dist, N); +} +//************************************************************************* + +extern "C" void ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np){ + dvc_ScaLBL_D3Q19_AA_Init<<>>(f_even, f_odd, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AA_Init: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Init(double *dist, int Np){ + dvc_ScaLBL_D3Q19_Init<<>>(dist, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *dist, int Np, double Den){ + dvc_ScaLBL_D3Q19_GreyIMRT_Init<<>>(dist, Np, Den); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_GreyIMRT_Init: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Swap(char *ID, double *disteven, double 
*distodd, int Nx, int Ny, int Nz){ + dvc_ScaLBL_D3Q19_Swap<<>>(ID, disteven, distodd, Nx, Ny, Nz); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, double *distodd, int Np) +{ + + const int Q = 9; + // cudaStream_t streams[Q]; + // Launch the swap operation as different kernels + for (int q=0; q>>(neighborList, disteven, distodd, Np, q); + } + // cpu should wait for all kernels to finish (to avoid launch of dependent kernels) + //cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) { + cudaFuncSetCacheConfig(dvc_ScaLBL_AAeven_Compact, cudaFuncCachePreferL1); + dvc_ScaLBL_AAeven_Compact<<>>(ID, d_dist, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) { + cudaFuncSetCacheConfig(dvc_ScaLBL_AAodd_Compact, cudaFuncCachePreferL1); + dvc_ScaLBL_AAodd_Compact<<>>(ID,d_neighborList, d_dist,Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Momentum(double *dist, double *vel, int Np){ + + dvc_ScaLBL_D3Q19_Momentum<<>>(dist, vel, Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Velocity: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Pressure(double *fq, double *Pressure, int Np){ + dvc_ScaLBL_D3Q19_Pressure<<< NBLOCKS,NTHREADS >>>(fq, Pressure, Np); +} + +extern "C" void 
ScaLBL_D3Q19_Velocity_BC_z(double *disteven, double *distodd, double uz,int Nx, int Ny, int Nz){ + int GRID = Nx*Ny / 512 + 1; + dvc_D3Q19_Velocity_BC_z<<>>(disteven,distodd, uz, Nx, Ny, Nz); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Velocity_BC_z: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Velocity_BC_Z(double *disteven, double *distodd, double uz, int Nx, int Ny, int Nz, int outlet){ + int GRID = Nx*Ny / 512 + 1; + dvc_D3Q19_Velocity_BC_Z<<>>(disteven, distodd, uz, Nx, Ny, Nz, outlet); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Velocity_BC_Z: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, double flux,int Nx, int Ny, int Nz){ + + int GRID = Nx*Ny / 512 + 1; + + // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 + if (Nx*Ny > 512*512){ + printf("WARNING (ScaLBL_D3Q19_Flux_BC_z): CUDA reduction operation may fail if Nx*Ny > 512*512"); + } + + // Allocate memory to store the sums + double din; + double sum[1]; + double *dvcsum; + int sharedBytes = NTHREADS*sizeof(double); + cudaMalloc((void **)&dvcsum,sizeof(double)*Nx*Ny); + cudaMemset(dvcsum,0,sizeof(double)*Nx*Ny); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z (memory allocation): %s \n",cudaGetErrorString(err)); + } + + // compute the local flux and store the result + dvc_D3Q19_Flux_BC_z<<>>(disteven, distodd, flux, dvcsum, Nx, Ny, Nz); + + err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z (flux calculation, step 1): %s \n",cudaGetErrorString(err)); + } + + // Now read the total flux + cudaMemcpy(&sum[0],dvcsum,sizeof(double),cudaMemcpyDeviceToHost); + din=sum[0]; + + err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z 
(flux calculation, step 2): %s \n",cudaGetErrorString(err)); + } + + // free the memory needed for reduction + cudaFree(dvcsum); + + return din; +} + + +extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_z(int *list, double *dist, double din, int count, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_z<<>>(list, dist, din, count, N); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Pressure_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, double dout, int count, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z<<>>(list, dist, dout, count, N); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Pressure_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z<<>>(neighborList, list, dist, din, count, N); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Pressure_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *neighborList, int *list, double *dist, double dout, int count, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_Z<<>>(neighborList, list, dist, dout, count, N); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Pressure_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + } +} +//******************************************************************************* +//******************************************************************************* 
+//******************************************************************************* + + +//******************************************************************************* +extern "C" double ScaLBL_D3Q19_AAeven_Flux_BC_z(int *list, double *dist, double flux, double area, + int count, int N){ + + int GRID = count / 512 + 1; + + // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 + if (count > 512*512){ + printf("WARNING (ScaLBL_D3Q19_Flux_BC_Z): CUDA reduction operation may fail if count > 512*512"); + } + + // Allocate memory to store the sums + double din; + double sum[1]; + double *dvcsum; + cudaMalloc((void **)&dvcsum,sizeof(double)*count); + cudaMemset(dvcsum,0,sizeof(double)*count); + int sharedBytes = 512*sizeof(double); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (memory allocation): %s \n",cudaGetErrorString(err)); + } + + // compute the local flux and store the result + dvc_ScaLBL_D3Q19_AAeven_Flux_BC_z<<>>(list, dist, flux, area, dvcsum, count, N); + err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } + + // Now read the total flux + cudaMemcpy(&sum[0],dvcsum,sizeof(double),cudaMemcpyDeviceToHost); + din=sum[0]; + err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (reduction): %s \n",cudaGetErrorString(err)); + } + + // free the memory needed for reduction + cudaFree(dvcsum); + + return din; +} + +extern "C" double ScaLBL_D3Q19_AAodd_Flux_BC_z(int *neighborList, int *list, double *dist, double flux, + double area, int count, int N){ + + int GRID = count / 512 + 1; + + // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 + if (count > 512*512){ + printf("WARNING (ScaLBL_D3Q19_AAodd_Flux_BC_z): CUDA reduction operation may fail if count > 512*512"); + } + + // Allocate memory to store the sums + double din; + double 
sum[1]; + double *dvcsum; + cudaMalloc((void **)&dvcsum,sizeof(double)*count); + cudaMemset(dvcsum,0,sizeof(double)*count); + int sharedBytes = 512*sizeof(double); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (memory allocation): %s \n",cudaGetErrorString(err)); + } + + // compute the local flux and store the result + dvc_ScaLBL_D3Q19_AAodd_Flux_BC_z<<>>(neighborList, list, dist, flux, area, dvcsum, count, N); + err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } + // Now read the total flux + cudaMemcpy(&sum[0],dvcsum,sizeof(double),cudaMemcpyDeviceToHost); + din=sum[0]; + err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (reduction): %s \n",cudaGetErrorString(err)); + } + + // free the memory needed for reduction + cudaFree(dvcsum); + + return din; +} + +extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, double flux, int Nx, int Ny, int Nz, int outlet){ + + int GRID = Nx*Ny / 512 + 1; + + // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 + if (Nx*Ny > 512*512){ + printf("WARNING (ScaLBL_D3Q19_Flux_BC_Z): CUDA reduction operation may fail if Nx*Ny > 512*512"); + } + + // Allocate memory to store the sums + double dout; + double sum[1]; + double *dvcsum; + cudaMalloc((void **)&dvcsum,sizeof(double)*Nx*Ny); + cudaMemset(dvcsum,0,sizeof(double)*Nx*Ny); + + // compute the local flux and store the result + dvc_D3Q19_Flux_BC_Z<<>>(disteven, distodd, flux, dvcsum, Nx, Ny, Nz, outlet); + + // Now read the total flux + cudaMemcpy(&sum[0],dvcsum,sizeof(double),cudaMemcpyDeviceToHost); + + // free the memory needed for reduction + + dout = sum[0]; + + cudaFree(dvcsum); + + return dout; + +} + +extern "C" double deviceReduce(double *in, double* out, int N) { + int threads = 512; + int blocks = min((N + threads - 1) / 
threads, 1024); + + double sum = 0.f; + deviceReduceKernel<<>>(in, out, N); + deviceReduceKernel<<<1, 1024>>>(out, out, blocks); + return sum; +} + +extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_z<<>>(list, dist, count, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_Z<<>>(list, dist, count, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, + double Fy, double Fz){ + + dvc_ScaLBL_AAeven_MRT<<>>(dist,start,finish,Np,rlx_setA,rlx_setB,Fx,Fy,Fz); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_MRT: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborlist, double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, + double Fy, double Fz){ + + dvc_ScaLBL_AAodd_MRT<<>>(neighborlist,dist,start,finish,Np,rlx_setA,rlx_setB,Fx,Fy,Fz); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_MRT: %s \n",cudaGetErrorString(err)); + } +} + diff --git a/models/IonModel.cpp b/models/IonModel.cpp index c8c1fba8..0a8f779a 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -1,6 +1,7 @@ /* * Dilute Ion Transport LBM Model */ +#include #include "models/IonModel.h" #include "analysis/distance.h" #include "common/ReadMicroCT.h" @@ -8,6 +9,7 @@ 
ScaLBL_IonModel::ScaLBL_IonModel(int RANK, int NP, MPI_Comm COMM): rank(RANK),nprocs(NP),timestep(0),timestepMax(0),time_conv(0),kb(0),electron_charge(0),T(0),Vt(0),k2_inv(0),h(0), tolerance(0),number_ion_species(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0), +fluidVelx_dummy(0),fluidVely_dummy(0),fluidVelz_dummy(0), BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -16,18 +18,12 @@ ScaLBL_IonModel::~ScaLBL_IonModel(){ } -void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stokes,double time_conv_Stokes){ +void ScaLBL_IonModel::ReadParams(string filename,vector &num_iter){ // read the input database db = std::make_shared( filename ); domain_db = db->getDatabase( "Domain" ); ion_db = db->getDatabase( "Ions" ); - - //------ Load number of iteration from multiphysics controller ------// - timestepMax = num_iter; - //compute time conversion factor for ion model - time_conv = num_iter_Stokes*time_conv_Stokes/num_iter; - //-------------------------------------------------------------------// // Universal constant kb = 1.38e-23;//Boltzmann constant;unit [J/K] @@ -36,26 +32,30 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke //---------------------- Default model parameters --------------------------// T = 300.0;//temperature; unit [K] Vt = kb*T/electron_charge;//thermal voltage; unit [Vy] - k2_inv = 4.5;//speed of sound for D3Q7 lattice + k2_inv = 4.0;//speed of sound for D3Q7 lattice h = 1.0;//resolution; unit: um/lu tolerance = 1.0e-8; number_ion_species = 1; + tau.push_back(1.0); IonDiffusivity.push_back(1.0e-9);//user-input diffusivity has physical unit [m^2/sec] IonValence.push_back(1);//algebraic valence charge IonConcentration.push_back(1.0e-3);//user-input ion concentration has physical unit [mol/m^3] - //deltaT.push_back(1.0); - //tau.push_back(0.5+k2_inv*deltaT[0]*IonDiffusivity[0]); - 
tau.push_back(0.5+k2_inv*time_conv/(h*1.0e-6)/(h*1.0e-6)*IonDiffusivity[0]); + Cin.push_back(1.0e-3);//user-input inlet boundary ion concentration;unit [mol/m^3] + Cout.push_back(1.0e-3);//user-input outlet boundary ion concentration;unit [mol/m^3] + //tau.push_back(0.5+k2_inv*time_conv/(h*1.0e-6)/(h*1.0e-6)*IonDiffusivity[0]); + time_conv.push_back((tau[0]-0.5)/k2_inv*(h*h*1.0e-12)/IonDiffusivity[0]); + fluidVelx_dummy = 0.0;//for debugging, unit [m/sec] + fluidVely_dummy = 0.0;//for debugging, unit [m/sec] + fluidVelz_dummy = 0.0;//for debugging, unit [m/sec] + Ex_dummy = 0.0;//for debugging, unit [V/m] + Ey_dummy = 0.0;//for debugging, unit [V/m] + Ez_dummy = 0.0;//for debugging, unit [V/m] //--------------------------------------------------------------------------// // Read domain parameters if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu h = domain_db->getScalar( "voxel_length" ); } - BoundaryCondition = 0; - if (domain_db->keyExists( "BC" )){ - BoundaryCondition = domain_db->getScalar( "BC" ); - } // LB-Ion Model parameters //if (ion_db->keyExists( "timestepMax" )){ @@ -69,29 +69,63 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke //re-calculate thermal voltage Vt = kb*T/electron_charge;//thermal voltage; unit [Vy] } + if (ion_db->keyExists( "FluidVelDummy" )){ + fluidVelx_dummy = ion_db->getVector( "FluidVelDummy" )[0]; + fluidVely_dummy = ion_db->getVector( "FluidVelDummy" )[1]; + fluidVelz_dummy = ion_db->getVector( "FluidVelDummy" )[2]; + } + if (ion_db->keyExists( "ElectricFieldDummy" )){ + Ex_dummy = ion_db->getVector( "ElectricFieldDummy" )[0]; + Ey_dummy = ion_db->getVector( "ElectricFieldDummy" )[1]; + Ez_dummy = ion_db->getVector( "ElectricFieldDummy" )[2]; + } if (ion_db->keyExists( "number_ion_species" )){ number_ion_species = ion_db->getScalar( "number_ion_species" ); } + //------ Load number of iteration from multiphysics controller ------// + if (num_iter.size()!=number_ion_species){ + 
ERROR("Error: number_ion_species and num_iter_Ion_List (from Multiphysics) must be of the same length! \n"); + } + else{ + timestepMax.assign(num_iter.begin(),num_iter.end()); + } + //-------------------------------------------------------------------// + if (ion_db->keyExists("tauList")){ + tau.clear(); + tau = ion_db->getVector( "tauList" ); + vectorDi = ion_db->getVector( "IonDiffusivityList" );//temp storing ion diffusivity in physical unit + if (tau.size()!=number_ion_species || Di.size()!=number_ion_species){ + ERROR("Error: number_ion_species, tauList and IonDiffusivityList must be of the same length! \n"); + } + else{ + time_conv.clear(); + for (int i=0; ikeyExists("IonDiffusivityList")){ IonDiffusivity.clear(); IonDiffusivity = ion_db->getVector( "IonDiffusivityList" ); - // time relaxation parameters tau also needs update - tau.clear(); if (IonDiffusivity.size()!=number_ion_species){ ERROR("Error: number_ion_species and IonDiffusivityList must be the same length! \n"); } else{ for (int i=0; ikeyExists("IonValenceList")){ IonValence.clear(); @@ -114,33 +148,255 @@ void ScaLBL_IonModel::ReadParams(string filename,int num_iter,int num_iter_Stoke } } } + else { + for (int i=0; ikeyExists( "BC_Solid" )){ BoundaryConditionSolid = ion_db->getScalar( "BC_Solid" ); } - - - if (rank==0) printf("*****************************************************\n"); - if (rank==0) printf("LB Ion Transport Solver: \n"); - if (rank==0) printf(" Time conversion factor: %.5g [sec/lt]\n", time_conv); - if (rank==0) printf(" Internal iteration: %i [lt]\n", timestepMax); - for (int i=0; ikeyExists( "BC" )){ + BoundaryCondition = ion_db->getScalar( "BC" ); + } + if (BoundaryCondition==1){ + //read boundary ion concentration list; INPUT unit [mol/m^3] + //it must be converted to LB unit [mol/lu^3] + + //inlet + if (ion_db->keyExists("CinList")){ + Cin.clear(); + Cin = ion_db->getVector( "CinList" ); + if (Cin.size()!=number_ion_species){ + ERROR("Error: number_ion_species and CinList 
must be the same length! \n"); + } + else{ + for (int i=0; ikeyExists("CoutList")){ + Cout.clear(); + Cout = ion_db->getVector( "CoutList" ); + if (Cout.size()!=number_ion_species){ + ERROR("Error: number_ion_species and CoutList must be the same length! \n"); + } + else{ + for (int i=0; i( filename ); + domain_db = db->getDatabase( "Domain" ); + ion_db = db->getDatabase( "Ions" ); + + // Universal constant + kb = 1.38e-23;//Boltzmann constant;unit [J/K] + electron_charge = 1.6e-19;//electron charge;unit [C] + + //---------------------- Default model parameters --------------------------// + T = 300.0;//temperature; unit [K] + Vt = kb*T/electron_charge;//thermal voltage; unit [Vy] + k2_inv = 4.0;//speed of sound for D3Q7 lattice + h = 1.0;//resolution; unit: um/lu + tolerance = 1.0e-8; + number_ion_species = 1; + tau.push_back(1.0); + IonDiffusivity.push_back(1.0e-9);//user-input diffusivity has physical unit [m^2/sec] + IonValence.push_back(1);//algebraic valence charge + IonConcentration.push_back(1.0e-3);//user-input ion concentration has physical unit [mol/m^3] + Cin.push_back(1.0e-3);//user-input inlet boundary ion concentration;unit [mol/m^3] + Cout.push_back(1.0e-3);//user-input outlet boundary ion concentration;unit [mol/m^3] + //tau.push_back(0.5+k2_inv*time_conv/(h*1.0e-6)/(h*1.0e-6)*IonDiffusivity[0]); + time_conv.push_back((tau[0]-0.5)/k2_inv*(h*h*1.0e-12)/IonDiffusivity[0]); + fluidVelx_dummy = 0.0;//for debugging, unit [m/sec] + fluidVely_dummy = 0.0;//for debugging, unit [m/sec] + fluidVelz_dummy = 0.0;//for debugging, unit [m/sec] + Ex_dummy = 0.0;//for debugging, unit [V/m] + Ey_dummy = 0.0;//for debugging, unit [V/m] + Ez_dummy = 0.0;//for debugging, unit [V/m] + //--------------------------------------------------------------------------// + + // Read domain parameters + if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu + h = domain_db->getScalar( "voxel_length" ); + } + + // LB-Ion Model parameters + //if (ion_db->keyExists( 
"timestepMax" )){ + // timestepMax = ion_db->getScalar( "timestepMax" ); + //} + if (ion_db->keyExists( "tolerance" )){ + tolerance = ion_db->getScalar( "tolerance" ); + } + if (ion_db->keyExists( "temperature" )){ + T = ion_db->getScalar( "temperature" ); + //re-calculate thermal voltage + Vt = kb*T/electron_charge;//thermal voltage; unit [Vy] + } + if (ion_db->keyExists( "FluidVelDummy" )){ + fluidVelx_dummy = ion_db->getVector( "FluidVelDummy" )[0]; + fluidVely_dummy = ion_db->getVector( "FluidVelDummy" )[1]; + fluidVelz_dummy = ion_db->getVector( "FluidVelDummy" )[2]; + } + if (ion_db->keyExists( "ElectricFieldDummy" )){ + Ex_dummy = ion_db->getVector( "ElectricFieldDummy" )[0]; + Ey_dummy = ion_db->getVector( "ElectricFieldDummy" )[1]; + Ez_dummy = ion_db->getVector( "ElectricFieldDummy" )[2]; + } + if (ion_db->keyExists( "number_ion_species" )){ + number_ion_species = ion_db->getScalar( "number_ion_species" ); + } + if (ion_db->keyExists("tauList")){ + tau.clear(); + tau = ion_db->getVector( "tauList" ); + vectorDi = ion_db->getVector( "IonDiffusivityList" );//temp storing ion diffusivity in physical unit + if (tau.size()!=number_ion_species || Di.size()!=number_ion_species){ + ERROR("Error: number_ion_species, tauList and IonDiffusivityList must be of the same length! \n"); + } + else{ + time_conv.clear(); + for (int i=0; ikeyExists("IonDiffusivityList")){ + IonDiffusivity.clear(); + IonDiffusivity = ion_db->getVector( "IonDiffusivityList" ); + if (IonDiffusivity.size()!=number_ion_species){ + ERROR("Error: number_ion_species and IonDiffusivityList must be the same length! \n"); + } + else{ + for (int i=0; ikeyExists("IonValenceList")){ + IonValence.clear(); + IonValence = ion_db->getVector( "IonValenceList" ); + if (IonValence.size()!=number_ion_species){ + ERROR("Error: number_ion_species and IonValenceList must be the same length! 
\n"); + } + } + //read initial ion concentration list; INPUT unit [mol/m^3] + //it must be converted to LB unit [mol/lu^3] + if (ion_db->keyExists("IonConcentrationList")){ + IonConcentration.clear(); + IonConcentration = ion_db->getVector( "IonConcentrationList" ); + if (IonConcentration.size()!=number_ion_species){ + ERROR("Error: number_ion_species and IonConcentrationList must be the same length! \n"); + } + else{ + for (int i=0; ikeyExists( "BC_Solid" )){ + BoundaryConditionSolid = ion_db->getScalar( "BC_Solid" ); + } + // Read boundary condition for ion transport + // BC = 0: normal periodic BC + // BC = 1: fixed inlet and outlet ion concentration + BoundaryCondition = 0; + if (ion_db->keyExists( "BC" )){ + BoundaryCondition = ion_db->getScalar( "BC" ); + } + if (BoundaryCondition==1){ + //read boundary ion concentration list; INPUT unit [mol/m^3] + //it must be converted to LB unit [mol/lu^3] + + //inlet + if (ion_db->keyExists("CinList")){ + Cin.clear(); + Cin = ion_db->getVector( "CinList" ); + if (Cin.size()!=number_ion_species){ + ERROR("Error: number_ion_species and CinList must be the same length! \n"); + } + else{ + for (int i=0; ikeyExists("CoutList")){ + Cout.clear(); + Cout = ion_db->getVector( "CoutList" ); + if (Cout.size()!=number_ion_species){ + ERROR("Error: number_ion_species and CoutList must be the same length! 
\n"); + } + else{ + for (int i=0; iid[n] = 0; // set mask to zero since this is an immobile component @@ -360,6 +616,27 @@ void ScaLBL_IonModel::Initialize(){ ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np); } + + if (rank==0) printf("*****************************************************\n"); + if (rank==0) printf("LB Ion Transport Solver: \n"); + for (int i=0; iSendD3Q7AA(fq, ic); //READ FROM NORMAL + for (int ic=0; icSendD3Q7AA(fq, ic); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE + ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); + // Set boundary conditions + if (BoundaryCondition == 1){ + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + } + //-------------------------// ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np], 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np); - } + - //LB-Ion collison - for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); - } - - // Set boundary conditions - /* ... 
*/ - - for (int ic=0; icLastExterior(), Np); - } - if (BoundaryConditionSolid==1){ - for (int ic=0; icSolidNeumannD3Q7(&fq[ic*Np*7], IonSolid); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic*Np*7], IonSolid); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); } - } - // *************EVEN TIMESTEP*************// - timestep++; - //Update ion concentration and charge density - for (int ic=0; icSendD3Q7AA(fq, ic); //READ FORM NORMAL + // *************EVEN TIMESTEP*************// + timestep++; + //Update ion concentration and charge density + ScaLBL_Comm->SendD3Q7AA(fq, ic); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE + ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set boundary conditions + if (BoundaryCondition == 1){ + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + } + //-------------------------// ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np], 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np); - } + - //LB-Ion collison - for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); - } - - // Set boundary conditions - /* ... 
*/ - - for (int ic=0; icLastExterior(), Np); - } - if (BoundaryConditionSolid==1){ - for (int ic=0; icSolidNeumannD3Q7(&fq[ic*Np*7], IonSolid); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic*Np*7], IonSolid); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); } } - //************************************************************************/ - } + } + + //Compute charge density for Poisson equation + for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np); + } //************************************************************************/ //stoptime = MPI_Wtime(); //if (rank==0) printf("-------------------------------------------------------------------\n"); @@ -464,21 +737,15 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ //MLUPS *= nprocs; //if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); //if (rank==0) printf("********************************************************\n"); - } -//TODO this ruin the ion concentration on device -//need to do something similar to electric field void ScaLBL_IonModel::getIonConcentration(int timestep){ - for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_IonConcentration_Phys(Ci, h, ic, 0, ScaLBL_Comm->LastExterior(), Np); - } DoubleArray PhaseField(Nx,Ny,Nz); for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + IonConcentration_LB_to_Phys(PhaseField); FILE *OUTFILE; sprintf(LocalRankFilename,"Ion%02i_Time_%i.%05i.raw",ic+1,timestep,rank); @@ -489,6 +756,100 @@ void ScaLBL_IonModel::getIonConcentration(int timestep){ } +void ScaLBL_IonModel::IonConcentration_LB_to_Phys(DoubleArray &Den_reg){ + for (int k=0;k &ci_avg_previous){ + double *Ci_host; + Ci_host = new double[Np]; + vector error(number_ion_species,0.0); + + for (int ic=0; icLastExterior(); idx++){ + ci_loc 
+=Ci_host[idx]; + count_loc+=1.0; + } + for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ + ci_loc +=Ci_host[idx]; + count_loc+=1.0; + } + + MPI_Allreduce(&ci_loc,&ci_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + ci_avg /= count; + double ci_avg_mag=ci_avg; + if (ci_avg==0.0) ci_avg_mag=1.0; + error[ic] = fabs(ci_avg-ci_avg_previous[ic])/fabs(ci_avg_mag); + ci_avg_previous[ic] = ci_avg; + } + double error_max; + error_max = *max_element(error.begin(),error.end()); + if (rank==0){ + printf("IonModel: error max: %.5g\n",error_max); + } + return error_max; +} + //void ScaLBL_IonModel::getIonConcentration(){ // for (int ic=0; icFirstInterior(), ScaLBL_Comm->LastInterior(), Np); diff --git a/models/IonModel.h b/models/IonModel.h index 59e5b6e6..0bc881af 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -22,7 +22,8 @@ public: ~ScaLBL_IonModel(); // functions in they should be run - void ReadParams(string filename,int num_iter,int num_iter_Stokes,double time_conv_Stokes); + void ReadParams(string filename,vector &num_iter); + void ReadParams(string filename); void ReadParams(std::shared_ptr db0); void SetDomain(); void ReadInput(); @@ -30,24 +31,30 @@ public: void Initialize(); void Run(double *Velocity, double *ElectricField); void getIonConcentration(int timestep); - + void DummyFluidVelocity(); + void DummyElectricField(); + double CalIonDenConvergence(vector &ci_avg_previous); + //bool Restart,pBC; - int timestep,timestepMax; + int timestep; + vector timestepMax; int BoundaryCondition; int BoundaryConditionSolid; double h;//domain resolution, unit [um/lu] - double time_conv; double kb,electron_charge,T,Vt; double k2_inv; double tolerance; - double Ex,Ey,Ez; + double fluidVelx_dummy,fluidVely_dummy,fluidVelz_dummy; + double Ex_dummy,Ey_dummy,Ez_dummy; int number_ion_species; vector IonDiffusivity;//User input unit [m^2/sec] vector IonValence; vector IonConcentration;//unit 
[mol/m^3] - //vector deltaT; + vector Cin;//unit [mol/m^3] + vector Cout;//unit [mol/m^3] vector tau; + vector time_conv; int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; @@ -68,6 +75,8 @@ public: double *Ci; double *ChargeDensity; double *IonSolid; + double *FluidVelocityDummy; + double *ElectricFieldDummy; private: MPI_Comm comm; @@ -80,4 +89,5 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); void AssignSolidBoundary(double *ion_solid); + void IonConcentration_LB_to_Phys(DoubleArray &Den_reg); }; diff --git a/models/MultiPhysController.cpp b/models/MultiPhysController.cpp index 1453067a..a54223d3 100644 --- a/models/MultiPhysController.cpp +++ b/models/MultiPhysController.cpp @@ -1,7 +1,8 @@ #include "models/MultiPhysController.h" ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM): -rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0),SchmidtNum(0),comm(COMM) +rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0), +analysis_interval(0),tolerance(0),comm(COMM) { } @@ -19,34 +20,50 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){ // Default parameters timestepMax = 10000; Restart = false; - SchmidtNum = 1.0; num_iter_Stokes=1; - num_iter_Ion=1; + num_iter_Ion.push_back(1); + analysis_interval = 500; + tolerance = 1.0e-6; // load input parameters if (study_db->keyExists( "timestepMax" )){ timestepMax = study_db->getScalar( "timestepMax" ); } - if (study_db->keyExists( "Schmidt_Number" )){ - SchmidtNum = study_db->getScalar( "Schmidt_Number" ); + if (study_db->keyExists( "analysis_interval" )){ + analysis_interval = study_db->getScalar( "analysis_interval" ); } + if (study_db->keyExists( "tolerance" )){ + tolerance = study_db->getScalar( "tolerance" ); + } + //if (study_db->keyExists( "time_conv" )){ + // time_conv = study_db->getScalar( "time_conv" ); + //} + //if (study_db->keyExists( "Schmidt_Number" )){ + // SchmidtNum = 
study_db->getScalar( "Schmidt_Number" ); + //} + // recalculate relevant parameters - if (SchmidtNum>1){ - num_iter_Stokes = int(round(SchmidtNum/2)*2); - num_iter_Ion = 1; - } - else if (SchmidtNum>0 && SchmidtNum<1){ - num_iter_Ion = int(round((1.0/SchmidtNum)/2)*2); - num_iter_Stokes = 1; - } - else{ - ERROR("Error: SchmidtNum (Schmidt number) must be a positive number! \n"); - } + //if (SchmidtNum>1){ + // num_iter_Stokes = int(round(SchmidtNum/2)*2); + // num_iter_Ion = 1; + //} + //else if (SchmidtNum>0 && SchmidtNum<1){ + // num_iter_Ion = int(round((1.0/SchmidtNum)/2)*2); + // num_iter_Stokes = 1; + //} + //else if (SchmidtNum==1){ + // num_iter_Stokes = 1; + // num_iter_Ion = 1; + //} + //else{ + // ERROR("Error: SchmidtNum (Schmidt number) must be a positive number! \n"); + //} // load input parameters // in case user wants to have an absolute control over the iternal iteration - if (study_db->keyExists( "num_iter_Ion" )){ - num_iter_Ion = study_db->getScalar( "num_iter_Ion" ); + if (study_db->keyExists( "num_iter_Ion_List" )){ + num_iter_Ion.clear(); + num_iter_Ion = study_db->getVector( "num_iter_Ion_List" ); } if (study_db->keyExists( "num_iter_Stokes" )){ num_iter_Stokes = study_db->getScalar( "num_iter_Stokes" ); @@ -54,4 +71,70 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){ } +int ScaLBL_Multiphys_Controller::getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv){ + //Return number of internal iterations for the Stokes solver + int num_iter_stokes; + vector TimeConv; + printf("*****Debug; IonTimeConv size = %i\n",IonTimeConv.size()); + for (unsigned int i =0; i::iterator it_max = max_element(TimeConv.begin(),TimeConv.end()); + int idx_max = distance(TimeConv.begin(),it_max); + if (idx_max==0){ + num_iter_stokes = 2; + } + else{ + double temp = 2*TimeConv[idx_max]/StokesTimeConv;//the factor 2 is the number of iterations for the element has max time_conv + num_iter_stokes = int(round(temp/2)*2); + } + return 
num_iter_stokes; +} + +vector ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv){ + //Return number of internal iterations for the Ion transport solver + vector num_iter_ion; + vector TimeConv; + TimeConv.assign(IonTimeConv.begin(),IonTimeConv.end()); + TimeConv.insert(TimeConv.begin(),StokesTimeConv); + vector::iterator it_max = max_element(TimeConv.begin(),TimeConv.end()); + unsigned int idx_max = distance(TimeConv.begin(),it_max); + if (idx_max==0){ + for (unsigned int idx=1;idx #include #include +#include +#include #include "common/ScaLBL.h" #include "common/Communication.h" @@ -22,13 +24,17 @@ public: void ReadParams(string filename); void ReadParams(std::shared_ptr db0); + int getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); + vector getIonNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); + //void getIonNumIter_PNP_coupling(double StokesTimeConv,vector &IonTimeConv,vector &IonTimeMax); bool Restart; - //int timestep; int timestepMax; int num_iter_Stokes; - int num_iter_Ion; - double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity + vector num_iter_Ion; + int analysis_interval; + double tolerance; + //double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity int rank,nprocs; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 89a1c42a..b74536c5 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -59,7 +59,7 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( "BC_Solid" )){ BoundaryConditionSolid = electric_db->getScalar( "BC_Solid" ); } - // Read boundary condition for electric potentiona + // Read boundary condition for electric potential // BC = 0: normal periodic BC // BC = 1: fixed inlet and outlet potential BoundaryCondition = 0; diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index caaf2877..3b0b2d3a 100644 --- a/models/StokesModel.cpp 
+++ b/models/StokesModel.cpp @@ -91,12 +91,84 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ time_conv = (h*h*1.0e-12)*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] den_scale = rho_phys/rho0*(h*h*h*1.0e-18);//scale factor for density - if (rank==0) printf("*****************************************************\n"); - if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: \n"); - if (rank==0) printf(" Time conversion factor: %.5g [sec/lt]\n", time_conv); - if (rank==0) printf(" Internal iteration: %i [lt]\n", timestepMax); - if (rank==0) printf("*****************************************************\n"); } + +void ScaLBL_StokesModel::ReadParams(string filename){ + //NOTE the max time step is left unspecified + + // read the input database + db = std::make_shared( filename ); + domain_db = db->getDatabase( "Domain" ); + stokes_db = db->getDatabase( "Stokes" ); + + + //---------------------- Default model parameters --------------------------// + rho_phys = 1000.0; //by default use water density; unit [kg/m^3] + nu_phys = 1.004e-6;//by default use water kinematic viscosity at 20C; unit [m^2/sec] + h = 1.0;//image resolution;[um] + tau = 1.0; + mu = (tau-0.5)/3.0;//LB kinematic viscosity;unit [lu^2/lt] + time_conv = h*h*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] + rho0 = 1.0;//LB density + den_scale = rho_phys/rho0*(h*h*h*1.0e-18);//scale factor for density + tolerance = 1.0e-8; + Fx = Fy = 0.0; + Fz = 1.0e-5; + //--------------------------------------------------------------------------// + + // Read domain parameters + BoundaryCondition = 0; + if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } + if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu + h = domain_db->getScalar( "voxel_length" ); + } + + // Single-fluid Navier-Stokes Model parameters + //if (stokes_db->keyExists( "timestepMax" )){ + // timestepMax = stokes_db->getScalar( 
"timestepMax" ); + //} + if (stokes_db->keyExists( "tolerance" )){ + tolerance = stokes_db->getScalar( "tolerance" ); + } + if (stokes_db->keyExists( "tau" )){ + tau = stokes_db->getScalar( "tau" ); + } + if (stokes_db->keyExists( "rho0" )){ + rho0 = stokes_db->getScalar( "rho0" ); + } + if (stokes_db->keyExists( "nu_phys" )){ + nu_phys = stokes_db->getScalar( "nu_phys" ); + } + if (stokes_db->keyExists( "rho_phys" )){ + rho_phys = stokes_db->getScalar( "rho_phys" ); + } + if (stokes_db->keyExists( "F" )){ + Fx = stokes_db->getVector( "F" )[0]; + Fy = stokes_db->getVector( "F" )[1]; + Fz = stokes_db->getVector( "F" )[2]; + } + if (stokes_db->keyExists( "Restart" )){ + Restart = stokes_db->getScalar( "Restart" ); + } + if (stokes_db->keyExists( "din" )){ + din = stokes_db->getScalar( "din" ); + } + if (stokes_db->keyExists( "dout" )){ + dout = stokes_db->getScalar( "dout" ); + } + if (stokes_db->keyExists( "flux" )){ + flux = stokes_db->getScalar( "flux" ); + } + + // Re-calculate model parameters due to parameter read + mu=(tau-0.5)/3.0; + time_conv = (h*h*1.0e-12)*mu/nu_phys;//time conversion factor from physical to LB unit; [sec/lt] + den_scale = rho_phys/rho0*(h*h*h*1.0e-18);//scale factor for density + +} + void ScaLBL_StokesModel::SetDomain(){ Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases @@ -235,6 +307,12 @@ void ScaLBL_StokesModel::Initialize(){ if (rank==0) printf("LB Single-Fluid Solver: Initializing distributions \n"); if (rank==0) printf("****************************************************************\n"); ScaLBL_D3Q19_Init(fq, Np); + + if (rank==0) printf("*****************************************************\n"); + if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: \n"); + if (rank==0) printf(" Time conversion factor: %.5g [sec/lt]\n", time_conv); + if (rank==0) printf(" Internal iteration: %i [lt]\n", timestepMax); + if 
(rank==0) printf("*****************************************************\n"); } void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ @@ -243,6 +321,7 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ timestep = 0; while (timestep < timestepMax) { //************************************************************************/ + timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); @@ -334,34 +413,123 @@ void ScaLBL_StokesModel::Velocity_LB_to_Phys(DoubleArray &Vel_reg){ } } -//void ScaLBL_StokesModel::getVelocity(){ -// //get velocity in physical unit [m/sec] -// ScaLBL_D3Q19_Momentum_Phys(fq, Velocity, h, time_conv, Np); -// ScaLBL_DeviceBarrier(); MPI_Barrier(comm); -// -// DoubleArray PhaseField(Nx,Ny,Nz); -// ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); -// FILE *VELX_FILE; -// sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); -// VELX_FILE = fopen(LocalRankFilename,"wb"); -// fwrite(PhaseField.data(),8,N,VELX_FILE); -// fclose(VELX_FILE); -// -// ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); -// FILE *VELY_FILE; -// sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); -// VELY_FILE = fopen(LocalRankFilename,"wb"); -// fwrite(PhaseField.data(),8,N,VELY_FILE); -// fclose(VELY_FILE); -// -// ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); -// FILE *VELZ_FILE; -// sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); -// VELZ_FILE = fopen(LocalRankFilename,"wb"); -// fwrite(PhaseField.data(),8,N,VELZ_FILE); -// fclose(VELZ_FILE); -// -//} +vector ScaLBL_StokesModel::computeElectricForceAvg(double *ChargeDensity, double *ElectricField){ + + double *Ex_host; + double *Ey_host; + double *Ez_host; + Ex_host = new double[Np]; + Ey_host = new double[Np]; + 
Ez_host = new double[Np]; + + double *rhoE_host; + rhoE_host = new double[Np]; + + ScaLBL_CopyToHost(Ex_host,&ElectricField[0*Np],Np*sizeof(double)); + ScaLBL_CopyToHost(Ey_host,&ElectricField[1*Np],Np*sizeof(double)); + ScaLBL_CopyToHost(Ez_host,&ElectricField[2*Np],Np*sizeof(double)); + ScaLBL_CopyToHost(rhoE_host,ChargeDensity,Np*sizeof(double)); + + double count_loc=0; + double count; + double Fx_avg,Fy_avg,Fz_avg;//average electric field induced force + double Fx_loc,Fy_loc,Fz_loc; + Fx_loc = Fy_loc = Fz_loc = 0.0; + + for (int idx=0; idxLastExterior(); idx++){ + Fx_loc += rhoE_host[idx]*Ex_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + Fy_loc += rhoE_host[idx]*Ey_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + Fz_loc += rhoE_host[idx]*Ez_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + count_loc+=1.0; + } + for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ + Fx_loc += rhoE_host[idx]*Ex_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + Fy_loc += rhoE_host[idx]*Ey_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + Fz_loc += rhoE_host[idx]*Ez_host[idx]*(time_conv*time_conv)/(h*h*1.0e-12)/den_scale; + count_loc+=1.0; + } + + MPI_Allreduce(&Fx_loc,&Fx_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&Fy_loc,&Fy_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&Fz_loc,&Fz_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + Fx_avg /= count; + Fy_avg /= count; + Fz_avg /= count; + + vectorF_avg{Fx_avg,Fy_avg,Fz_avg}; + + delete [] Ex_host; + delete [] Ey_host; + delete [] Ez_host; + delete [] rhoE_host; + + return F_avg; +} + +double ScaLBL_StokesModel::CalVelocityConvergence(double& flow_rate_previous,double *ChargeDensity, double *ElectricField){ + + //----------------------------------------------------- + ScaLBL_D3Q19_Momentum(fq,Velocity, Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + 
ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); + + double count_loc=0; + double count; + double vax,vay,vaz; + double vax_loc,vay_loc,vaz_loc; + vax_loc = vay_loc = vaz_loc = 0.f; + for (int k=1; k 0){ + vax_loc += Velocity_x(i,j,k); + vay_loc += Velocity_y(i,j,k); + vaz_loc += Velocity_z(i,j,k); + count_loc+=1.0; + } + } + } + } + MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + vax /= count; + vay /= count; + vaz /= count; + + vector Eforce; + Eforce = computeElectricForceAvg(ChargeDensity,ElectricField); + double TFx = Fx+Eforce[0];//TF: total body force + double TFy = Fy+Eforce[1]; + double TFz = Fz+Eforce[2]; + double force_mag = sqrt(TFx*TFx+TFy*TFy+TFz*TFz); + double dir_x = TFx/force_mag; + double dir_y = TFy/force_mag; + double dir_z = TFz/force_mag; + if (force_mag == 0.0){ + // default to z direction + dir_x = 0.0; + dir_y = 0.0; + dir_z = 1.0; + force_mag = 1.0; + } + double flow_rate = (vax*dir_x + vay*dir_y + vaz*dir_z); + double error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate); + flow_rate_previous = flow_rate; + //---------------------------------------------------- + + //for debugging + if (rank==0){ + printf("StokesModel: error: %.5g\n",error); + } + return error; +} void ScaLBL_StokesModel::Run(){ double rlx_setA=1.0/tau; diff --git a/models/StokesModel.h b/models/StokesModel.h index d40df415..346d75c3 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -22,6 +22,7 @@ public: // functions in they should be run void ReadParams(string filename,int num_iter); + void ReadParams(string filename); void ReadParams(std::shared_ptr db0); void SetDomain(); void ReadInput(); @@ -31,6 +32,7 @@ 
public: void Run_Lite(double *ChargeDensity, double *ElectricField); void VelocityField(); void getVelocity(int timestep); + double CalVelocityConvergence(double& flow_rate_previous,double *ChargeDensity, double *ElectricField); bool Restart,pBC; int timestep,timestepMax; @@ -81,4 +83,5 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); void Velocity_LB_to_Phys(DoubleArray &Vel_reg); + vector computeElectricForceAvg(double *ChargeDensity, double *ElectricField); }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 43167b3f..64232406 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -48,6 +48,9 @@ ADD_LBPM_TEST( TestTopo3D ) ADD_LBPM_TEST( TestFluxBC ) ADD_LBPM_TEST( TestMap ) ADD_LBPM_TEST( TestPoissonSolver ) +ADD_LBPM_TEST( TestIonModel ) +ADD_LBPM_TEST( TestNernstPlanck ) +ADD_LBPM_TEST( TestPNP_Stokes ) #ADD_LBPM_TEST( TestMRT ) #ADD_LBPM_TEST( TestColorGrad ) #ADD_LBPM_TEST( TestColorGradDFH ) diff --git a/tests/TestIonModel.cpp b/tests/TestIonModel.cpp new file mode 100644 index 00000000..32f8302b --- /dev/null +++ b/tests/TestIonModel.cpp @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "models/IonModel.h" +#include "models/MultiPhysController.h" + +using namespace std; + +//*************************************************************************** +// Test lattice-Boltzmann Ion Model coupled with Poisson equation +//*************************************************************************** + +int main(int argc, char **argv) +{ + // Initialize MPI + int provided_thread_support = -1; + MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); + if ( rank==0 && provided_thread_supportci_avg_previous{0.0,0.0};//assuming 1:1 solution + while (timestep < Study.timestepMax && error > Study.tolerance){ + + timestep++; + 
IonModel.Run(IonModel.FluidVelocityDummy,IonModel.ElectricFieldDummy); //solve for ion transport and electric potential + timestep++;//AA operations + + if (timestep%Study.analysis_interval==0){ + error = IonModel.CalIonDenConvergence(ci_avg_previous); + } + } + IonModel.getIonConcentration(timestep); + + if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); + if (rank==0) printf("*************************************************************\n"); + + PROFILE_STOP("Main"); + PROFILE_SAVE("TestIonModel",1); + // **************************************************** + MPI_Barrier(comm); + } // Limit scope so variables that contain communicators will free before MPI_Finialize + MPI_Comm_free(&comm); + MPI_Finalize(); +} + + diff --git a/tests/TestNernstPlanck.cpp b/tests/TestNernstPlanck.cpp new file mode 100644 index 00000000..0344e1b1 --- /dev/null +++ b/tests/TestNernstPlanck.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "models/IonModel.h" +#include "models/PoissonSolver.h" +#include "models/MultiPhysController.h" + +using namespace std; + +//*************************************************************************** +// Test lattice-Boltzmann Ion Model coupled with Poisson equation +//*************************************************************************** + +int main(int argc, char **argv) +{ + // Initialize MPI + int provided_thread_support = -1; + MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); + if ( rank==0 && provided_thread_supportci_avg_previous{0.0,0.0};//assuming 1:1 solution + while (timestep < Study.timestepMax && error > Study.tolerance){ + + timestep++; + PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + 
IonModel.Run(IonModel.FluidVelocityDummy,PoissonSolver.ElectricField); //solve for ion transport and electric potential + + timestep++;//AA operations + + if (timestep%Study.analysis_interval==0){ + error = IonModel.CalIonDenConvergence(ci_avg_previous); + } + } + + PoissonSolver.getElectricPotential(timestep); + PoissonSolver.getElectricField(timestep); + IonModel.getIonConcentration(timestep); + + if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); + if (rank==0) printf("*************************************************************\n"); + + PROFILE_STOP("Main"); + PROFILE_SAVE("lbpm_electrokinetic_simulator",1); + // **************************************************** + MPI_Barrier(comm); + } // Limit scope so variables that contain communicators will free before MPI_Finialize + MPI_Comm_free(&comm); + MPI_Finalize(); +} + + diff --git a/tests/TestPNP_Stokes.cpp b/tests/TestPNP_Stokes.cpp new file mode 100644 index 00000000..ee5d43c5 --- /dev/null +++ b/tests/TestPNP_Stokes.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "models/IonModel.h" +#include "models/StokesModel.h" +#include "models/PoissonSolver.h" +#include "models/MultiPhysController.h" + +using namespace std; + +//*************************************************************************** +// Test lattice-Boltzmann Ion Model coupled with Poisson equation +//*************************************************************************** + +int main(int argc, char **argv) +{ + // Initialize MPI + int provided_thread_support = -1; + MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); + MPI_Comm comm; + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + int rank = comm_rank(comm); + int nprocs = comm_size(comm); + if ( rank==0 && provided_thread_supportci_avg_previous{0.0,0.0};//assuming 1:1 solution + double vel_avg_previous = 0.0; + while (timestep < Study.timestepMax && error > Study.tolerance){ + 
+ timestep++; + PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity + IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential + + timestep++;//AA operations + + if (timestep%Study.analysis_interval==0){ + error_ion = IonModel.CalIonDenConvergence(ci_avg_previous); + error_stokes = StokesModel.CalVelocityConvergence(vel_avg_previous,IonModel.ChargeDensity,PoissonSolver.ElectricField); + error = max(error_ion,error_stokes); + } + } + + PoissonSolver.getElectricPotential(timestep); + PoissonSolver.getElectricField(timestep); + IonModel.getIonConcentration(timestep); + StokesModel.getVelocity(timestep); + + if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); + if (rank==0) printf("*************************************************************\n"); + + PROFILE_STOP("Main"); + PROFILE_SAVE("lbpm_electrokinetic_simulator",1); + // **************************************************** + MPI_Barrier(comm); + } // Limit scope so variables that contain communicators will free before MPI_Finialize + MPI_Comm_free(&comm); + MPI_Finalize(); +} + + diff --git a/tests/TestPoissonSolver.cpp b/tests/TestPoissonSolver.cpp index 309a03c7..7d44e573 100644 --- a/tests/TestPoissonSolver.cpp +++ b/tests/TestPoissonSolver.cpp @@ -50,7 +50,6 @@ int main(int argc, char **argv) PoissonSolver.ReadInput(); PoissonSolver.Create(); PoissonSolver.Initialize(); - PoissonSolver.getElectricPotential(0); //Initialize dummy charge density for test PoissonSolver.DummyChargeDensity(); @@ -59,34 +58,6 @@ int main(int argc, char **argv) PoissonSolver.getElectricPotential(1); PoissonSolver.getElectricField(1); - //int timestep=0; - //while (timestep < Study.timestepMax){ - // - // timestep++; - // //if (rank==0) printf("timestep=%i; running Poisson 
solver\n",timestep); - // PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental - // //PoissonSolver.getElectricPotential(timestep); - - // //if (rank==0) printf("timestep=%i; running StokesModel\n",timestep); - // StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity - // //StokesModel.getVelocity(timestep); - - // //if (rank==0) printf("timestep=%i; running Ion model\n",timestep); - // IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential - // //IonModel.getIonConcentration(timestep); - // - // - // timestep++;//AA operations - // //-------------------------------------------- - // //potentially leave analysis module for future - // //-------------------------------------------- - //} - - //StokesModel.getVelocity(timestep); - //PoissonSolver.getElectricPotential(timestep); - //PoissonSolver.getElectricField(timestep); - //IonModel.getIonConcentration(timestep); - if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_dfh_simulator.cpp index 1df5c5e1..75fe87e5 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_dfh_simulator.cpp @@ -61,7 +61,7 @@ int main(int argc, char **argv) StokesModel.Initialize(); // initializing the model will set initial conditions for variables // Initialize LB-Ion model - IonModel.ReadParams(filename,Study.num_iter_Ion,Study.num_iter_Stokes,StokesModel.time_conv); + IonModel.ReadParams(filename,Study.num_iter_Ion); IonModel.SetDomain(); IonModel.ReadInput(); IonModel.Create(); From 039978cc81cea94762a4ee877039b2aa2d6dcaa6 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 20 Sep 2020 11:00:36 -0400 Subject: [PATCH 031/205] GPU 
version is available now --- common/ScaLBL.h | 5 +- cpu/Ion.cpp | 8 +- cpu/Poisson.cpp | 206 ++-- gpu/D3Q7BC.cu | 918 +--------------- gpu/Ion.cu | 344 ++++++ gpu/Poisson.cu | 330 ++++++ gpu/Stokes.cu | 995 ++++++++++++++++++ models/MultiPhysController.cpp | 13 +- models/MultiPhysController.h | 1 + tests/CMakeLists.txt | 2 +- tests/TestPNP_Stokes.cpp | 2 +- ..._electrokinetic_SingleFluid_simulator.cpp} | 47 +- 12 files changed, 1858 insertions(+), 1013 deletions(-) create mode 100644 gpu/Ion.cu create mode 100644 gpu/Poisson.cu create mode 100644 gpu/Stokes.cu rename tests/{lbpm_electrokinetic_dfh_simulator.cpp => lbpm_electrokinetic_SingleFluid_simulator.cpp} (77%) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index a4a5fad3..9a711330 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -102,8 +102,9 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *d extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC, - int strideY, int strideZ,int start, int finish, int Np); +//maybe deprecated +//extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC, +// int strideY, int strideZ,int start, int finish, int Np); // LBM Stokes Model (adapted from MRT model) diff --git a/cpu/Ion.cpp b/cpu/Ion.cpp index 236bca70..98d35142 100644 --- a/cpu/Ion.cpp +++ b/cpu/Ion.cpp @@ -229,10 +229,10 @@ extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant for (n=start; n0) + CD_tmp; + Ci = Den[n+ion_component*Np]; + CD = ChargeDensity[n]; + CD_tmp = F*IonValence*Ci; + ChargeDensity[n] = CD*(ion_component>0) + CD_tmp; } } diff --git a/cpu/Poisson.cpp 
b/cpu/Poisson.cpp index d76bbd42..c84350cd 100644 --- a/cpu/Poisson.cpp +++ b/cpu/Poisson.cpp @@ -235,109 +235,109 @@ extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, in } } -extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC, - int strideY, int strideZ,int start, int finish, int Np){ - - int n,nn; - int ijk; - int id; - // distributions - double m1,m2,m3,m4,m5,m6,m7,m8,m9; - double m10,m11,m12,m13,m14,m15,m16,m17,m18; - double nx,ny,nz; - - for (n=start; n0)+Psi[ijk]*(id<=0);// get neighbor for phi - 1 - //........................................................................ - nn = ijk+1; // neighbor index (get convention) - id = ID[nn]; - m2 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 2 - //........................................................................ - nn = ijk-strideY; // neighbor index (get convention) - id = ID[nn]; - m3 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 3 - //........................................................................ - nn = ijk+strideY; // neighbor index (get convention) - id = ID[nn]; - m4 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 4 - //........................................................................ - nn = ijk-strideZ; // neighbor index (get convention) - id = ID[nn]; - m5 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 5 - //........................................................................ - nn = ijk+strideZ; // neighbor index (get convention) - id = ID[nn]; - m6 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 6 - //........................................................................ - nn = ijk-strideY-1; // neighbor index (get convention) - id = ID[nn]; - m7 = SolidBC==1 ? 
Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 7 - //........................................................................ - nn = ijk+strideY+1; // neighbor index (get convention) - id = ID[nn]; - m8 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 8 - //........................................................................ - nn = ijk+strideY-1; // neighbor index (get convention) - id = ID[nn]; - m9 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 9 - //........................................................................ - nn = ijk-strideY+1; // neighbor index (get convention) - id = ID[nn]; - m10 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 10 - //........................................................................ - nn = ijk-strideZ-1; // neighbor index (get convention) - id = ID[nn]; - m11 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 11 - //........................................................................ - nn = ijk+strideZ+1; // neighbor index (get convention) - id = ID[nn]; - m12 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 12 - //........................................................................ - nn = ijk+strideZ-1; // neighbor index (get convention) - id = ID[nn]; - m13 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 13 - //........................................................................ - nn = ijk-strideZ+1; // neighbor index (get convention) - id = ID[nn]; - m14 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 14 - //........................................................................ - nn = ijk-strideZ-strideY; // neighbor index (get convention) - id = ID[nn]; - m15 = SolidBC==1 ? 
Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 15 - //........................................................................ - nn = ijk+strideZ+strideY; // neighbor index (get convention) - id = ID[nn]; - m16 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 16 - //........................................................................ - nn = ijk+strideZ-strideY; // neighbor index (get convention) - id = ID[nn]; - m17 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 17 - //........................................................................ - nn = ijk-strideZ+strideY; // neighbor index (get convention) - id = ID[nn]; - m18 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 18 - //............Compute the Color Gradient................................... - //nx = 1.f/6.f*(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); - //ny = 1.f/6.f*(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); - //nz = 1.f/6.f*(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); - nx = 1.f/6.f*(m1-m2);//but looks like it needs to multiply another factor of 3 - ny = 1.f/6.f*(m3-m4); - nz = 1.f/6.f*(m5-m6); - - ElectricField[n] = nx; - ElectricField[Np+n] = ny; - ElectricField[2*Np+n] = nz; - } -} +//extern "C" void ScaLBL_D3Q7_Poisson_ElectricField(int *neighborList, int *Map, signed char *ID, double *Psi, double *ElectricField, int SolidBC, +// int strideY, int strideZ,int start, int finish, int Np){ +// +// int n,nn; +// int ijk; +// int id; +// // distributions +// double m1,m2,m3,m4,m5,m6,m7,m8,m9; +// double m10,m11,m12,m13,m14,m15,m16,m17,m18; +// double nx,ny,nz; +// +// for (n=start; n0)+Psi[ijk]*(id<=0);// get neighbor for phi - 1 +// //........................................................................ +// nn = ijk+1; // neighbor index (get convention) +// id = ID[nn]; +// m2 = SolidBC==1 ? 
Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 2 +// //........................................................................ +// nn = ijk-strideY; // neighbor index (get convention) +// id = ID[nn]; +// m3 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 3 +// //........................................................................ +// nn = ijk+strideY; // neighbor index (get convention) +// id = ID[nn]; +// m4 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 4 +// //........................................................................ +// nn = ijk-strideZ; // neighbor index (get convention) +// id = ID[nn]; +// m5 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 5 +// //........................................................................ +// nn = ijk+strideZ; // neighbor index (get convention) +// id = ID[nn]; +// m6 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 6 +// //........................................................................ +// nn = ijk-strideY-1; // neighbor index (get convention) +// id = ID[nn]; +// m7 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 7 +// //........................................................................ +// nn = ijk+strideY+1; // neighbor index (get convention) +// id = ID[nn]; +// m8 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 8 +// //........................................................................ +// nn = ijk+strideY-1; // neighbor index (get convention) +// id = ID[nn]; +// m9 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 9 +// //........................................................................ +// nn = ijk-strideY+1; // neighbor index (get convention) +// id = ID[nn]; +// m10 = SolidBC==1 ? 
Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 10 +// //........................................................................ +// nn = ijk-strideZ-1; // neighbor index (get convention) +// id = ID[nn]; +// m11 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 11 +// //........................................................................ +// nn = ijk+strideZ+1; // neighbor index (get convention) +// id = ID[nn]; +// m12 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 12 +// //........................................................................ +// nn = ijk+strideZ-1; // neighbor index (get convention) +// id = ID[nn]; +// m13 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 13 +// //........................................................................ +// nn = ijk-strideZ+1; // neighbor index (get convention) +// id = ID[nn]; +// m14 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 14 +// //........................................................................ +// nn = ijk-strideZ-strideY; // neighbor index (get convention) +// id = ID[nn]; +// m15 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 15 +// //........................................................................ +// nn = ijk+strideZ+strideY; // neighbor index (get convention) +// id = ID[nn]; +// m16 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 16 +// //........................................................................ +// nn = ijk+strideZ-strideY; // neighbor index (get convention) +// id = ID[nn]; +// m17 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 17 +// //........................................................................ 
+// nn = ijk-strideZ+strideY; // neighbor index (get convention) +// id = ID[nn]; +// m18 = SolidBC==1 ? Psi[nn] : Psi[nn]*(id>0)+Psi[ijk]*(id<=0);// get neighbor for phi - 18 +// //............Compute the Color Gradient................................... +// //nx = 1.f/6.f*(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); +// //ny = 1.f/6.f*(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); +// //nz = 1.f/6.f*(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); +// nx = 1.f/6.f*(m1-m2);//but looks like it needs to multiply another factor of 3 +// ny = 1.f/6.f*(m3-m4); +// nz = 1.f/6.f*(m5-m6); +// +// ElectricField[n] = nx; +// ElectricField[Np+n] = ny; +// ElectricField[2*Np+n] = nz; +// } +//} //extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){ // int n; diff --git a/gpu/D3Q7BC.cu b/gpu/D3Q7BC.cu index 5f3ae92c..8d27f7d5 100644 --- a/gpu/D3Q7BC.cu +++ b/gpu/D3Q7BC.cu @@ -108,7 +108,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList } } -__global__ void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) { int idx, n; int nread,nr6; @@ -187,6 +187,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double int idx,n; double f0,f1,f2,f3,f4,f5,f6; idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ n = list[idx]; f0 = dist[n]; f1 = dist[2*Np+n]; @@ -232,7 +233,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList } } -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np) +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np) { int idx, n; int nread,nr6; @@ 
-264,940 +265,113 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, in } } -__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np) -{ - int idx, n; - int nread; - int nr5,nr11,nr14,nr15,nr18; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - double ux,uy,uz,Cyz,Cxz; - ux = uy = 0.0; - - idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx < count){ - - n = list[idx]; - f0 = dist[n]; - - nread = d_neighborList[n]; - f1 = dist[nread]; - - nread = d_neighborList[n+2*Np]; - f3 = dist[nread]; - - nread = d_neighborList[n+6*Np]; - f7 = dist[nread]; - - nread = d_neighborList[n+8*Np]; - f9 = dist[nread]; - - nread = d_neighborList[n+12*Np]; - f13 = dist[nread]; - - nread = d_neighborList[n+16*Np]; - f17 = dist[nread]; - - nread = d_neighborList[n+Np]; - f2 = dist[nread]; - - nread = d_neighborList[n+3*Np]; - f4 = dist[nread]; - - nread = d_neighborList[n+5*Np]; - f6 = dist[nread]; - - nread = d_neighborList[n+7*Np]; - f8 = dist[nread]; - - nread = d_neighborList[n+9*Np]; - f10 = dist[nread]; - - nread = d_neighborList[n+11*Np]; - f12 = dist[nread]; - - nread = d_neighborList[n+15*Np]; - f16 = dist[nread]; - - // Unknown distributions - nr5 = d_neighborList[n+4*Np]; - nr11 = d_neighborList[n+10*Np]; - nr15 = d_neighborList[n+14*Np]; - nr14 = d_neighborList[n+13*Np]; - nr18 = d_neighborList[n+17*Np]; - - //................................................... - //........Determine the inlet flow velocity......... 
- //ux = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14); - //uy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18); - uz = din - (f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f6+f12+f13+f16+f17)); - - Cxz = 0.5*(f1+f7+f9-f2-f10-f8) - 0.3333333333333333*ux; - Cyz = 0.5*(f3+f7+f10-f4-f9-f8) - 0.3333333333333333*uy; - - f5 = f6 + 0.33333333333333338*uz; - f11 = f12 + 0.16666666666666678*(uz+ux)-Cxz; - f14 = f13 + 0.16666666666666678*(uz-ux)+Cxz; - f15 = f16 + 0.16666666666666678*(uy+uz)-Cyz; - f18 = f17 + 0.16666666666666678*(uz-uy)+Cyz; - //........Store in "opposite" memory location.......... - dist[nr5] = f5; - dist[nr11] = f11; - dist[nr14] = f14; - dist[nr15] = f15; - dist[nr18] = f18; - } -} - -__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *d_neighborList, int *list, double *dist, double dout, int count, int Np) -{ - int idx,n,nread; - int nr6,nr12,nr13,nr16,nr17; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - double ux,uy,uz,Cyz,Cxz; - ux = uy = 0.0; - - idx = blockIdx.x*blockDim.x + threadIdx.x; - - // Loop over the boundary - threadblocks delineated by start...finish - if ( idx < count ){ - - n = list[idx]; - //........................................................................ - // Read distributions - //........................................................................ 
- f0 = dist[n]; - - nread = d_neighborList[n]; - f1 = dist[nread]; - - nread = d_neighborList[n+2*Np]; - f3 = dist[nread]; - - nread = d_neighborList[n+4*Np]; - f5 = dist[nread]; - - nread = d_neighborList[n+6*Np]; - f7 = dist[nread]; - - nread = d_neighborList[n+8*Np]; - f9 = dist[nread]; - - nread = d_neighborList[n+10*Np]; - f11 = dist[nread]; - - nread = d_neighborList[n+14*Np]; - f15 = dist[nread]; - - - nread = d_neighborList[n+Np]; - f2 = dist[nread]; - - nread = d_neighborList[n+3*Np]; - f4 = dist[nread]; - - nread = d_neighborList[n+7*Np]; - f8 = dist[nread]; - - nread = d_neighborList[n+9*Np]; - f10 = dist[nread]; - - nread = d_neighborList[n+13*Np]; - f14 = dist[nread]; - - nread = d_neighborList[n+17*Np]; - f18 = dist[nread]; - - // unknown distributions - nr6 = d_neighborList[n+5*Np]; - nr12 = d_neighborList[n+11*Np]; - nr16 = d_neighborList[n+15*Np]; - nr17 = d_neighborList[n+16*Np]; - nr13 = d_neighborList[n+12*Np]; - - - //........Determine the outlet flow velocity......... - //ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; - //uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; - uz = -dout + (f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f5+f11+f14+f15+f18)); - - Cxz = 0.5*(f1+f7+f9-f2-f10-f8) - 0.3333333333333333*ux; - Cyz = 0.5*(f3+f7+f10-f4-f9-f8) - 0.3333333333333333*uy; - - f6 = f5 - 0.33333333333333338*uz; - f12 = f11 - 0.16666666666666678*(uz+ux)+Cxz; - f13 = f14 - 0.16666666666666678*(uz-ux)-Cxz; - f16 = f15 - 0.16666666666666678*(uy+uz)+Cyz; - f17 = f18 - 0.16666666666666678*(uz-uy)-Cyz; - - //........Store in "opposite" memory location.......... - dist[nr6] = f6; - dist[nr12] = f12; - dist[nr13] = f13; - dist[nr16] = f16; - dist[nr17] = f17; - //................................................... 
- } -} - - -__global__ void dvc_ScaLBL_D3Q19_AAeven_Flux_BC_z(int *list, double *dist, double flux, double Area, - double *dvcsum, int count, int Np) -{ - int idx, n; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - double factor = 1.f/(Area); - double sum = 0.f; - - idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx < count){ - - n = list[idx]; - f0 = dist[n]; - f1 = dist[2*Np+n]; - f2 = dist[1*Np+n]; - f3 = dist[4*Np+n]; - f4 = dist[3*Np+n]; - f6 = dist[5*Np+n]; - f7 = dist[8*Np+n]; - f8 = dist[7*Np+n]; - f9 = dist[10*Np+n]; - f10 = dist[9*Np+n]; - f12 = dist[11*Np+n]; - f13 = dist[14*Np+n]; - f16 = dist[15*Np+n]; - f17 = dist[18*Np+n]; - sum = factor*(f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f6+f12+f13+f16+f17)); - } - - //sum = blockReduceSum(sum); - //if (threadIdx.x==0) - // atomicAdd(dvcsum, sum); - - extern __shared__ double temp[]; - thread_group g = this_thread_block(); - double block_sum = reduce_sum(g, temp, sum); - - if (g.thread_rank() == 0) atomicAdd(dvcsum, block_sum); -} - - -__global__ void dvc_ScaLBL_D3Q19_AAodd_Flux_BC_z(int *d_neighborList, int *list, double *dist, double flux, - double Area, double *dvcsum, int count, int Np) -{ - int idx, n; - int nread; - - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - double factor = 1.f/(Area); - double sum = 0.f; - - idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx < count){ - - n = list[idx]; - - f0 = dist[n]; - - nread = d_neighborList[n]; - f1 = dist[nread]; - - nread = d_neighborList[n+2*Np]; - f3 = dist[nread]; - - nread = d_neighborList[n+6*Np]; - f7 = dist[nread]; - - nread = d_neighborList[n+8*Np]; - f9 = dist[nread]; - - nread = d_neighborList[n+12*Np]; - f13 = dist[nread]; - - nread = d_neighborList[n+16*Np]; - f17 = dist[nread]; - - nread = d_neighborList[n+Np]; - f2 = dist[nread]; - - nread = d_neighborList[n+3*Np]; - f4 = dist[nread]; - - nread = d_neighborList[n+5*Np]; - f6 
= dist[nread]; - - nread = d_neighborList[n+7*Np]; - f8 = dist[nread]; - - nread = d_neighborList[n+9*Np]; - f10 = dist[nread]; - - nread = d_neighborList[n+11*Np]; - f12 = dist[nread]; - - nread = d_neighborList[n+15*Np]; - f16 = dist[nread]; - - sum = factor*(f0+f1+f2+f3+f4+f7+f8+f9+f10 + 2*(f6+f12+f13+f16+f17)); - - } - - //sum = blockReduceSum(sum); - //if (threadIdx.x==0) - // atomicAdd(dvcsum, sum); - - extern __shared__ double temp[]; - thread_group g = this_thread_block(); - double block_sum = reduce_sum(g, temp, sum); - - if (g.thread_rank() == 0) atomicAdd(dvcsum, block_sum); -} - - -__global__ void dvc_D3Q19_Velocity_BC_z(double *disteven, double *distodd, double uz, - int Nx, int Ny, int Nz) -{ - int n,N; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - double din; - - N = Nx*Ny*Nz; - n = Nx*Ny + blockIdx.x*blockDim.x + threadIdx.x; - - if (n < 2*Nx*Ny){ - //........................................................................ - // Read distributions from "opposite" memory convention - //........................................................................ - //........................................................................ - f1 = distodd[n]; - f3 = distodd[N+n]; - f5 = distodd[2*N+n]; - f7 = distodd[3*N+n]; - f9 = distodd[4*N+n]; - f11 = distodd[5*N+n]; - f13 = distodd[6*N+n]; - f15 = distodd[7*N+n]; - f17 = distodd[8*N+n]; - //........................................................................ - f0 = disteven[n]; - f2 = disteven[N+n]; - f4 = disteven[2*N+n]; - f6 = disteven[3*N+n]; - f8 = disteven[4*N+n]; - f10 = disteven[5*N+n]; - f12 = disteven[6*N+n]; - f14 = disteven[7*N+n]; - f16 = disteven[8*N+n]; - f18 = disteven[9*N+n]; - //................................................... 
- - // Determine the outlet flow velocity - // uz = 1.0 - (f0+f4+f3+f2+f1+f8+f7+f9+f10 + - // 2*(f5+f15+f18+f11+f14))/din; - din = (f0+f4+f3+f2+f1+f8+f7+f9+f10+2*(f5+f15+f18+f11+f14))/(1.0-uz); - // Set the unknown distributions: - f6 = f5 + 0.3333333333333333*din*uz; - f16 = f15 + 0.1666666666666667*din*uz; - f17 = f16 + f4 - f3-f15+f18+f8-f7 +f9-f10; - f12= (din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f2+f1-f14+f11-f8+f7+f9-f10)*0.5; - f13= din*uz+f5+ f15+f18+f11+f14-f6-f16-f17-f12; - - //........Store in "opposite" memory location.......... - disteven[3*N+n] = f6; - disteven[6*N+n] = f12; - distodd[6*N+n] = f13; - disteven[8*N+n] = f16; - distodd[8*N+n] = f17; - //................................................... - } -} - -__global__ void dvc_D3Q19_Velocity_BC_Z(double *disteven, double *distodd, double uz, - int Nx, int Ny, int Nz, int outlet){ - int n,N; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - double dout; - - N = Nx*Ny*Nz; - n = outlet + blockIdx.x*blockDim.x + threadIdx.x; - - // Loop over the boundary - threadblocks delineated by start...finish - if ( n 0 ){ - f_even[n] = 0 + 0.01*0; - f_odd[n] = 0+ 0.01*1; //double(100*n)+1.f; - f_even[N+n] = 1+ 0.01*2; //double(100*n)+2.f; - f_odd[N+n] = 1+ 0.01*3; //double(100*n)+3.f; - f_even[2*N+n] = 2+ 0.01*4; //double(100*n)+4.f; - f_odd[2*N+n] = 2+ 0.01*5; //double(100*n)+5.f; - f_even[3*N+n] = 3+ 0.01*6; //double(100*n)+6.f; - f_odd[3*N+n] = 3+ 0.01*7; //double(100*n)+7.f; - f_even[4*N+n] = 4+ 0.01*8; //double(100*n)+8.f; - f_odd[4*N+n] = 4+ 0.01*9; //double(100*n)+9.f; - f_even[5*N+n] = 5+ 0.01*10; //double(100*n)+10.f; - f_odd[5*N+n] = 5+ 0.01*11; //double(100*n)+11.f; - f_even[6*N+n] = 6+ 0.01*12; //double(100*n)+12.f; - f_odd[6*N+n] = 6+ 0.01*13; //double(100*n)+13.f; - f_even[7*N+n] = 7+ 0.01*14; //double(100*n)+14.f; - f_odd[7*N+n] = 7+ 0.01*15; //double(100*n)+15.f; - f_even[8*N+n] = 8+ 0.01*16; //double(100*n)+16.f; - f_odd[8*N+n] = 8+ 0.01*17; 
//double(100*n)+17.f; - f_even[9*N+n] = 9+ 0.01*18; //double(100*n)+18.f; - } - else{ - for(int q=0; q<9; q++){ - f_even[q*N+n] = -1.0; - f_odd[q*N+n] = -1.0; - } - f_even[9*N+n] = -1.0; - } - } - } -} - - //************************************************************************* -//extern "C" void ScaLBL_D3Q19_MapRecv(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count, -// int *d3q19_recvlist, int Nx, int Ny, int Nz){ -// int GRID = count / 512 + 1; -// dvc_ScaLBL_D3Q19_Unpack <<>>(q, Cqx, Cqy, Cqz, list, start, count, d3q19_recvlist, Nx, Ny, Nz); -//} - -extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){ +extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_Pack <<>>(q, list, start, count, sendbuf, dist, N); + dvc_ScaLBL_Solid_Dirichlet_D3Q7<<>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",cudaGetErrorString(err)); + } } -extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N){ +extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_Unpack <<>>(q, list, start, count, recvbuf, dist, N); -} -//************************************************************************* - -extern "C" void ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np){ - dvc_ScaLBL_D3Q19_AA_Init<<>>(f_even, f_odd, Np); + dvc_ScaLBL_Solid_Neumann_D3Q7<<>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in 
ScaLBL_D3Q19_AA_Init: %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",cudaGetErrorString(err)); } } -extern "C" void ScaLBL_D3Q19_Init(double *dist, int Np){ - dvc_ScaLBL_D3Q19_Init<<>>(dist, Np); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *dist, int Np, double Den){ - dvc_ScaLBL_D3Q19_GreyIMRT_Init<<>>(dist, Np, Den); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_GreyIMRT_Init: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" void ScaLBL_D3Q19_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz){ - dvc_ScaLBL_D3Q19_Swap<<>>(ID, disteven, distodd, Nx, Ny, Nz); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, double *distodd, int Np) -{ - - const int Q = 9; - // cudaStream_t streams[Q]; - // Launch the swap operation as different kernels - for (int q=0; q>>(neighborList, disteven, distodd, Np, q); - } - // cpu should wait for all kernels to finish (to avoid launch of dependent kernels) - //cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) { - cudaFuncSetCacheConfig(dvc_ScaLBL_AAeven_Compact, cudaFuncCachePreferL1); - dvc_ScaLBL_AAeven_Compact<<>>(ID, d_dist, Np); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double 
*d_dist, int Np) { - cudaFuncSetCacheConfig(dvc_ScaLBL_AAodd_Compact, cudaFuncCachePreferL1); - dvc_ScaLBL_AAodd_Compact<<>>(ID,d_neighborList, d_dist,Np); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" void ScaLBL_D3Q19_Momentum(double *dist, double *vel, int Np){ - - dvc_ScaLBL_D3Q19_Momentum<<>>(dist, vel, Np); - - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Velocity: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" void ScaLBL_D3Q19_Pressure(double *fq, double *Pressure, int Np){ - dvc_ScaLBL_D3Q19_Pressure<<< NBLOCKS,NTHREADS >>>(fq, Pressure, Np); -} - -extern "C" void ScaLBL_D3Q19_Velocity_BC_z(double *disteven, double *distodd, double uz,int Nx, int Ny, int Nz){ - int GRID = Nx*Ny / 512 + 1; - dvc_D3Q19_Velocity_BC_z<<>>(disteven,distodd, uz, Nx, Ny, Nz); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Velocity_BC_z: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" void ScaLBL_D3Q19_Velocity_BC_Z(double *disteven, double *distodd, double uz, int Nx, int Ny, int Nz, int outlet){ - int GRID = Nx*Ny / 512 + 1; - dvc_D3Q19_Velocity_BC_Z<<>>(disteven, distodd, uz, Nx, Ny, Nz, outlet); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Velocity_BC_Z: %s \n",cudaGetErrorString(err)); - } -} - -extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, double flux,int Nx, int Ny, int Nz){ - - int GRID = Nx*Ny / 512 + 1; - - // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 - if (Nx*Ny > 512*512){ - printf("WARNING (ScaLBL_D3Q19_Flux_BC_z): CUDA reduction operation may fail if Nx*Ny > 512*512"); - } - - // Allocate memory to store the sums - double din; - double sum[1]; - double *dvcsum; - int sharedBytes = NTHREADS*sizeof(double); - cudaMalloc((void 
**)&dvcsum,sizeof(double)*Nx*Ny); - cudaMemset(dvcsum,0,sizeof(double)*Nx*Ny); - - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z (memory allocation): %s \n",cudaGetErrorString(err)); - } - - // compute the local flux and store the result - dvc_D3Q19_Flux_BC_z<<>>(disteven, distodd, flux, dvcsum, Nx, Ny, Nz); - - err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z (flux calculation, step 1): %s \n",cudaGetErrorString(err)); - } - - // Now read the total flux - cudaMemcpy(&sum[0],dvcsum,sizeof(double),cudaMemcpyDeviceToHost); - din=sum[0]; - - err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z (flux calculation, step 2): %s \n",cudaGetErrorString(err)); - } - - // free the memory needed for reduction - cudaFree(dvcsum); - - return din; -} - - -extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_z(int *list, double *dist, double din, int count, int N){ +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_z<<>>(list, dist, din, count, N); + dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<>>(list, dist, Vin, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAeven_Pressure_BC_z (kernel): %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",cudaGetErrorString(err)); } } -extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, double dout, int count, int N){ +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z<<>>(list, dist, dout, count, N); + dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<>>(list, dist, Vout, count, 
Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAeven_Pressure_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",cudaGetErrorString(err)); } } -extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int N){ +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z<<>>(neighborList, list, dist, din, count, N); + dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<>>(d_neighborList, list, dist, Vin, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAodd_Pressure_BC_z (kernel): %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",cudaGetErrorString(err)); } } -extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *neighborList, int *list, double *dist, double dout, int count, int N){ +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_Z<<>>(neighborList, list, dist, dout, count, N); + dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<>>(d_neighborList, list, dist, Vout, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAodd_Pressure_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",cudaGetErrorString(err)); } } -//******************************************************************************* -//******************************************************************************* 
-//******************************************************************************* - - -//******************************************************************************* -extern "C" double ScaLBL_D3Q19_AAeven_Flux_BC_z(int *list, double *dist, double flux, double area, - int count, int N){ +extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){ int GRID = count / 512 + 1; - - // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 - if (count > 512*512){ - printf("WARNING (ScaLBL_D3Q19_Flux_BC_Z): CUDA reduction operation may fail if count > 512*512"); - } - - // Allocate memory to store the sums - double din; - double sum[1]; - double *dvcsum; - cudaMalloc((void **)&dvcsum,sizeof(double)*count); - cudaMemset(dvcsum,0,sizeof(double)*count); - int sharedBytes = 512*sizeof(double); - + dvc_ScaLBL_Poisson_D3Q7_BC_z<<>>(list, Map, Psi, Vin, count); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (memory allocation): %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",cudaGetErrorString(err)); } - - // compute the local flux and store the result - dvc_ScaLBL_D3Q19_AAeven_Flux_BC_z<<>>(list, dist, flux, area, dvcsum, count, N); - err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (kernel): %s \n",cudaGetErrorString(err)); - } - - // Now read the total flux - cudaMemcpy(&sum[0],dvcsum,sizeof(double),cudaMemcpyDeviceToHost); - din=sum[0]; - err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (reduction): %s \n",cudaGetErrorString(err)); - } - - // free the memory needed for reduction - cudaFree(dvcsum); - - return din; } -extern "C" double ScaLBL_D3Q19_AAodd_Flux_BC_z(int *neighborList, int *list, double *dist, double flux, - double area, int count, int N){ - +extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int 
*list, int *Map, double *Psi, double Vout, int count){ int GRID = count / 512 + 1; - - // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 - if (count > 512*512){ - printf("WARNING (ScaLBL_D3Q19_AAodd_Flux_BC_z): CUDA reduction operation may fail if count > 512*512"); - } - - // Allocate memory to store the sums - double din; - double sum[1]; - double *dvcsum; - cudaMalloc((void **)&dvcsum,sizeof(double)*count); - cudaMemset(dvcsum,0,sizeof(double)*count); - int sharedBytes = 512*sizeof(double); + dvc_ScaLBL_Poisson_D3Q7_BC_Z<<>>(list, Map, Psi, Vout, count); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (memory allocation): %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",cudaGetErrorString(err)); } - - // compute the local flux and store the result - dvc_ScaLBL_D3Q19_AAodd_Flux_BC_z<<>>(neighborList, list, dist, flux, area, dvcsum, count, N); - err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (kernel): %s \n",cudaGetErrorString(err)); - } - // Now read the total flux - cudaMemcpy(&sum[0],dvcsum,sizeof(double),cudaMemcpyDeviceToHost); - din=sum[0]; - err = cudaGetLastError(); - if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (reduction): %s \n",cudaGetErrorString(err)); - } - - // free the memory needed for reduction - cudaFree(dvcsum); - - return din; } -extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, double flux, int Nx, int Ny, int Nz, int outlet){ - - int GRID = Nx*Ny / 512 + 1; - - // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 - if (Nx*Ny > 512*512){ - printf("WARNING (ScaLBL_D3Q19_Flux_BC_Z): CUDA reduction operation may fail if Nx*Ny > 512*512"); - } - - // Allocate memory to store the sums - double dout; - double sum[1]; - double *dvcsum; - cudaMalloc((void **)&dvcsum,sizeof(double)*Nx*Ny); - 
cudaMemset(dvcsum,0,sizeof(double)*Nx*Ny); - - // compute the local flux and store the result - dvc_D3Q19_Flux_BC_Z<<>>(disteven, distodd, flux, dvcsum, Nx, Ny, Nz, outlet); - - // Now read the total flux - cudaMemcpy(&sum[0],dvcsum,sizeof(double),cudaMemcpyDeviceToHost); - - // free the memory needed for reduction - - dout = sum[0]; - - cudaFree(dvcsum); - - return dout; - -} - -extern "C" double deviceReduce(double *in, double* out, int N) { - int threads = 512; - int blocks = min((N + threads - 1) / threads, 1024); - - double sum = 0.f; - deviceReduceKernel<<>>(in, out, N); - deviceReduceKernel<<<1, 1024>>>(out, out, blocks); - return sum; -} - -extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_Reflection_BC_z<<>>(list, dist, count, Np); + dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<>>(list, dist, Cin, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",cudaGetErrorString(err)); } } -extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){ int GRID = count / 512 + 1; - dvc_ScaLBL_D3Q19_Reflection_BC_Z<<>>(list, dist, count, Np); + dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<>>(list, dist, Cout, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",cudaGetErrorString(err)); } } -extern "C" void 
ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, - double Fy, double Fz){ - - dvc_ScaLBL_AAeven_MRT<<>>(dist,start,finish,Np,rlx_setA,rlx_setB,Fx,Fy,Fz); - - cudaError_t err = cudaGetLastError(); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<>>(d_neighborList, list, dist, Cin, count, Np); + cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAeven_MRT: %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",cudaGetErrorString(err)); } } -extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborlist, double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, - double Fy, double Fz){ - - dvc_ScaLBL_AAodd_MRT<<>>(neighborlist,dist,start,finish,Np,rlx_setA,rlx_setB,Fx,Fy,Fz); - - cudaError_t err = cudaGetLastError(); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<>>(d_neighborList, list, dist, Cout, count, Np); + cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ - printf("CUDA error in ScaLBL_D3Q19_AAeven_MRT: %s \n",cudaGetErrorString(err)); + printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",cudaGetErrorString(err)); } } diff --git a/gpu/Ion.cu b/gpu/Ion.cu new file mode 100644 index 00000000..dc94d2e5 --- /dev/null +++ b/gpu/Ion.cu @@ -0,0 +1,344 @@ +#include +#include +//#include + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ + int n,nread; + double fq,Ci; + + int S 
= Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + // q=2 + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + // q=4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + // q=6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + int n; + double Ci; + double ux,uy,uz; + double uEPx,uEPy,uEPz;//electrochemical induced velocity + double Ex,Ey,Ez;//electrical field + double f0,f1,f2,f3,f4,f5,f6; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0) + CD_tmp; + } + } +} + + +extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<>>(neighborList,dist,Den,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int 
start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<>>(dist,Den,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_Ion<<>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Ion<<>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_Init<<>>(dist,Den,DenInit,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_Ion_Init: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np); + + cudaError_t err = cudaGetLastError(); 
+ if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} diff --git a/gpu/Poisson.cu b/gpu/Poisson.cu new file mode 100644 index 00000000..84a78330 --- /dev/null +++ b/gpu/Poisson.cu @@ -0,0 +1,330 @@ +#include +#include +//#include + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){ + int n; + double psi;//electric potential + double fq; + int nread; + int idx; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu + Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound + Ez = (f5-f6)*rlx*4.0; + ElectricField[n+0*Np] = Ex; + ElectricField[n+1*Np] = Ey; + ElectricField[n+2*Np] = Ez; + + // q = 0 + dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 2 + dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + //........................................................................ 
+ } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + int n; + double psi;//electric potential + double Ex,Ey,Ez;//electric field + double rho_e;//local charge density + double f0,f1,f2,f3,f4,f5,f6; + double rlx=1.0/tau; + int idx; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(neighborList,Map,dist,Psi,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<>>(Map,dist,Psi,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_Poisson<<>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Poisson<<>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np); + 
+ cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Poisson_Init<<>>(Map,dist,Psi,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_Poisson_Init: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} diff --git a/gpu/Stokes.cu b/gpu/Stokes.cu new file mode 100644 index 00000000..d091b0b4 --- /dev/null +++ b/gpu/Stokes.cu @@ -0,0 +1,995 @@ +#include +#include +//#include + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){ + + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + int nread; + // body force due to electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int 
s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + //fp = dist[10*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // f2 = dist[10*Np+n]; + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + //fq = dist[Np+n]; + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + //fq = dist[11*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + //fq = dist[2*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + //fq = dist[12*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + //fq = dist[3*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + //fq = dist[13*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + //fq = dist[4*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = 
neighborList[n+8*Np]; + fq = dist[nread]; + //fq = dist[14*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + //fq = dist[5*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + //fq = dist[15*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + //fq = dist[6*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + //fq = dist[16*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + //fq = dist[7*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 
-= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + // write the velocity + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + + //..............incorporate external force................................................ + //..............carry out relaxation process............................................... + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9); + m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho0) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho0) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho0) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + //.................inverse transformation...................................................... 
+ + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + 
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double 
time_conv, int start, int finish, int Np){ + + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + // body force due to electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(neighborList,dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<>>(dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + diff --git a/models/MultiPhysController.cpp b/models/MultiPhysController.cpp index 
a54223d3..fcfb5403 100644 --- a/models/MultiPhysController.cpp +++ b/models/MultiPhysController.cpp @@ -2,7 +2,7 @@ ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM): rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0), -analysis_interval(0),tolerance(0),comm(COMM) +analysis_interval(0),visualization_interval(0),tolerance(0),comm(COMM) { } @@ -23,6 +23,7 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){ num_iter_Stokes=1; num_iter_Ion.push_back(1); analysis_interval = 500; + visualization_interval = 10000; tolerance = 1.0e-6; // load input parameters @@ -32,6 +33,9 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){ if (study_db->keyExists( "analysis_interval" )){ analysis_interval = study_db->getScalar( "analysis_interval" ); } + if (study_db->keyExists( "visualization_interval" )){ + visualization_interval = study_db->getScalar( "visualization_interval" ); + } if (study_db->keyExists( "tolerance" )){ tolerance = study_db->getScalar( "tolerance" ); } @@ -76,15 +80,8 @@ int ScaLBL_Multiphys_Controller::getStokesNumIter_PNP_coupling(double StokesTime int num_iter_stokes; vector TimeConv; - printf("*****Debug; IonTimeConv size = %i\n",IonTimeConv.size()); - for (unsigned int i =0; i::iterator it_max = max_element(TimeConv.begin(),TimeConv.end()); int idx_max = distance(TimeConv.begin(),it_max); if (idx_max==0){ diff --git a/models/MultiPhysController.h b/models/MultiPhysController.h index f1c51f93..f217248f 100644 --- a/models/MultiPhysController.h +++ b/models/MultiPhysController.h @@ -33,6 +33,7 @@ public: int num_iter_Stokes; vector num_iter_Ion; int analysis_interval; + int visualization_interval; double tolerance; //double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 64232406..7c854c72 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,7 +4,7 @@ 
ADD_LBPM_EXECUTABLE( lbpm_color_simulator ) ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) -ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_dfh_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) diff --git a/tests/TestPNP_Stokes.cpp b/tests/TestPNP_Stokes.cpp index ee5d43c5..f6369b89 100644 --- a/tests/TestPNP_Stokes.cpp +++ b/tests/TestPNP_Stokes.cpp @@ -112,7 +112,7 @@ int main(int argc, char **argv) if (rank==0) printf("*************************************************************\n"); PROFILE_STOP("Main"); - PROFILE_SAVE("lbpm_electrokinetic_simulator",1); + PROFILE_SAVE("TestPNP_Stokes",1); // **************************************************** MPI_Barrier(comm); } // Limit scope so variables that contain communicators will free before MPI_Finialize diff --git a/tests/lbpm_electrokinetic_dfh_simulator.cpp b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp similarity index 77% rename from tests/lbpm_electrokinetic_dfh_simulator.cpp rename to tests/lbpm_electrokinetic_SingleFluid_simulator.cpp index 75fe87e5..233889ac 100644 --- a/tests/lbpm_electrokinetic_dfh_simulator.cpp +++ b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp @@ -7,15 +7,15 @@ #include #include -#include "models/StokesModel.h" #include "models/IonModel.h" +#include "models/StokesModel.h" #include "models/PoissonSolver.h" #include "models/MultiPhysController.h" using namespace std; //*************************************************************************** -// Implementation of Multiphysics simulator using lattice-Boltzmann method +// Test lattice-Boltzmann Ion Model coupled with Poisson equation //*************************************************************************** int main(int argc, char **argv) @@ -35,7 +35,7 @@ int main(int argc, char **argv) { if (rank == 0){ 
printf("********************************************************\n"); - printf("Running Electrokinetic LBM Simulator \n"); + printf("Running LBPM electrokinetic single-fluid solver \n"); printf("********************************************************\n"); } //PROFILE_ENABLE_TRACE(); @@ -53,18 +53,24 @@ int main(int argc, char **argv) // Load controller information Study.ReadParams(filename); - // Initialize LB Navier-Stokes model - StokesModel.ReadParams(filename,Study.num_iter_Stokes); + // Load user input database files for Navier-Stokes and Ion solvers + StokesModel.ReadParams(filename); + IonModel.ReadParams(filename); + + // Setup other model specific structures StokesModel.SetDomain(); StokesModel.ReadInput(); StokesModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables - StokesModel.Initialize(); // initializing the model will set initial conditions for variables - // Initialize LB-Ion model - IonModel.ReadParams(filename,Study.num_iter_Ion); IonModel.SetDomain(); IonModel.ReadInput(); IonModel.Create(); + + // Get internal iteration number + StokesModel.timestepMax = Study.getStokesNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); + StokesModel.Initialize(); // initializing the model will set initial conditions for variables + + IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); IonModel.Initialize(); // Initialize LB-Poisson model @@ -74,39 +80,36 @@ int main(int argc, char **argv) PoissonSolver.Create(); PoissonSolver.Initialize(); + int timestep=0; while (timestep < Study.timestepMax){ timestep++; - //if (rank==0) printf("timestep=%i; running Poisson solver\n",timestep); PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental - //PoissonSolver.getElectricPotential(timestep); - - //if (rank==0) printf("timestep=%i; running StokesModel\n",timestep); 
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity - //StokesModel.getVelocity(timestep); - - //if (rank==0) printf("timestep=%i; running Ion model\n",timestep); IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential - //IonModel.getIonConcentration(timestep); - timestep++;//AA operations - //-------------------------------------------- - //potentially leave analysis module for future - //-------------------------------------------- + + if (timestep%Study.visualization_interval==0){ + PoissonSolver.getElectricPotential(timestep); + PoissonSolver.getElectricField(timestep); + IonModel.getIonConcentration(timestep); + StokesModel.getVelocity(timestep); + } } - StokesModel.getVelocity(timestep); + if (rank==0) printf("Save simulation raw data at maximum timestep\n"); PoissonSolver.getElectricPotential(timestep); PoissonSolver.getElectricField(timestep); IonModel.getIonConcentration(timestep); + StokesModel.getVelocity(timestep); if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); PROFILE_STOP("Main"); - PROFILE_SAVE("lbpm_electrokinetic_simulator",1); + PROFILE_SAVE("lbpm_electrokinetic_SingleFluid_simulator",1); // **************************************************** MPI_Barrier(comm); } // Limit scope so variables that contain communicators will free before MPI_Finialize From f0c7882639efabf8017a0c727a5fa1657d697dae Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 21 Sep 2020 23:54:28 -0400 Subject: [PATCH 032/205] add higher-order term in Ion equilibrium distribution --- gpu/Ion.cu | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/gpu/Ion.cu b/gpu/Ion.cu index dc94d2e5..2a3a0225 100644 --- a/gpu/Ion.cu +++ b/gpu/Ion.cu @@ -147,25 +147,32 @@ __global__ void 
dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, doub f6 = dist[nr6]; // q=0 - dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 1 - dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + //dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q=2 - dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + //dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 3 - dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + //dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 4 - dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + //dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 5 - dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + //dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 6 - dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + //dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 
2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); } } } @@ -206,25 +213,32 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *V f6 = dist[5*Np+n]; // q=0 - dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 1 - dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + //dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q=2 - dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + //dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 3 - dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + //dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 4 - dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + //dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 5 - dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + //dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 6 - dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + 
//dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); } } } From 0d6231f1cb927824a3bc2f152a3df68f3750918a Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 22 Sep 2020 14:33:51 -0400 Subject: [PATCH 033/205] read BC from model database --- models/ColorModel.cpp | 7 +++++-- models/DFHModel.cpp | 11 +++++++++-- models/GreyscaleModel.cpp | 5 ++++- models/MRTModel.cpp | 5 ++++- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index a8c21a75..28d2fbe2 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1,4 +1,4 @@ -/* + /* color lattice boltzmann model */ #include "models/ColorModel.h" @@ -121,7 +121,10 @@ void ScaLBL_ColorModel::ReadParams(string filename){ //if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details) BoundaryCondition = 0; - if (domain_db->keyExists( "BC" )){ + if (color_db->keyExists( "BC" )){ + BoundaryCondition = color_db->getScalar( "BC" ); + } + else if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 4eb03bea..ad346271 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -81,13 +81,18 @@ void ScaLBL_DFHModel::ReadParams(string filename){ outletA=0.f; outletB=1.f; - if (BoundaryCondition==4) flux = din*rhoA; // mass flux must adjust for density (see formulation for details) + BoundaryCondition = domain_db->getScalar( "BC" ); + if (color_db->keyExists( "BC" )){ + BoundaryCondition = color_db->getScalar( "BC" ); + } + else if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } // Read domain parameters auto L = domain_db->getVector( "L" ); auto size = domain_db->getVector( "n" ); auto nproc = domain_db->getVector( 
"nproc" ); - BoundaryCondition = domain_db->getScalar( "BC" ); Nx = size[0]; Ny = size[1]; Nz = size[2]; @@ -97,6 +102,8 @@ void ScaLBL_DFHModel::ReadParams(string filename){ nprocx = nproc[0]; nprocy = nproc[1]; nprocz = nproc[2]; + + if (BoundaryCondition==4) flux = din*rhoA; // mass flux must adjust for density (see formulation for details) } void ScaLBL_DFHModel::SetDomain(){ diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index c28c88c5..5cdae905 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -88,7 +88,10 @@ void ScaLBL_GreyscaleModel::ReadParams(string filename){ //------------------------ Other Domain parameters ------------------------// BoundaryCondition = 0; - if (domain_db->keyExists( "BC" )){ + if (greyscale_db->keyExists( "BC" )){ + BoundaryCondition = greyscale_db->getScalar( "BC" ); + } + else if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } // ------------------------------------------------------------------------// diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index acfb8821..b16e1705 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -57,7 +57,10 @@ void ScaLBL_MRTModel::ReadParams(string filename){ } // Read domain parameters - if (domain_db->keyExists( "BC" )){ + if (mrt_db->keyExists( "BoundaryCondition" )){ + BoundaryCondition = mrt_db->getScalar( "BC" ); + } + else if (domain_db->keyExists( "BC" )){ BoundaryCondition = domain_db->getScalar( "BC" ); } From 11be7935758421bbe030c75692f41c41e4b54a44 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 23 Sep 2020 12:30:22 -0400 Subject: [PATCH 034/205] use periodic BC in ScaLBL (model specific BC possible) --- common/ScaLBL.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 8db59597..49c19054 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -77,7 +77,8 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ 
nprocx = Dm->nprocx(); nprocy = Dm->nprocy(); nprocz = Dm->nprocz(); - BoundaryCondition = Dm->BoundaryCondition; + //BoundaryCondition = Dm->BoundaryCondition; + BoundaryCondition = 0; // default to periodic BC //...................................................................................... ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, 5*sendCount_x*sizeof(double)); // Allocate device memory From fd27b3138a422be44aa190a0e5eaf48b8a82753f Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 23 Sep 2020 14:53:46 -0400 Subject: [PATCH 035/205] tweak BC conventions --- common/ScaLBL.cpp | 4 ++-- models/PoissonSolver.cpp | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 49c19054..8b0abeba 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -77,8 +77,8 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ nprocx = Dm->nprocx(); nprocy = Dm->nprocy(); nprocz = Dm->nprocz(); - //BoundaryCondition = Dm->BoundaryCondition; - BoundaryCondition = 0; // default to periodic BC + BoundaryCondition = Dm->BoundaryCondition; + //BoundaryCondition = 0; // default to periodic BC //...................................................................................... 
ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, 5*sendCount_x*sizeof(double)); // Allocate device memory diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index b74536c5..40020b94 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -113,6 +113,8 @@ void ScaLBL_Poisson::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object MPI_Barrier(comm); + Dm->BoundaryCondition = BoundaryCondition; + Mask->BoundaryCondition = BoundaryCondition; Dm->CommInit(); MPI_Barrier(comm); From 92d56af3b4aa306b75fd2de82b515e90944291f4 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 25 Sep 2020 16:18:54 -0400 Subject: [PATCH 036/205] template for freelee model --- cpu/FreeLee.cpp | 2820 +++++++++++++++++++++++++++++++++++++++ models/FreeLeeModel.cpp | 632 +++++++++ models/FreeLeeModel.h | 83 ++ 3 files changed, 3535 insertions(+) create mode 100644 cpu/FreeLee.cpp create mode 100644 models/FreeLeeModel.cpp create mode 100644 models/FreeLeeModel.h diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp new file mode 100644 index 00000000..35cbd5fd --- /dev/null +++ b/cpu/FreeLee.cpp @@ -0,0 +1,2820 @@ +#include + +#define STOKES + +extern "C" void ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz) +{ + int n,N; + + N = Nx*Ny*Nz; + + for (n=0; n 0){ + + // Retrieve the color gradient + nx = ColorGrad[n]; + ny = ColorGrad[N+n]; + nz = ColorGrad[2*N+n]; + //...........Normalize the Color Gradient................................. + C = sqrt(nx*nx+ny*ny+nz*nz); + if (C==0.0) C=1.0; + nx = nx/C; + ny = ny/C; + nz = nz/C; + //......No color gradient at z-boundary if pressure BC are set............. + // if (pBC && k==0) nx = ny = nz = 0.f; + // if (pBC && k==Nz-1) nx = ny = nz = 0.f; + //........................................................................ 
+ // READ THE DISTRIBUTIONS + // (read from opposite array due to previous swap operation) + //........................................................................ + f2 = distodd[n]; + f4 = distodd[N+n]; + f6 = distodd[2*N+n]; + f8 = distodd[3*N+n]; + f10 = distodd[4*N+n]; + f12 = distodd[5*N+n]; + f14 = distodd[6*N+n]; + f16 = distodd[7*N+n]; + f18 = distodd[8*N+n]; + //........................................................................ + f0 = disteven[n]; + f1 = disteven[N+n]; + f3 = disteven[2*N+n]; + f5 = disteven[3*N+n]; + f7 = disteven[4*N+n]; + f9 = disteven[5*N+n]; + f11 = disteven[6*N+n]; + f13 = disteven[7*N+n]; + f15 = disteven[8*N+n]; + f17 = disteven[9*N+n]; + //........................................................................ + // PERFORM RELAXATION PROCESS + //........................................................................ + //....................compute the moments............................................... + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17); + m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14; + jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18; + jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18; + m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18); + m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17); + m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13; + m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13; + m13 = f8+f7-f10-f9; + m14 = f16+f15-f18-f17; + m15 = f12+f11-f14-f13; + m16 = f7-f8+f9-f10-f11+f12-f13+f14; + m17 = -f7+f8+f9-f10+f15-f16+f17-f18; + m18 = f11-f12-f13+f14-f15+f16+f17-f18; + //..........Toelke, Fruediger et. al. 
2006............... + if (C == 0.0) nx = ny = nz = 1.0; +#ifdef STOKES + m1 = m1 + rlx_setA*(- 11*rho -alpha*C - m1); + m2 = m2 + rlx_setA*(3*rho - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); + m9 = m9 + rlx_setA*( 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + m11 = m11 + rlx_setA*( 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + m13 = m13 + rlx_setA*( 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); +#else + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); +#endif + //.................inverse transformation...................................................... 
+ f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2; + f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10); + f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10); + f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); + f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); + f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); + f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); + f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12+0.25*m13+0.125*(m16-m17); + f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12+0.25*m13+0.125*(m17-m16); + f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12-0.25*m13+0.125*(m16+m17); + f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12-0.25*m13-0.125*(m16+m17); + f11 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8) + 
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12+0.25*m15+0.125*(m18-m16); + f12 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12+0.25*m15+0.125*(m16-m18); + f13 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12-0.25*m15-0.125*(m16+m18); + f14 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12-0.25*m15+0.125*(m16+m18); + f15 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8) + -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18); + f16 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8) + -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17); + f17 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8) + -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18); + f18 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6) + -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18); + //....................................................................................................... 
+ // incorporate external force + f1 += 0.16666666*Fx; + f2 -= 0.16666666*Fx; + f3 += 0.16666666*Fy; + f4 -= 0.16666666*Fy; + f5 += 0.16666666*Fz; + f6 -= 0.16666666*Fz; + f7 += 0.08333333333*(Fx+Fy); + f8 -= 0.08333333333*(Fx+Fy); + f9 += 0.08333333333*(Fx-Fy); + f10 -= 0.08333333333*(Fx-Fy); + f11 += 0.08333333333*(Fx+Fz); + f12 -= 0.08333333333*(Fx+Fz); + f13 += 0.08333333333*(Fx-Fz); + f14 -= 0.08333333333*(Fx-Fz); + f15 += 0.08333333333*(Fy+Fz); + f16 -= 0.08333333333*(Fy+Fz); + f17 += 0.08333333333*(Fy-Fz); + f18 -= 0.08333333333*(Fy-Fz); + //*********** WRITE UPDATED VALUES TO MEMORY ****************** + // Write the updated distributions + //....EVEN..................................... + disteven[n] = f0; + disteven[N+n] = f2; + disteven[2*N+n] = f4; + disteven[3*N+n] = f6; + disteven[4*N+n] = f8; + disteven[5*N+n] = f10; + disteven[6*N+n] = f12; + disteven[7*N+n] = f14; + disteven[8*N+n] = f16; + disteven[9*N+n] = f18; + //....ODD...................................... + distodd[n] = f1; + distodd[N+n] = f3; + distodd[2*N+n] = f5; + distodd[3*N+n] = f7; + distodd[4*N+n] = f9; + distodd[5*N+n] = f11; + distodd[6*N+n] = f13; + distodd[7*N+n] = f15; + distodd[8*N+n] = f17; + + //...Store the Velocity.......................... + Velocity[n] = jx; + Velocity[N+n] = jy; + Velocity[2*N+n] = jz; + /* Velocity[3*n] = jx; + Velocity[3*n+1] = jy; + Velocity[3*n+2] = jz; + */ //...Store the Color Gradient.................... + // ColorGrad[3*n] = nx*C; + // ColorGrad[3*n+1] = ny*C; + // ColorGrad[3*n+2] = nz*C; + //............................................... 
+ //*************************************************************** + } // check if n is in the solid + } // loop over n +} + +extern "C" void ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad, + double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB, + double alpha, double beta, double Fx, double Fy, double Fz) +{ + + int i,j,k,n,nn,N; + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + // additional variables needed for computations + double rho,jx,jy,jz,C,nx,ny,nz; + + N = Nx*Ny*Nz; + char id; + + for (n=0; n 0){ + + //.......Back out the 3-D indices for node n.............. + k = n/(Nx*Ny); + j = (n-Nx*Ny*k)/Nx; + i = n-Nx*Ny*k-Nx*j; + //........................................................................ + //........Get 1-D index for this thread.................... + // n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x; + //........................................................................ + // COMPUTE THE COLOR GRADIENT + //........................................................................ + //.................Read Phase Indicator Values............................ + //........................................................................ + nn = n-1; // neighbor index (get convention) + if (i-1<0) nn += Nx; // periodic BC along the x-boundary + f1 = phi[nn]; // get neighbor for phi - 1 + //........................................................................ 
+ nn = n+1; // neighbor index (get convention) + if (!(i+10)) delta=0; + a1 = na*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nb*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = na*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nb*(0.1111111111111111*(1-4.5*ux))+delta; + + A_odd[n] = a1; + A_even[N+n] = a2; + B_odd[n] = b1; + B_even[N+n] = b2; + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*na*nb*nab*0.1111111111111111*ny; + if (!(na*nb*nab>0)) delta=0; + a1 = na*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nb*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = na*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nb*(0.1111111111111111*(1-4.5*uy))+delta; + + A_odd[N+n] = a1; + A_even[2*N+n] = a2; + B_odd[N+n] = b1; + B_even[2*N+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*na*nb*nab*0.1111111111111111*nz; + if (!(na*nb*nab>0)) delta=0; + a1 = na*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nb*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = na*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nb*(0.1111111111111111*(1-4.5*uz))+delta; + + A_odd[2*N+n] = a1; + A_even[3*N+n] = a2; + B_odd[2*N+n] = b1; + B_even[3*N+n] = b2; + //............................................... + + /* // Construction and streaming for the components + for (idx=0; idx<3; idx++){ + //............................................... 
+ // Distribution index + q = 2*idx; + // Associated discrete velocity + Cqx = D3Q7[idx][0]; + Cqy = D3Q7[idx][1]; + Cqz = D3Q7[idx][2]; + // Generate the Equilibrium Distribution + a1 = na*feq[q]; + b1 = nb*feq[q]; + a2 = na*feq[q+1]; + b2 = nb*feq[q+1]; + // Recolor the distributions + if (C > 0.0){ + sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); + //if (idx > 2) sp = 0.7071067811865475*sp; + //delta = sp*min( min(a1,a2), min(b1,b2) ); + delta = na*nb/(na+nb)*0.1111111111111111*sp; + //if (a1>0 && b1>0){ + a1 += beta*delta; + a2 -= beta*delta; + b1 -= beta*delta; + b2 += beta*delta; + } + // Save the re-colored distributions + A_odd[N*idx+n] = a1; + A_even[N*(idx+1)+n] = a2; + B_odd[N*idx+n] = b1; + B_even[N*(idx+1)+n] = b2; + //............................................... + } + */ + } + } +} + +//************************************************************************* +extern "C" void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, + double beta, int Nx, int Ny, int Nz, bool pBC, int S) +{ + char id; + + int idx; + int in,jn,kn,n,nn,N; + int q,Cqx,Cqy,Cqz; + // int sendLoc; + + double na,nb; // density values + double ux,uy,uz; // flow velocity + double nx,ny,nz,C; // color gradient components + double a1,a2,b1,b2; + double sp,delta; + double feq[6]; // equilibrium distributions + // Set of Discrete velocities for the D3Q19 Model + int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}}; + N = Nx*Ny*Nz; + + for (n=0; n 0 && na+nb > 0.0){ + //.......Back out the 3-D indices for node n.............. + int k = n/(Nx*Ny); + int j = (n-Nx*Ny*k)/Nx; + int i = n-Nx*Ny*k-Nx*j; + //.....Load the Color gradient......... + nx = ColorGrad[n]; + ny = ColorGrad[N+n]; + nz = ColorGrad[2*N+n]; + C = sqrt(nx*nx+ny*ny+nz*nz); + nx = nx/C; + ny = ny/C; + nz = nz/C; + //....Load the flow velocity........... 
+ ux = Velocity[n]; + uy = Velocity[N+n]; + uz = Velocity[2*N+n]; + //....Instantiate the density distributions + // Generate Equilibrium Distributions and stream + // Stationary value - distribution 0 + // Den[2*n] += 0.3333333333333333*na; + // Den[2*n+1] += 0.3333333333333333*nb; + Den[2*n] += 0.3333333333333333*na; + Den[2*n+1] += 0.3333333333333333*nb; + // Non-Stationary equilibrium distributions + feq[0] = 0.1111111111111111*(1+3*ux); + feq[1] = 0.1111111111111111*(1-3*ux); + feq[2] = 0.1111111111111111*(1+3*uy); + feq[3] = 0.1111111111111111*(1-3*uy); + feq[4] = 0.1111111111111111*(1+3*uz); + feq[5] = 0.1111111111111111*(1-3*uz); + // Construction and streaming for the components + for (idx=0; idx<3; idx++){ + // Distribution index + q = 2*idx; + // Associated discrete velocity + Cqx = D3Q7[idx][0]; + Cqy = D3Q7[idx][1]; + Cqz = D3Q7[idx][2]; + // Generate the Equilibrium Distribution + a1 = na*feq[q]; + b1 = nb*feq[q]; + a2 = na*feq[q+1]; + b2 = nb*feq[q+1]; + // Recolor the distributions + if (C > 0.0){ + sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); + //if (idx > 2) sp = 0.7071067811865475*sp; + //delta = sp*min( min(a1,a2), min(b1,b2) ); + delta = na*nb/(na+nb)*0.1111111111111111*sp; + //if (a1>0 && b1>0){ + a1 += beta*delta; + a2 -= beta*delta; + b1 -= beta*delta; + b2 += beta*delta; + } + + // .......Get the neighbor node.............. + //nn = n + Stride[idx]; + in = i+Cqx; + jn = j+Cqy; + kn = k+Cqz; + + // Adjust for periodic BC, if necessary + // if (in<0) in+= Nx; + // if (jn<0) jn+= Ny; + // if (kn<0) kn+= Nz; + // if (!(in 0 ){ + // Get the density value (Streaming already performed) + Na = Den[n]; + Nb = Den[N+n]; + Phi[n] = (Na-Nb)/(Na+Nb); + } + } + //................................................................... 
+} + +extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){ + int n; + for (n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){ + Phi[n] = value; + } +} + + +//extern "C" void ScaLBL_D3Q19_AAeven_Color(double *dist, double *Aq, double *Bq, double *Den, double *Velocity, +// double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, +// double Fx, double Fy, double Fz, int start, int finish, int Np){ +extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, + double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + int ijk,nn,n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi,tau,rho0,rlx_setA,rlx_setB; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + + for (int n=start; n0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + 
//............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... + + } + +} + +//extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Velocity, +// double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, +// double Fx, double Fy, double Fz, int start, int finish, int Np){ +extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,ijk,nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + //int nr15,nr16,nr17,nr18; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double 
ux,uy,uz; + double phi,tau,rho0,rlx_setA,rlx_setB; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + for (int n=start; n even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + 
//nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + //nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + 
m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + //........................................................................ + //..............carry out relaxation process.............................. + //..........Toelke, Fruediger et. al. 2006................................ 
+ if (C == 0.0) nx = ny = nz = 0.0; + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //.................inverse transformation...................................................... + + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 
7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 
0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + + // write the velocity + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; + Vel[n] = ux; + Vel[Np+n] = uy; + Vel[2*Np+n] = uz; + + // Instantiate mass transport distributions + // Stationary value - distribution 0 + nAB = 1.0/(nA+nB); + Aq[n] = 0.3333333333333333*nA; + Bq[n] = 0.3333333333333333*nB; + + //............................................... + // q = 0,2,4 + // Cq = {1,0,0}, {0,1,0}, {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nx; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + //nread = neighborList[n+Np]; + Aq[nr2] = a1; + Bq[nr2] = b1; + // q=2 + //nread = neighborList[n]; + Aq[nr1] = a2; + Bq[nr1] = b2; + + //............................................... 
+ // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + //nread = neighborList[n+3*Np]; + Aq[nr4] = a1; + Bq[nr4] = b1; + // q = 4 + //nread = neighborList[n+2*Np]; + Aq[nr3] = a2; + Bq[nr3] = b2; + + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + //nread = neighborList[n+5*Np]; + Aq[nr6] = a1; + Bq[nr6] = b1; + // q = 6 + //nread = neighborList[n+4*Np]; + Aq[nr5] = a2; + Bq[nr5] = b2; + //............................................... 
+ } +} + +extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, + double *Den, double *Phi, int start, int finish, int Np){ + + int idx,n,nread; + double fq,nA,nB; + + for (int n=start; n 1.f){ + nA = 1.0; nB = 0.f; + } + else if (phi < -1.f){ + nB = 1.0; nA = 0.f; + } + else{ + nA=0.5*(phi+1.f); + nB=0.5*(1.f-phi); + } + Den[idx] = nA; + Den[Np+idx] = nB; + + Aq[idx]=0.3333333333333333*nA; + Aq[Np+idx]=0.1111111111111111*nA; + Aq[2*Np+idx]=0.1111111111111111*nA; + Aq[3*Np+idx]=0.1111111111111111*nA; + Aq[4*Np+idx]=0.1111111111111111*nA; + Aq[5*Np+idx]=0.1111111111111111*nA; + Aq[6*Np+idx]=0.1111111111111111*nA; + + Bq[idx]=0.3333333333333333*nB; + Bq[Np+idx]=0.1111111111111111*nB; + Bq[2*Np+idx]=0.1111111111111111*nB; + Bq[3*Np+idx]=0.1111111111111111*nB; + Bq[4*Np+idx]=0.1111111111111111*nB; + Bq[5*Np+idx]=0.1111111111111111*nB; + Bq[6*Np+idx]=0.1111111111111111*nB; + } +} + +extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ + int n; double value; + for (n=0; n +#include + +ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, MPI_Comm COMM): +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),W(0),gamma(0), +Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), +Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) +{ + +ScaLBL_FreeLeeModel::~ScaLBL_FreeLeeModel(){ + +} +void ScaLBL_FreeLeeModel::ReadParams(string filename){ + // read the input database + db = std::make_shared( filename ); + domain_db = db->getDatabase( "Domain" ); + freelee_db = db->getDatabase( "FreeLee" ); + analysis_db = db->getDatabase( "Analysis" ); + vis_db = db->getDatabase( "Visualization" ); + + // set defaults + timestepMax = 100000; + tauA = tauB = 1.0; + rhoA = rhoB = 1.0; + Fx = Fy = Fz = 0.0; + gamma=1e-3; + W=5; + Restart=false; + din=dout=1.0; + flux=0.0; + + 
// Color Model parameters + if (freelee_db->keyExists( "timestepMax" )){ + timestepMax = freelee_db->getScalar( "timestepMax" ); + } + if (freelee_db->keyExists( "tauA" )){ + tauA = freelee_db->getScalar( "tauA" ); + } + if (freelee_db->keyExists( "tauB" )){ + tauB = freelee_db->getScalar( "tauB" ); + } + if (freelee_db->keyExists( "rhoA" )){ + rhoA = freelee_db->getScalar( "rhoA" ); + } + if (freelee_db->keyExists( "rhoB" )){ + rhoB = freelee_db->getScalar( "rhoB" ); + } + if (freelee_db->keyExists( "F" )){ + Fx = freelee_db->getVector( "F" )[0]; + Fy = freelee_db->getVector( "F" )[1]; + Fz = freelee_db->getVector( "F" )[2]; + } + if (freelee_db->keyExists( "gamma" )){ + gamma = freelee_db->getScalar( "gamma" ); + } + if (freelee_db->keyExists( "W" )){ + W = freelee_db->getScalar( "W" ); + } + if (freelee_db->keyExists( "Restart" )){ + Restart = freelee_db->getScalar( "Restart" ); + } + if (freelee_db->keyExists( "din" )){ + din = freelee_db->getScalar( "din" ); + } + if (freelee_db->keyExists( "dout" )){ + dout = freelee_db->getScalar( "dout" ); + } + if (freelee_db->keyExists( "flux" )){ + flux = freelee_db->getScalar( "flux" ); + } + inletA=1.f; + inletB=0.f; + outletA=0.f; + outletB=1.f; + //if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details) + + BoundaryCondition = 0; + if (domain_db->keyExists( "BC" )){ + BoundaryCondition = domain_db->getScalar( "BC" ); + } +} + +void ScaLBL_FreeLeeModel::SetDomain(){ + Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis + Mask = std::shared_ptr(new Domain(domain_db,comm)); // mask domain removes immobile phases + // domain parameters + Nx = Dm->Nx; + Ny = Dm->Ny; + Nz = Dm->Nz; + Lx = Dm->Lx; + Ly = Dm->Ly; + Lz = Dm->Lz; + N = Nx*Ny*Nz; + Nxh = Nx+2; + Nyh = Ny+2; + Nzh = Nz+2; + Nh = Nxh*Nyh*Nzh; + id = new signed char [N]; + for (int i=0; iid[i] = 1; // initialize this way + //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase 
analysis object + Averages = std::shared_ptr ( new SubPhase(Dm) ); // TwoPhase analysis object + MPI_Barrier(comm); + Dm->CommInit(); + MPI_Barrier(comm); + // Read domain parameters + rank = Dm->rank(); + nprocx = Dm->nprocx(); + nprocy = Dm->nprocy(); + nprocz = Dm->nprocz(); +} + +void ScaLBL_FreeLeeModel::ReadInput(){ + + sprintf(LocalRankString,"%05d",rank); + sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString); + sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString); + + if (freelee_db->keyExists( "image_sequence" )){ + auto ImageList = freelee_db->getVector( "image_sequence"); + int IMAGE_INDEX = freelee_db->getWithDefault( "image_index", 0 ); + std::string first_image = ImageList[IMAGE_INDEX]; + Mask->Decomp(first_image); + IMAGE_INDEX++; + } + else if (domain_db->keyExists( "GridFile" )){ + // Read the local domain data + auto input_id = readMicroCT( *domain_db, MPI_COMM_WORLD ); + // Fill the halo (assuming GCW of 1) + array size0 = { (int) input_id.size(0), (int) input_id.size(1), (int) input_id.size(2) }; + ArraySize size1 = { (size_t) Mask->Nx, (size_t) Mask->Ny, (size_t) Mask->Nz }; + ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); + fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); + Array id_view; + id_view.viewRaw( size1, Mask->id ); + fill.copy( input_id, id_view ); + fill.fill( id_view ); + } + else if (domain_db->keyExists( "Filename" )){ + auto Filename = domain_db->getScalar( "Filename" ); + Mask->Decomp(Filename); + } + else{ + Mask->ReadIDs(); + } + for (int i=0; iid[i]; // save what was read + + // Generate the signed distance map + // Initialize the domain and communication + Array id_solid(Nx,Ny,Nz); + // Solve for the position of the solid phase + for (int k=0;kid[n]; + if (label > 0) id_solid(i,j,k) = 1; + else id_solid(i,j,k) = 0; + } + } + } + // Initialize the signed distance function + for (int k=0;kSDs(i,j,k) = 
2.0*double(id_solid(i,j,k))-1.0; + } + } + } +// MeanFilter(Averages->SDs); + if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); + CalcDist(Averages->SDs,id_solid,*Mask); + + if (rank == 0) cout << "Domain set." << endl; + + Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); +} + +void ScaLBL_FreeLeeModel::AssignComponentLabels(double *phase) +{ + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = freelee_db->getVector( "ComponentLabels" ); + auto AffinityList = freelee_db->getVector( "ComponentAffinity" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! \n"); + } + + double label_count[NLABELS]; + double label_count_global[NLABELS]; + // Assign the labels + + for (size_t idx=0; idxid[n] = 0; // set mask to zero since this is an immobile component + } + } + // fluid labels are reserved + if (VALUE == 1) AFFINITY=1.0; + else if (VALUE == 2) AFFINITY=-1.0; + phase[n] = AFFINITY; + } + } + } + + // Set Dm to match Mask + for (int i=0; iid[i] = Mask->id[i]; + + for (size_t idx=0; idxComm, label_count[idx]); + + if (rank==0){ + printf("Component labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... 
+ if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + // create wide halo for phase field + //ScaLBL_Comm_Regular->WideHalo + + // create the layout for the LBM + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + MPI_Barrier(comm); + + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + dist_mem_size = Np*sizeof(double); + neighborSize=18*(Np*sizeof(int)); + //........................................................................... + ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); + ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &hq, 7*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &mu_phi, dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Den, dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Phi, sizeof(double)*Nh); + ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &ColorGrad, 3*sizeof(double)*Np); + //........................................................................... 
+ // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + fflush(stdout); + int *TmpMap; + TmpMap=new int[Np]; + for (int k=1; kLastExterior(); idx++){ + auto n = TmpMap[idx]; + if (n > Nx*Ny*Nz){ + printf("Bad value! idx=%i \n", n); + TmpMap[idx] = Nx*Ny*Nz-1; + } + } + for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ + auto n = TmpMap[idx]; + if ( n > Nx*Ny*Nz ){ + printf("Bad value! idx=%i \n",n); + TmpMap[idx] = Nx*Ny*Nz-1; + } + } + ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); + ScaLBL_DeviceBarrier(); + delete [] TmpMap; + + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + // initialize phi based on PhaseLabel (include solid component labels) + double *PhaseLabel; + PhaseLabel = new double[N]; + AssignComponentLabels(PhaseLabel); + ScaLBL_CopyToDevice(Phi, PhaseLabel, N*sizeof(double)); +} + +/******************************************************** + * AssignComponentLabels * + ********************************************************/ + +void ScaLBL_FreeLeeModel::Initialize(){ + + if (rank==0) printf ("Initializing distributions \n"); + ScaLBL_D3Q19_Init(fq, Np); + /* + * This function initializes model + */ + if (Restart == true){ + if (rank==0){ + printf("Reading restart file! 
\n"); + } + + // Read in the restart file to CPU buffers + int *TmpMap; + TmpMap = new int[Np]; + + double *cPhi, *cDist, *cDen; + cPhi = new double[N]; + cDen = new double[2*Np]; + cDist = new double[19*Np]; + ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); + ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); + + ifstream File(LocalRestartFile,ios::binary); + int idx; + double value,va,vb; + for (int n=0; nLastExterior(); n++){ + va = cDen[n]; + vb = cDen[Np + n]; + value = (va-vb)/(va+vb); + idx = TmpMap[n]; + if (!(idx < 0) && idxFirstInterior(); nLastInterior(); n++){ + va = cDen[n]; + vb = cDen[Np + n]; + value = (va-vb)/(va+vb); + idx = TmpMap[n]; + if (!(idx < 0) && idxLastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + + // establish reservoirs for external bC + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); + } + if (Dm->kproc() == nprocz-1){ + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); + } + } + ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); +} + +void ScaLBL_FreeLeeModel::Run(){ + int nprocs=nprocx*nprocy*nprocz; + const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); + + if (rank==0){ + printf("********************************************************\n"); + printf("No. of timesteps: %i \n", timestepMax); + fflush(stdout); + } + + //.......create and start timer............ + double starttime,stoptime,cputime; + ScaLBL_DeviceBarrier(); + MPI_Barrier(comm); + starttime = MPI_Wtime(); + //......................................... 
+ + //************ MAIN ITERATION LOOP ***************************************/ + PROFILE_START("Loop"); + while (timestep < timestepMax ) { + //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } + PROFILE_START("Update"); + // *************ODD TIMESTEP************* + timestep++; + // Compute the Phase indicator field + // Read for hq, Bq happens in this routine (requires communication) + ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + // Halo exchange for phase field + ScaLBL_Comm_Regular->SendHalo(Phi); + + ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Phi); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set BCs + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + 
ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + + // *************EVEN TIMESTEP************* + timestep++; + // Compute the Phase indicator field + ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + // Halo exchange for phase field + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + ScaLBL_Comm_Regular->SendHalo(Phi); + ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Phi); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_DeviceBarrier(); + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, 
Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_DeviceBarrier(); + MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + //************************************************************************ + PROFILE_STOP("Update"); + } + PROFILE_STOP("Loop"); + PROFILE_SAVE("lbpm_color_simulator",1); + //************************************************************************ + stoptime = MPI_Wtime(); + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + cputime = (stoptime - starttime)/timestep; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + + // ************************************************************************ +} + + +void ScaLBL_FreeLeeModel::WriteDebug(){ + // Copy back final phase indicator field and convert to regular layout + DoubleArray PhaseField(Nx,Ny,Nz); + //ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); + ScaLBL_CopyToHost(PhaseField.data(), Phi, sizeof(double)*N); + + FILE *OUTFILE; + sprintf(LocalRankFilename,"Phase.%05i.raw",rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); + + ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); + FILE *AFILE; + sprintf(LocalRankFilename,"A.%05i.raw",rank); + AFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,AFILE); + fclose(AFILE); + + ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); + FILE *BFILE; + sprintf(LocalRankFilename,"B.%05i.raw",rank); + BFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,BFILE); + 
fclose(BFILE); + + ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); + FILE *PFILE; + sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); + PFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,PFILE); + fclose(PFILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + FILE *VELX_FILE; + sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); + VELX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELX_FILE); + fclose(VELX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + FILE *VELY_FILE; + sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); + VELY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELY_FILE); + fclose(VELY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + FILE *VELZ_FILE; + sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); + VELZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELZ_FILE); + fclose(VELZ_FILE); + +/* ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); + FILE *CGX_FILE; + sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank); + CGX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGX_FILE); + fclose(CGX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad[Np],PhaseField); + FILE *CGY_FILE; + sprintf(LocalRankFilename,"Gradient_Y.%05i.raw",rank); + CGY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGY_FILE); + fclose(CGY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad[2*Np],PhaseField); + FILE *CGZ_FILE; + sprintf(LocalRankFilename,"Gradient_Z.%05i.raw",rank); + CGZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGZ_FILE); + fclose(CGZ_FILE); +*/ +} diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h new file mode 100644 index 00000000..42419afa --- /dev/null +++ b/models/FreeLeeModel.h @@ -0,0 +1,83 @@ +/* +Implementation of color lattice boltzmann model + */ +#include +#include +#include +#include +#include 
+#include +#include + +#include "common/Communication.h" +#include "common/MPI_Helpers.h" +#include "ProfilerApp.h" +#include "threadpool/thread_pool.h" + +class ScaLBL_FreeLeeModel{ +public: + ScaLBL_FreeLeeModel(int RANK, int NP, MPI_Comm COMM); + ~ScaLBL_FreeLeeModel(); + + // functions in they should be run + void ReadParams(string filename); + void ReadParams(std::shared_ptr db0); + void SetDomain(); + void ReadInput(); + void Create(); + void Initialize(); + void Run(); + void WriteDebug(); + + bool Restart,pBC; + int timestep,timestepMax; + int BoundaryCondition; + double tauA,tauB,rhoA,rhoB; + double W,gamma; + double Fx,Fy,Fz,flux; + double din,dout,inletA,inletB,outletA,outletB; + + int Nx,Ny,Nz,N,Np; + int Nxh,Nyh,Nzh,Nh; // extra halo width + int rank,nprocx,nprocy,nprocz,nprocs; + double Lx,Ly,Lz; + + std::shared_ptr Dm; // this domain is for analysis + std::shared_ptr Mask; // this domain is for lbm + std::shared_ptr ScaLBL_Comm; + std::shared_ptr ScaLBL_Comm_Regular; + //std::shared_ptr Averages; + std::shared_ptr Averages; + + // input database + std::shared_ptr db; + std::shared_ptr domain_db; + std::shared_ptr freelee_db; + std::shared_ptr analysis_db; + std::shared_ptr vis_db; + + IntArray Map; + signed char *id; + int *NeighborList; + int *dvcMap; + double *fq, *hq; + double *mu_phi, *Den, *Phi; + double *ColorGrad; + double *Velocity; + double *Pressure; + +private: + MPI_Comm comm; + + int dist_mem_size; + int neighborSize; + // filenames + char LocalRankString[8]; + char LocalRankFilename[40]; + char LocalRestartFile[40]; + + //int rank,nprocs; + void LoadParams(std::shared_ptr db0); + +}; + From e529caf6feb1d7c463147c050a42f1023d8d4943 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 25 Sep 2020 17:02:16 -0400 Subject: [PATCH 037/205] finish BC tweak --- models/IonModel.cpp | 2 ++ models/PoissonSolver.cpp | 2 -- models/StokesModel.cpp | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git 
a/models/IonModel.cpp b/models/IonModel.cpp index 0a8f779a..7366c0b4 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -418,6 +418,8 @@ void ScaLBL_IonModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object MPI_Barrier(comm); + Dm->BoundaryCondition = BoundaryCondition; + Mask->BoundaryCondition = BoundaryCondition; Dm->CommInit(); MPI_Barrier(comm); diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 40020b94..fccb1feb 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -416,14 +416,12 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ timestep++; SolveElectricPotentialAAodd();//update electric potential - //SolveElectricField(); //deprecated - compute electric field SolvePoissonAAodd(ChargeDensity);//perform collision ScaLBL_DeviceBarrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// timestep++; SolveElectricPotentialAAeven();//update electric potential - //SolveElectricField();//deprecated - compute electric field SolvePoissonAAeven(ChargeDensity);//perform collision ScaLBL_DeviceBarrier(); MPI_Barrier(comm); //************************************************************************/ diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 3b0b2d3a..09528567 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -41,10 +41,6 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ //--------------------------------------------------------------------------// // Read domain parameters - BoundaryCondition = 0; - if (domain_db->keyExists( "BC" )){ - BoundaryCondition = domain_db->getScalar( "BC" ); - } if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu h = domain_db->getScalar( "voxel_length" ); } @@ -53,6 +49,10 @@ void ScaLBL_StokesModel::ReadParams(string filename,int num_iter){ //if (stokes_db->keyExists( "timestepMax" )){ // timestepMax = 
stokes_db->getScalar( "timestepMax" ); //} + BoundaryCondition = 0; + if (stokes_db->keyExists( "BC" )){ + BoundaryCondition = stokes_db->getScalar( "BC" ); + } if (stokes_db->keyExists( "tolerance" )){ tolerance = stokes_db->getScalar( "tolerance" ); } @@ -117,10 +117,6 @@ void ScaLBL_StokesModel::ReadParams(string filename){ //--------------------------------------------------------------------------// // Read domain parameters - BoundaryCondition = 0; - if (domain_db->keyExists( "BC" )){ - BoundaryCondition = domain_db->getScalar( "BC" ); - } if (domain_db->keyExists( "voxel_length" )){//default unit: um/lu h = domain_db->getScalar( "voxel_length" ); } @@ -129,6 +125,10 @@ void ScaLBL_StokesModel::ReadParams(string filename){ //if (stokes_db->keyExists( "timestepMax" )){ // timestepMax = stokes_db->getScalar( "timestepMax" ); //} + BoundaryCondition = 0; + if (stokes_db->keyExists( "BC" )){ + BoundaryCondition = stokes_db->getScalar( "BC" ); + } if (stokes_db->keyExists( "tolerance" )){ tolerance = stokes_db->getScalar( "tolerance" ); } @@ -190,6 +190,8 @@ void ScaLBL_StokesModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object MPI_Barrier(comm); + Dm->BoundaryCondition = BoundaryCondition; + Mask->BoundaryCondition = BoundaryCondition; Dm->CommInit(); MPI_Barrier(comm); From 4c1ce54a6dcdaba776f554dab19c5d7a88057445 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 25 Sep 2020 17:09:25 -0400 Subject: [PATCH 038/205] found that higher-order terms in Ion BGK not very useful --- gpu/Ion.cu | 56 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/gpu/Ion.cu b/gpu/Ion.cu index 2a3a0225..877e5591 100644 --- a/gpu/Ion.cu +++ b/gpu/Ion.cu @@ -147,32 +147,32 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, doub f6 = dist[nr6]; // q=0 - //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; - dist[n] 
= f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 1 - //dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); - dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + //dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q=2 - //dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); - dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + //dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 3 - //dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); - dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + //dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 4 - //dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); - dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + //dist[nr3] = f4*(1.0-rlx) + 
rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 5 - //dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); - dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + //dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 6 - //dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); - dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + //dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); } } } @@ -213,32 +213,32 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *V f6 = dist[5*Np+n]; // q=0 - //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; - dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 1 - //dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); - dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + //dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q=2 - //dist[2*Np+n] = f2*(1.0-rlx) + 
rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); - dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + //dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 3 - //dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); - dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + //dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 4 - //dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); - dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + //dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 5 - //dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); - dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + //dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); // q = 6 - //dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); - dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 
2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + //dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); } } } From 0b94f29d58dd3ff0d720be5d7f47aa62a2fc9b72 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 28 Sep 2020 15:44:22 -0400 Subject: [PATCH 039/205] fixed weird type in ScaLBL --- common/ScaLBL.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 2e8eef1a..f76a8de5 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -373,9 +373,9 @@ private: int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ; int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ; // Recieve buffers for the distributions - int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z; - int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ; - int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ; + double *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z; + double *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ; + double *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ; //...................................................................................... 
}; From 4657adbc94675b410b60069f6471bb06df81e92e Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 28 Sep 2020 17:08:49 -0400 Subject: [PATCH 040/205] add routine for ion model to read from file --- common/Domain.cpp | 147 ++++++++++++++++++++++++++++++++++++++++++++ common/Domain.h | 1 + common/ScaLBL.h | 1 + cpu/Ion.cpp | 16 +++++ gpu/Ion.cu | 33 ++++++++++ models/IonModel.cpp | 55 +++++++++++++++-- models/IonModel.h | 1 + 7 files changed, 249 insertions(+), 5 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index d75ec4a5..fc797a8e 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -1286,3 +1286,150 @@ void ReadBinaryFile(char *FILENAME, double *Data, size_t N) File.close(); } +void Domain::ReadFromFile(const std::string& Filename,const std::string& Datatype, double *UserData) +{ + //........................................................................................ + // Reading the user-defined input file + // NOTE: so far it only supports BC=0 (periodic) and BC=5 (mixed reflection) + // because if checkerboard or inlet/outlet buffer layers are added, the + // value of the void space is undefined. + // NOTE: if BC=5 is used, where the inlet and outlet layers of the domain are modified, + // user needs to modify the input file accordingly before LBPM simulator read + // the input file. + //........................................................................................ 
+ int rank_offset = 0; + int RANK = rank(); + int nprocs, nprocx, nprocy, nprocz, nx, ny, nz; + int64_t global_Nx,global_Ny,global_Nz; + int64_t i,j,k,n; + //TODO These offset we may still need them + int64_t xStart,yStart,zStart; + xStart=yStart=zStart=0; + + // Read domain parameters + // TODO currently the size of the data is still read from Domain{}; + // but user may have a user-specified size + auto size = database->getVector( "n" ); + auto SIZE = database->getVector( "N" ); + auto nproc = database->getVector( "nproc" ); + //TODO currently the funcationality "offset" is disabled as the user-defined input data may have a different size from that of the input domain + if (database->keyExists( "offset" )){ + auto offset = database->getVector( "offset" ); + xStart = offset[0]; + yStart = offset[1]; + zStart = offset[2]; + } + + nx = size[0]; + ny = size[1]; + nz = size[2]; + nprocx = nproc[0]; + nprocy = nproc[1]; + nprocz = nproc[2]; + global_Nx = SIZE[0]; + global_Ny = SIZE[1]; + global_Nz = SIZE[2]; + nprocs=nprocx*nprocy*nprocz; + + double *SegData = NULL; + if (RANK==0){ + printf("User-defined input file: %s (data type: %s)\n",Filename.c_str(),Datatype.c_str()); + printf("NOTE: currently only BC=0 or 5 supports user-defined input file!\n"); + // Rank=0 reads the entire segmented data and distributes to worker processes + printf("Dimensions of the user-defined input file: %ld x %ld x %ld \n",global_Nx,global_Ny,global_Nz); + int64_t SIZE = global_Nx*global_Ny*global_Nz; + + if (Datatype == "double"){ + printf("Reading input data as double precision floating number\n"); + SegData = new double[SIZE]; + FILE *SEGDAT = fopen(Filename.c_str(),"rb"); + if (SEGDAT==NULL) ERROR("Domain.cpp: Error reading user-defined file!\n"); + size_t ReadSeg; + ReadSeg=fread(SegData,8,SIZE,SEGDAT); + if (ReadSeg != size_t(SIZE)) printf("Domain.cpp: Error reading file: %s\n",Filename.c_str()); + fclose(SEGDAT); + } + else{ + ERROR("Error: User-defined input file only supports 
double-precision floating number!\n"); + } + printf("Read file successfully from %s \n",Filename.c_str()); + } + + // Get the rank info + int64_t N = (nx+2)*(ny+2)*(nz+2); + + // number of sites to use for periodic boundary condition transition zone + //int64_t z_transition_size = (nprocz*nz - (global_Nz - zStart))/2; + //if (z_transition_size < 0) z_transition_size=0; + int64_t z_transition_size = 0; + + //char LocalRankFilename[1000];//just for debug + double *loc_id; + loc_id = new double [(nx+2)*(ny+2)*(nz+2)]; + + // Set up the sub-domains + if (RANK==0){ + printf("Decomposing user-defined input file\n"); + printf("Distributing subdomains across %i processors \n",nprocs); + printf("Process grid: %i x %i x %i \n",nprocx,nprocy,nprocz); + printf("Subdomain size: %i x %i x %i \n",nx,ny,nz); + printf("Size of transition region: %ld \n", z_transition_size); + + for (int kp=0; kp>>(dist,Den,Np); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",cudaGetErrorString(err)); + } + //cudaProfilerStop(); +} + extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){ //cudaProfilerStart(); diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 7366c0b4..ac53d5cc 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -136,7 +136,7 @@ void ScaLBL_IonModel::ReadParams(string filename,vector &num_iter){ } //read initial ion concentration list; INPUT unit [mol/m^3] //it must be converted to LB unit [mol/lu^3] - if (ion_db->keyExists("IonConcentrationList")){ + if (ion_db->keyExists("IonConcentrationList")){ IonConcentration.clear(); IonConcentration = ion_db->getVector( "IonConcentrationList" ); if (IonConcentration.size()!=number_ion_species){ @@ -544,6 +544,35 @@ void ScaLBL_IonModel::AssignSolidBoundary(double *ion_solid) } } +void ScaLBL_IonModel::AssignIonConcentration_FromFile(double 
*Ci,const vector &File_ion) +{ + double *Ci_host; + Ci_host = new double[N]; + double VALUE=0.f; + + Mask->ReadFromFile(File_ion[0],File_ion[1],Ci_host); + + for (int k=0;kkeyExists("IonConcentrationFile")){ + //TODO: Need to figure out how to deal with multi-species concentration initialization + //NOTE: "IonConcentrationFile" is a vector, including "file_name, datatype" + auto File_ion = ion_db->getVector( "IonConcentrationFile" ); + double *Ci_host; + Ci_host = new double[number_ion_species*Np]; + for (int ic=0; ic db0); void AssignSolidBoundary(double *ion_solid); + void AssignIonConcentration_FromFile(double *Ci,const vector &File_ion); void IonConcentration_LB_to_Phys(DoubleArray &Den_reg); }; From a69504f96a3eff1ae31e32c44443d9a7c6f22e2a Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 09:43:38 -0400 Subject: [PATCH 041/205] fixed weird type in ScaLBL (revert) --- common/ScaLBL.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index f76a8de5..2e8eef1a 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -373,9 +373,9 @@ private: int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ; int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ; // Recieve buffers for the distributions - double *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z; - double *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ; - double *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ; + int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z; + int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ; + int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, 
*dvcRecvDist_YZ, *dvcRecvDist_XZ; //...................................................................................... }; From 6b335eaf287c7b1a8a2b9503256edc28330fd023 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 10:39:05 -0400 Subject: [PATCH 042/205] add wide halo class --- common/WideHalo.cpp | 387 ++++++++++++++++++++++++++++++++++++++++++++ common/WideHalo.h | 96 +++++++++++ 2 files changed, 483 insertions(+) create mode 100644 common/WideHalo.cpp create mode 100644 common/WideHalo.h diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp new file mode 100644 index 00000000..2e216c3c --- /dev/null +++ b/common/WideHalo.cpp @@ -0,0 +1,387 @@ +/* +This class implements support for halo widths larger than 1 + */ + +ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Dm, int width) +{ + //...................................................................................... + Lock=false; // unlock the communicator + //...................................................................................... + // Create a separate copy of the communicator for the device + MPI_Comm_dup(Dm->Comm,&MPI_COMM_SCALBL); + //...................................................................................... 
+ // Copy the domain size and communication information directly from Dm + Nx = Dm->Nx; + Ny = Dm->Ny; + Nz = Dm->Nz; + N = Nx*Ny*Nz; + Nxh = Nx + 2*(width - 1); + Nyh = Ny + 2*(width - 1); + Nzh = Nz + 2*(width - 1); + Nh = Nxh*Nyh*Nzh; + next=0; + + rank=Dm->rank(); + iproc = Dm->iproc(); + jproc = Dm->jproc(); + kproc = Dm->kproc(); + nprocx = Dm->nprocx(); + nprocy = Dm->nprocy(); + nprocz = Dm->nprocz(); + rank_info = RankInfoStruct(myrank,nprocx,nprocy,nprocz); + rank = rank_info.rank[1][1][1]; + rank_X = rank_info.rank[2][1][1]; + rank_x = rank_info.rank[0][1][1]; + rank_Y = rank_info.rank[1][2][1]; + rank_y = rank_info.rank[1][0][1]; + rank_Z = rank_info.rank[1][1][2]; + rank_z = rank_info.rank[1][1][0]; + rank_XY = rank_info.rank[2][2][1]; + rank_xy = rank_info.rank[0][0][1]; + rank_Xy = rank_info.rank[2][0][1]; + rank_xY = rank_info.rank[0][2][1]; + rank_XZ = rank_info.rank[2][1][2]; + rank_xz = rank_info.rank[0][1][0]; + rank_Xz = rank_info.rank[2][1][0]; + rank_xZ = rank_info.rank[0][1][2]; + rank_YZ = rank_info.rank[1][2][2]; + rank_yz = rank_info.rank[1][0][0]; + rank_Yz = rank_info.rank[1][2][0]; + rank_yZ = rank_info.rank[1][0][2]; + rank_XYz = rank_info.rank[2][2][0]; + rank_xyz = rank_info.rank[0][0][0]; + rank_Xyz = rank_info.rank[2][0][0]; + rank_xYz = rank_info.rank[0][2][0]; + rank_XYZ = rank_info.rank[2][2][2]; + rank_xyZ = rank_info.rank[0][0][2]; + rank_XyZ = rank_info.rank[2][0][2]; + rank_xYZ = rank_info.rank[0][2][2]; + + sendCount_x = (Ny-2)*(Nz-2)*width; + sendCount_y = (Nx-2)*(Nz-2)*width; + sendCount_z = (Nx-2)*(Ny-2)*width; + sendCount_X = (Ny-2)*(Nz-2)*width; + sendCount_Y = (Nx-2)*(Nz-2)*width; + sendCount_Z = (Nx-2)*(Ny-2)*width; + sendCount_xy = (Nz-2)*width*width; + sendCount_yz = (Nx-2)*width*width; + sendCount_xz = (Ny-2)*width*width; + sendCount_Xy = (Nz-2)*width*width; + sendCount_Yz = (Nx-2)*width*width; + sendCount_xZ = (Ny-2)*width*width; + sendCount_xY = (Nz-2)*width*width; + sendCount_yZ = (Nx-2)*width*width; + 
sendCount_Xz = (Ny-2)*width*width; + sendCount_XY = (Nz-2)*width*width; + sendCount_YZ = (Nx-2)*width*width; + sendCount_XZ = (Ny-2)*width*width; + sendCount_xyz = width*width*width; + sendCount_Xyz = width*width*width; + sendCount_xYz = width*width*width; + sendCount_XYz = width*width*width; + sendCount_xyZ = width*width*width; + sendCount_XyZ = width*width*width; + sendCount_xYZ = width*width*width; + sendCount_XYZ = width*width*width; + + RecvCount_x = (Ny-2)*(Nz-2)*width; + RecvCount_y = (Nx-2)*(Nz-2)*width; + RecvCount_z = (Nx-2)*(Ny-2)*width; + RecvCount_X = (Ny-2)*(Nz-2)*width; + RecvCount_Y = (Nx-2)*(Nz-2)*width; + RecvCount_Z = (Nx-2)*(Ny-2)*width; + RecvCount_xy = (Nz-2)*width*width; + RecvCount_yz = (Nx-2)*width*width; + RecvCount_xz = (Ny-2)*width*width; + RecvCount_Xy = (Nz-2)*width*width; + RecvCount_Yz = (Nx-2)*width*width; + RecvCount_xZ = (Ny-2)*width*width; + RecvCount_xY = (Nz-2)*width*width; + RecvCount_yZ = (Nx-2)*width*width; + RecvCount_Xz = (Ny-2)*width*width; + RecvCount_XY = (Nz-2)*width*width; + RecvCount_YZ = (Nx-2)*width*width; + RecvCount_XZ = (Ny-2)*width*width; + RecvCount_xyz = width*width*width; + RecvCount_Xyz = width*width*width; + RecvCount_xYz = width*width*width; + RecvCount_XYz = width*width*width; + RecvCount_xyZ = width*width*width; + RecvCount_XyZ = width*width*width; + RecvCount_xYZ = width*width*width; + RecvCount_XYZ = width*width*width; + //...................................................................................... 
+ ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, sendCount_x*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, sendCount_X*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, sendCount_y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, sendCount_Y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, sendCount_z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, sendCount_Z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xy, sendCount_xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xY, sendCount_xY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xy, sendCount_Xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_XY, sendCount_XY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xz, sendCount_xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xZ, sendCount_xZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xz, sendCount_Xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_XZ, sendCount_XZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_yz, sendCount_yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_yZ, sendCount_yZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Yz, sendCount_Yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_YZ, sendCount_YZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xyz, sendCount_xyz*sizeof(double)); // Allocate device memory + 
ScaLBL_AllocateZeroCopy((void **) &sendbuf_xYz, sendCount_xYz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xyz, sendCount_Xyz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_XYz, sendCount_XYz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xyZ, sendCount_xyZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xYZ, sendCount_xYZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_XyZ, sendCount_XyZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_XYZ, sendCount_XYZ*sizeof(double)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, recvCount_x*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, recvCount_X*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, recvCount_y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, recvCount_Y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, recvCount_z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, recvCount_Z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xy, recvCount_xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xY, recvCount_xY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xy, recvCount_Xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_XY, recvCount_XY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xz, recvCount_xz*sizeof(double)); // Allocate device memory + 
ScaLBL_AllocateZeroCopy((void **) &recvbuf_xZ, recvCount_xZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xz, recvCount_Xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_XZ, recvCount_XZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_yz, recvCount_yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_yZ, recvCount_yZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Yz, recvCount_Yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_YZ, recvCount_YZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xyz, recvCount_xyz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xYz, recvCount_xYz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xyz, recvCount_Xyz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_XYz, recvCount_XYz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xyZ, recvCount_xyZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xYZ, recvCount_xYZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_XyZ, recvCount_XyZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_XYZ, recvCount_XYZ*sizeof(double)); // Allocate device memory + //...................................................................................... 
+ ScaLBL_AllocateZeroCopy((void **) &dvcSendList_x, sendCount_x*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_X, sendCount_X*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_y, sendCount_y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Y, sendCount_Y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_z, sendCount_z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Z, sendCount_Z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xy, sendCount_xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xY, sendCount_xY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xy, sendCount_Xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XY, sendCount_XY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xz, sendCount_xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xZ, sendCount_xZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xz, sendCount_Xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XZ, sendCount_XZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_yz, sendCount_yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_yZ, sendCount_yZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Yz, sendCount_Yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_YZ, sendCount_YZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xyz, sendCount_xyz*sizeof(int)); // Allocate device 
memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xYz, sendCount_xYz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Xyz, sendCount_Xyz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XYz, sendCount_XYz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xyZ, sendCount_xyZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_xYZ, sendCount_xYZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XyZ, sendCount_XyZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcSendList_XYZ, sendCount_XYZ*sizeof(int)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_x, recvCount_x*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_X, recvCount_X*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_y, recvCount_y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Y, recvCount_Y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_z, recvCount_z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Z, recvCount_Z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xy, recvCount_xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xY, recvCount_xY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XY, recvCount_XY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xz, recvCount_xz*sizeof(int)); // Allocate 
device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_yz, recvCount_yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xyz, recvCount_xyZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xYz, recvCount_xYZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_Xyz, recvCount_XyZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XYz, recvCount_XYZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xyZ, recvCount_xyZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_xYZ, recvCount_xYZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XyZ, recvCount_XyZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &dvcRecvList_XYZ, recvCount_XYZ*sizeof(int)); // Allocate device memory + //...................................................................................... 
+ + MPI_Barrier(MPI_COMM_SCALBL); + + /* Fill in communications patterns for the lists */ + int *sendList, *recvList; + sendList = new int [width*max(Nxh,Nyh)*max(Nxh,Nzh)]; + /* x face */ + int count = 0; + for (int k=0; k Dm, int width); + ~ScaLBLWideHalo_Communicator(); + //...................................................................................... + MPI_Comm MPI_COMM_SCALBL; // MPI Communicator + unsigned long int CommunicationCount,SendCount,RecvCount; + int Nx,Ny,Nz,N; // original domain structure + int Nxh,Nyh,Nzh,Nh; // with wide halo + RankInfoStruct rank_info; + + int first_interior,last_interior; + //...................................................................................... + // Set up for D3Q19 distributions -- all 27 neighbors are needed + //...................................................................................... + // Buffers to store data sent and recieved by this MPI process + double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z; + double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ; + double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, *sendbuf_XZ; + double *sendbuf_xyz, *sendbuf_Xyz, *sendbuf_xYz, *sendbuf_XYy; + double *sendbuf_xyZ, *sendbuf_XyZ, *sendbuf_xYZ, *sendbuf_XYZ; + double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z; + double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, *recvbuf_xZ; + double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ; + double *recvbuf_xyz, *recvbuf_Xyz, *recvbuf_xYz, *recvbuf_XYy; + double *recvbuf_xyZ, *recvbuf_XyZ, *recvbuf_xYZ, *recvbuf_XYZ; + //...................................................................................... 
+ + int LastExterior(); + int FirstInterior(); + int LastInterior(); + + void Send(double *data); + void Recv(double *data); + + // Debugging and unit testing functions + void PrintDebug()); + +private: + bool Lock; // use Lock to make sure only one call at a time to protect data in transit + // only one set of Send requests can be active at any time (per instance) + int i,j,k,n; + int iproc,jproc,kproc; + int nprocx,nprocy,nprocz; + int sendtag,recvtag; + // Give the object it's own MPI communicator + RankInfoStruct rank_info; + MPI_Group Group; // Group of processors associated with this domain + MPI_Request req1[26],req2[26]; + MPI_Status stat1[26],stat2[26]; + //...................................................................................... + // MPI ranks for all 18 neighbors + //...................................................................................... + // These variables are all private to prevent external things from modifying them!! + //...................................................................................... + int rank; + int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z; + int rank_xy,rank_XY,rank_xY,rank_Xy; + int rank_xz,rank_XZ,rank_xZ,rank_Xz; + int rank_yz,rank_YZ,rank_yZ,rank_Yz; + int rank_xyz,rank_Xyz,rank_xYz,rank_XYz; + int rank_xyZ,rank_XyZ,rank_xYZ,rank_XYZ; + //...................................................................................... + //...................................................................................... + int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z; + int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ; + int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ; + int sendCount_xyz,sendCount_Xyz,sendCount_xYz,sendCount_XYz; + int sendCount_xyZ,sendCount_XyZ,sendCount_xYZ,sendCount_XYZ; + //...................................................................................... 
+ int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z; + int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ; + int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ; + int recvCount_xyz,recvCount_Xyz,recvCount_xYz,recvCount_XYz; + int recvCount_xyZ,recvCount_XyZ,recvCount_xYZ,recvCount_XYZ; + //...................................................................................... + // Send buffers that reside on the compute device + int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z; + int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ; + int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ; + int *dvcSendList_xyz,*dvcSendList_Xyz,*dvcSendList_xYz,*dvcSendList_XYz; + int *dvcSendList_xyZ,*dvcSendList_XyZ,*dvcSendList_xYZ,*dvcSendList_XYZ; + // Recieve buffers that reside on the compute device + int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z; + int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ; + int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ; + int *dvcRecvList_xyz,*dvcRecvList_Xyz,*dvcRecvList_xYz,*dvcRecvList_XYz; + int *dvcRecvList_xyZ,*dvcRecvList_XyZ,*dvcRecvList_xYZ,*dvcRecvList_XYZ; + //...................................................................................... 
+ +}; From 23f6c089f0621426fec30a07de19713cd78502dd Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 10:43:31 -0400 Subject: [PATCH 043/205] fix name of function --- common/WideHalo.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 2e216c3c..78753d33 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -277,7 +277,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Date: Tue, 29 Sep 2020 10:48:54 -0400 Subject: [PATCH 044/205] fixing compile bugs --- common/WideHalo.cpp | 3 ++- common/WideHalo.h | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 78753d33..0a52bbd8 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -1,6 +1,7 @@ /* This class implements support for halo widths larger than 1 */ +#include "common/WideHalo.h" ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Dm, int width) { @@ -383,5 +384,5 @@ void ScaLBLWideHalo_Communicator::Recv(double *data){ } inline int getHaloBlock(){ -} + } diff --git a/common/WideHalo.h b/common/WideHalo.h index 11252426..ac9f8833 100644 --- a/common/WideHalo.h +++ b/common/WideHalo.h @@ -1,6 +1,9 @@ /* This class implements support for halo widths larger than 1 */ +#ifndef WideHalo_H +#define WideHalo_H +#include "common/Domain.h" class ScaLBLWideHalo_Communicator{ public: @@ -92,5 +95,5 @@ private: int *dvcRecvList_xyz,*dvcRecvList_Xyz,*dvcRecvList_xYz,*dvcRecvList_XYz; int *dvcRecvList_xyZ,*dvcRecvList_XyZ,*dvcRecvList_xYZ,*dvcRecvList_XYZ; //...................................................................................... 
- }; +#endif From afa9dac22adfffc634d95dae2cfdef637e1be4a5 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 10:56:01 -0400 Subject: [PATCH 045/205] fixing compile bugs --- common/WideHalo.cpp | 76 ++++++++++++++++++++++----------------------- common/WideHalo.h | 3 +- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 0a52bbd8..32401ef9 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -20,7 +20,6 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr rank(); iproc = Dm->iproc(); @@ -29,7 +28,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr nprocx(); nprocy = Dm->nprocy(); nprocz = Dm->nprocz(); - rank_info = RankInfoStruct(myrank,nprocx,nprocy,nprocz); + rank_info = RankInfoStruct(rank,nprocx,nprocy,nprocz); rank = rank_info.rank[1][1][1]; rank_X = rank_info.rank[2][1][1]; rank_x = rank_info.rank[0][1][1]; @@ -85,32 +84,32 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Date: Tue, 29 Sep 2020 10:58:33 -0400 Subject: [PATCH 046/205] fixing compile bugs --- common/WideHalo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/WideHalo.h b/common/WideHalo.h index beeca65c..9ca88271 100644 --- a/common/WideHalo.h +++ b/common/WideHalo.h @@ -3,7 +3,7 @@ This class implements support for halo widths larger than 1 */ #ifndef WideHalo_H #define WideHalo_H -#include "common/Domain.h" +#include "common/ScaLBL.h" class ScaLBLWideHalo_Communicator{ public: From 229accee9c381a1bfd966c37064fdf4e449c732b Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 11:00:42 -0400 Subject: [PATCH 047/205] fixing compile bugs --- common/WideHalo.cpp | 3 ++- common/WideHalo.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 32401ef9..9582b601 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -384,5 +384,6 @@ 
void ScaLBLWideHalo_Communicator::Recv(double *data){ } inline int getHaloBlock(){ - + int count = 0; + return count; } diff --git a/common/WideHalo.h b/common/WideHalo.h index 9ca88271..2dbe8cae 100644 --- a/common/WideHalo.h +++ b/common/WideHalo.h @@ -24,12 +24,12 @@ public: double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z; double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ; double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, *sendbuf_XZ; - double *sendbuf_xyz, *sendbuf_Xyz, *sendbuf_xYz, *sendbuf_XYy; + double *sendbuf_xyz, *sendbuf_Xyz, *sendbuf_xYz, *sendbuf_XYz; double *sendbuf_xyZ, *sendbuf_XyZ, *sendbuf_xYZ, *sendbuf_XYZ; double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z; double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, *recvbuf_xZ; double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ; - double *recvbuf_xyz, *recvbuf_Xyz, *recvbuf_xYz, *recvbuf_XYy; + double *recvbuf_xyz, *recvbuf_Xyz, *recvbuf_xYz, *recvbuf_XYz; double *recvbuf_xyZ, *recvbuf_XyZ, *recvbuf_xYZ, *recvbuf_XYZ; //...................................................................................... 
From e4bdc864c6b7b05f02e440e0c7db761d09780242 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 11:05:09 -0400 Subject: [PATCH 048/205] fixing compile bugs --- models/FreeLeeModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 7fc005e3..f3e5b42c 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -1,7 +1,7 @@ /* color lattice boltzmann model */ -#include "models/ColorModel.h" +#include "models/FreeLeeModel.h" #include "analysis/distance.h" #include "analysis/morphology.h" #include "common/Communication.h" From 471e78703a54846bc62eef740bbc07fe62c3594d Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Tue, 29 Sep 2020 13:22:25 -0400 Subject: [PATCH 049/205] add some print-out for debugging --- models/IonModel.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/models/IonModel.cpp b/models/IonModel.cpp index ac53d5cc..5cf38e64 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -697,6 +697,12 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ for (double item : rlx){ item = 1.0/item; } + //**debug + if (rank==0){ + for (unsigned int ic=0;ic Date: Tue, 29 Sep 2020 13:25:53 -0400 Subject: [PATCH 050/205] fixing compile bugs --- models/FreeLeeModel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 42419afa..7409946d 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -1,5 +1,5 @@ /* -Implementation of color lattice boltzmann model +Implementation of Lee et al JCP 2016 lattice boltzmann model */ #include #include From cf1cf4eb53e863d13223e2c8da38dfb9430dcffc Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 13:29:14 -0400 Subject: [PATCH 051/205] fixing compile bugs --- models/FreeLeeModel.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 7409946d..0d0e63c3 100644 --- 
a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -13,6 +13,8 @@ Implementation of Lee et al JCP 2016 lattice boltzmann model #include "common/MPI_Helpers.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" +#include "common/ScaLBL.h" +#include "common/WideHalo.h" class ScaLBL_FreeLeeModel{ public: From 917be9f6c4ebe75286c92877c98d04cf98147972 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 13:37:02 -0400 Subject: [PATCH 052/205] fixing compile bugs --- models/FreeLeeModel.cpp | 12 ++++++------ models/FreeLeeModel.h | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index f3e5b42c..e9bca7be 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -14,7 +14,8 @@ rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rh Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { - + +} ScaLBL_FreeLeeModel::~ScaLBL_FreeLeeModel(){ } @@ -105,8 +106,7 @@ void ScaLBL_FreeLeeModel::SetDomain(){ Nh = Nxh*Nyh*Nzh; id = new signed char [N]; for (int i=0; iid[i] = 1; // initialize this way - //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - Averages = std::shared_ptr ( new SubPhase(Dm) ); // TwoPhase analysis object + MPI_Barrier(comm); Dm->CommInit(); MPI_Barrier(comm); @@ -167,18 +167,18 @@ void ScaLBL_FreeLeeModel::ReadInput(){ } } } + SignDist.resize(Nx,Ny,Nz); // Initialize the signed distance function for (int k=0;kSDs(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0; + SignDist(i,j,k) = 2.0*double(id_solid(i,j,k))-1.0; } } } -// MeanFilter(Averages->SDs); if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); - CalcDist(Averages->SDs,id_solid,*Mask); + CalcDist(SignDist,id_solid,*Mask); if (rank == 0) cout << "Domain set." 
<< endl; diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 0d0e63c3..837ee6a4 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -67,6 +67,8 @@ public: double *ColorGrad; double *Velocity; double *Pressure; + + DoubleArray SignDistance; private: MPI_Comm comm; From 5c723742789eb323f64286bd6d1ecefc3445814e Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Sep 2020 13:40:41 -0400 Subject: [PATCH 053/205] fixing compile bugs --- models/FreeLeeModel.cpp | 3 +-- models/FreeLeeModel.h | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index e9bca7be..3265411e 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -181,8 +181,7 @@ void ScaLBL_FreeLeeModel::ReadInput(){ CalcDist(SignDist,id_solid,*Mask); if (rank == 0) cout << "Domain set." << endl; - - Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); + } void ScaLBL_FreeLeeModel::AssignComponentLabels(double *phase) diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 837ee6a4..d89fc1c5 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -40,7 +40,7 @@ public: double din,dout,inletA,inletB,outletA,outletB; int Nx,Ny,Nz,N,Np; - int Nxh,Nyh,Nzh,Nh; // extra halo width + int Nxh,Nyh,Nzh,Nh; // extra halo width int rank,nprocx,nprocy,nprocz,nprocs; double Lx,Ly,Lz; @@ -48,8 +48,6 @@ public: std::shared_ptr Mask; // this domain is for lbm std::shared_ptr ScaLBL_Comm; std::shared_ptr ScaLBL_Comm_Regular; - //std::shared_ptr Averages; - std::shared_ptr Averages; // input database std::shared_ptr db; @@ -68,7 +66,7 @@ public: double *Velocity; double *Pressure; - DoubleArray SignDistance; + DoubleArray SignDist; private: MPI_Comm comm; From e6e2c4e27f9c5d3c1883164d179bb686e9dce961 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Tue, 29 Sep 2020 13:51:23 -0400 Subject: [PATCH 054/205] FreeLee model compiles --- models/FreeLeeModel.cpp | 75 
+++-------------------------------------- 1 file changed, 5 insertions(+), 70 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 3265411e..f52802d3 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -184,68 +184,6 @@ void ScaLBL_FreeLeeModel::ReadInput(){ } -void ScaLBL_FreeLeeModel::AssignComponentLabels(double *phase) -{ - size_t NLABELS=0; - signed char VALUE=0; - double AFFINITY=0.f; - - auto LabelList = freelee_db->getVector( "ComponentLabels" ); - auto AffinityList = freelee_db->getVector( "ComponentAffinity" ); - - NLABELS=LabelList.size(); - if (NLABELS != AffinityList.size()){ - ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! \n"); - } - - double label_count[NLABELS]; - double label_count_global[NLABELS]; - // Assign the labels - - for (size_t idx=0; idxid[n] = 0; // set mask to zero since this is an immobile component - } - } - // fluid labels are reserved - if (VALUE == 1) AFFINITY=1.0; - else if (VALUE == 2) AFFINITY=-1.0; - phase[n] = AFFINITY; - } - } - } - - // Set Dm to match Mask - for (int i=0; iid[i] = Mask->id[i]; - - for (size_t idx=0; idxComm, label_count[idx]); - - if (rank==0){ - printf("Component labels: %lu \n",NLABELS); - for (unsigned int idx=0; idxLastExterior(), Np); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + //ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + //ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); // establish reservoirs for external bC if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ @@ -423,7 +357,7 @@ void ScaLBL_FreeLeeModel::Initialize(){ ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); } } - ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); + 
//ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); } void ScaLBL_FreeLeeModel::Run(){ @@ -450,7 +384,7 @@ void ScaLBL_FreeLeeModel::Run(){ PROFILE_START("Update"); // *************ODD TIMESTEP************* timestep++; - // Compute the Phase indicator field + /* // Compute the Phase indicator field // Read for hq, Bq happens in this routine (requires communication) ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); @@ -527,6 +461,7 @@ void ScaLBL_FreeLeeModel::Run(){ } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + */ ScaLBL_DeviceBarrier(); MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); //************************************************************************ From a02288631ac918ed52dde45936d503addc743030 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Tue, 29 Sep 2020 15:48:39 -0400 Subject: [PATCH 055/205] fix dumb bug of asssigning inverse of tau --- models/IonModel.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 5cf38e64..bc58a649 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -693,15 +693,9 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ //2. 
ElectricField is from Poisson model //LB-related parameter - vector rlx(tau.begin(),tau.end()); - for (double item : rlx){ - item = 1.0/item; - } - //**debug - if (rank==0){ - for (unsigned int ic=0;ic rlx; + for (unsigned int ic=0;ic Date: Tue, 29 Sep 2020 16:35:09 -0400 Subject: [PATCH 056/205] building wide halo class --- common/WideHalo.cpp | 339 ++++++++++++++++++++++------------------ common/WideHalo.h | 17 ++ models/FreeLeeModel.cpp | 4 +- models/FreeLeeModel.h | 3 +- 4 files changed, 211 insertions(+), 152 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 9582b601..5d375b7f 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -56,7 +56,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr (new ScaLBL_Communicator(Mask)); ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); - - // create wide halo for phase field - //ScaLBL_Comm_Regular->WideHalo + ScaLBL_Comm_WideHalo = std::shared_ptr(new ScaLBLWideHalo_Communicator(Mask,2)); // create the layout for the LBM int Npad=(Np/16 + 2)*16; diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index d89fc1c5..01fb54c3 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -48,7 +48,8 @@ public: std::shared_ptr Mask; // this domain is for lbm std::shared_ptr ScaLBL_Comm; std::shared_ptr ScaLBL_Comm_Regular; - + std::shared_ptr ScaLBL_Comm_WideHalo; + // input database std::shared_ptr db; std::shared_ptr domain_db; From e69f2db7774f7c631c39376c02ee6fc88c35cdd7 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 30 Sep 2020 09:06:15 -0400 Subject: [PATCH 057/205] add map to widehalo --- common/Domain.cpp | 2 +- common/WideHalo.cpp | 128 ++++++-------------------------------------- common/WideHalo.h | 2 +- 3 files changed, 17 insertions(+), 115 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index 7a1a6230..fb1e1c50 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -1,5 +1,5 @@ // Created by James 
McClure -// Copyright 2008-2013 +// Copyright 2008-2020 #include #include #include diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 5d375b7f..48f83cd9 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -21,6 +21,8 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr rank(); iproc = Dm->iproc(); jproc = Dm->jproc(); @@ -56,118 +58,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Date: Wed, 30 Sep 2020 11:45:17 -0400 Subject: [PATCH 058/205] add test color grad --- tests/CMakeLists.txt | 3 ++- tests/TestColorGrad.cpp | 15 +-------------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c536a7ec..01efe997 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -48,7 +48,8 @@ ADD_LBPM_TEST( TestTopo3D ) ADD_LBPM_TEST( TestFluxBC ) ADD_LBPM_TEST( TestMap ) #ADD_LBPM_TEST( TestMRT ) -#ADD_LBPM_TEST( TestColorGrad ) +ADD_LBPM_TEST( TestColorGrad ) +ADD_LBPM_TEST( TestWideHalo ) #ADD_LBPM_TEST( TestColorGradDFH ) ADD_LBPM_TEST( TestColorGradDFH ) ADD_LBPM_TEST( TestBubbleDFH ../example/Bubble/input.db) diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index 5cd6d924..33c9f3ce 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -72,20 +72,7 @@ int main(int argc, char **argv) //....................................................................... // Reading the domain information file //....................................................................... 
- ifstream domain("Domain.in"); - if (domain.good()){ - domain >> nprocx; - domain >> nprocy; - domain >> nprocz; - domain >> Nx; - domain >> Ny; - domain >> Nz; - domain >> nspheres; - domain >> Lx; - domain >> Ly; - domain >> Lz; - } - else if (nprocs==1){ + if (nprocs==1){ nprocx=nprocy=nprocz=1; Nx=Ny=Nz=3; nspheres=0; From 8d8a22a374cdd5ce72d69060aeb63bc3e3636102 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 30 Sep 2020 11:46:23 -0400 Subject: [PATCH 059/205] add test wide halo --- tests/TestWideHalo.cpp | 269 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 tests/TestWideHalo.cpp diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp new file mode 100644 index 00000000..5cd6d924 --- /dev/null +++ b/tests/TestWideHalo.cpp @@ -0,0 +1,269 @@ + +//************************************************************************* +// Lattice Boltzmann Simulator for Single Phase Flow in Porous Media +// James E. McCLure +//************************************************************************* +#include +#include +#include +#include "common/ScaLBL.h" +#include "common/MPI_Helpers.h" + +using namespace std; + + +//*************************************************************************************** +int main(int argc, char **argv) +{ + //***************************************** + // ***** MPI STUFF **************** + //***************************************** + // Initialize MPI + int rank,nprocs; + MPI_Init(&argc,&argv); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Comm_rank(comm,&rank); + MPI_Comm_size(comm,&nprocs); + int check; + { + // parallel domain size (# of sub-domains) + int nprocx,nprocy,nprocz; + int iproc,jproc,kproc; + + + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Color Model: TestColor \n"); + printf("********************************************************\n"); + } + + // BGK Model parameters + string FILENAME; + unsigned int 
nBlocks, nthreads; + int timestepMax, interval; + double Fx,Fy,Fz,tol; + // Domain variables + double Lx,Ly,Lz; + int nspheres; + int Nx,Ny,Nz; + int i,j,k,n; + int dim = 3; + //if (rank == 0) printf("dim=%d\n",dim); + int timestep = 0; + int timesteps = 100; + int centralNode = 2; + + double tauA = 1.0; + double tauB = 1.0; + double rhoA = 1.0; + double rhoB = 1.0; + double alpha = 0.005; + double beta = 0.95; + + double tau = 1.0; + double mu=(tau-0.5)/3.0; + double rlx_setA=1.0/tau; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + Fx = Fy = 0.f; + Fz = 0.f; + + if (rank==0){ + //....................................................................... + // Reading the domain information file + //....................................................................... + ifstream domain("Domain.in"); + if (domain.good()){ + domain >> nprocx; + domain >> nprocy; + domain >> nprocz; + domain >> Nx; + domain >> Ny; + domain >> Nz; + domain >> nspheres; + domain >> Lx; + domain >> Ly; + domain >> Lz; + } + else if (nprocs==1){ + nprocx=nprocy=nprocz=1; + Nx=Ny=Nz=3; + nspheres=0; + Lx=Ly=Lz=1; + } + else if (nprocs==2){ + nprocx=2; nprocy=1; + nprocz=1; + Nx=Ny=Nz=dim; + Nx = dim; Ny = dim; Nz = dim; + nspheres=0; + Lx=Ly=Lz=1; + } + else if (nprocs==4){ + nprocx=nprocy=2; + nprocz=1; + Nx=Ny=Nz=dim; + nspheres=0; + Lx=Ly=Lz=1; + } + else if (nprocs==8){ + nprocx=nprocy=nprocz=2; + Nx=Ny=Nz=dim; + nspheres=0; + Lx=Ly=Lz=1; + } + //....................................................................... + } + // ************************************************************** + // Broadcast simulation parameters from rank 0 to all other procs + MPI_Barrier(comm); + //................................................. 
+ MPI_Bcast(&Nx,1,MPI_INT,0,comm); + MPI_Bcast(&Ny,1,MPI_INT,0,comm); + MPI_Bcast(&Nz,1,MPI_INT,0,comm); + MPI_Bcast(&nprocx,1,MPI_INT,0,comm); + MPI_Bcast(&nprocy,1,MPI_INT,0,comm); + MPI_Bcast(&nprocz,1,MPI_INT,0,comm); + MPI_Bcast(&nspheres,1,MPI_INT,0,comm); + MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); + MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); + //................................................. + MPI_Barrier(comm); + // ************************************************************** + // ************************************************************** + + if (nprocs != nprocx*nprocy*nprocz){ + printf("nprocx = %i \n",nprocx); + printf("nprocy = %i \n",nprocy); + printf("nprocz = %i \n",nprocz); + INSIST(nprocs == nprocx*nprocy*nprocz,"Fatal error in processor count!"); + } + + if (rank==0){ + printf("********************************************************\n"); + printf("Sub-domain size = %i x %i x %i\n",Nx,Ny,Nz); + printf("********************************************************\n"); + } + + MPI_Barrier(comm); + + double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; + int BoundaryCondition=0; + + Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + + Nx += 2; + Ny += 2; + Nz += 2; + int N = Nx*Ny*Nz; + + int Np=0; // number of local pore nodes + double *PhaseLabel; + PhaseLabel = new double[N]; + //....................................................................... 
+ for (k=0;k 0){ + int idx = Map(i,j,k); + CX=COLORGRAD[idx]; + CY=COLORGRAD[Np+idx]; + CZ=COLORGRAD[2*Np+idx]; + double error=sqrt((CX-1.0)*(CX-1.0)+(CY-2.0)*(CY-2.0)+ (CZ-3.0)*(CZ-3.0)); + if (error > 1e-8) + printf("i,j,k=%i,%i,%i: Color gradient=%f,%f,%f \n",i,j,k,CX,CY,CZ); + } + } + } + } + + } + // **************************************************** + MPI_Barrier(comm); + MPI_Finalize(); + // **************************************************** + + return check; +} + From fed2fc43042ba3301cbe9fdddf8560c6af1d2d23 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 30 Sep 2020 11:53:28 -0400 Subject: [PATCH 060/205] update pointer in tests --- tests/TestColorGrad.cpp | 3 +-- tests/TestWideHalo.cpp | 18 ++---------------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index 33c9f3ce..58531601 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -138,8 +138,7 @@ int main(int argc, char **argv) double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; - Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); - + Dm = std::shared_ptr(Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); Nx += 2; Ny += 2; Nz += 2; diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp index 5cd6d924..58531601 100644 --- a/tests/TestWideHalo.cpp +++ b/tests/TestWideHalo.cpp @@ -72,20 +72,7 @@ int main(int argc, char **argv) //....................................................................... // Reading the domain information file //....................................................................... 
- ifstream domain("Domain.in"); - if (domain.good()){ - domain >> nprocx; - domain >> nprocy; - domain >> nprocz; - domain >> Nx; - domain >> Ny; - domain >> Nz; - domain >> nspheres; - domain >> Lx; - domain >> Ly; - domain >> Lz; - } - else if (nprocs==1){ + if (nprocs==1){ nprocx=nprocy=nprocz=1; Nx=Ny=Nz=3; nspheres=0; @@ -151,8 +138,7 @@ int main(int argc, char **argv) double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; - Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); - + Dm = std::shared_ptr(Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); Nx += 2; Ny += 2; Nz += 2; From e54ab8e195485c4339ce3c0e5bf618dc2d47e845 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 30 Sep 2020 11:54:44 -0400 Subject: [PATCH 061/205] update pointer in tests --- tests/TestColorGrad.cpp | 2 +- tests/TestWideHalo.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index 58531601..6e584bf1 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -138,7 +138,7 @@ int main(int argc, char **argv) double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; - Dm = std::shared_ptr(Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); Nx += 2; Ny += 2; Nz += 2; diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp index 58531601..6e584bf1 100644 --- a/tests/TestWideHalo.cpp +++ b/tests/TestWideHalo.cpp @@ -138,7 +138,7 @@ int main(int argc, char **argv) double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; - Dm = std::shared_ptr(Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); Nx += 2; Ny += 2; Nz += 2; From 
9f1e170a476c0a8a4f778c0fa32cae0138926e81 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 30 Sep 2020 11:57:25 -0400 Subject: [PATCH 062/205] update pointer in tests --- tests/TestColorGrad.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index 6e584bf1..aa02cc37 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -138,7 +138,7 @@ int main(int argc, char **argv) double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; - Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + std::shared_ptr Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); Nx += 2; Ny += 2; Nz += 2; From 7c16a76d1c10e0e342679208ed3bca557e5c2354 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 30 Sep 2020 12:00:58 -0400 Subject: [PATCH 063/205] update pointer in tests --- tests/TestColorGrad.cpp | 10 +++++----- tests/TestWideHalo.cpp | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index aa02cc37..d2f1d6de 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -138,7 +138,7 @@ int main(int argc, char **argv) double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; - std::shared_ptr Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + std::shared_ptr Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition)); Nx += 2; Ny += 2; Nz += 2; @@ -152,7 +152,7 @@ int main(int argc, char **argv) for (j=0;jid[n]=1; Np++; // Initialize gradient ColorGrad = (1,2,3) double value=double(3*k+2*j+i); @@ -160,7 +160,7 @@ int main(int argc, char **argv) } } } - Dm.CommInit(); + Dm->CommInit(); MPI_Barrier(comm); if (rank == 0) cout << "Domain set." 
<< endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -177,7 +177,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); + ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); MPI_Barrier(comm); //......................device distributions................................. @@ -231,7 +231,7 @@ int main(int argc, char **argv) for (j=1;j 0){ + if (Dm->id[n] > 0){ int idx = Map(i,j,k); CX=COLORGRAD[idx]; CY=COLORGRAD[Np+idx]; diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp index 6e584bf1..d2f1d6de 100644 --- a/tests/TestWideHalo.cpp +++ b/tests/TestWideHalo.cpp @@ -138,7 +138,7 @@ int main(int argc, char **argv) double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; - Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + std::shared_ptr Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition)); Nx += 2; Ny += 2; Nz += 2; @@ -152,7 +152,7 @@ int main(int argc, char **argv) for (j=0;jid[n]=1; Np++; // Initialize gradient ColorGrad = (1,2,3) double value=double(3*k+2*j+i); @@ -160,7 +160,7 @@ int main(int argc, char **argv) } } } - Dm.CommInit(); + Dm->CommInit(); MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -177,7 +177,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); + ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); MPI_Barrier(comm); //......................device distributions................................. 
@@ -231,7 +231,7 @@ int main(int argc, char **argv) for (j=1;j 0){ + if (Dm->id[n] > 0){ int idx = Map(i,j,k); CX=COLORGRAD[idx]; CY=COLORGRAD[Np+idx]; From 0c594fa33d7e872ea507ee8714e7f12e3055b6f4 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 30 Sep 2020 14:45:51 -0400 Subject: [PATCH 064/205] add halo to memory optimized layout --- common/ScaLBL.cpp | 24 +++++++++++------------- common/ScaLBL.h | 2 +- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 13508dd4..a2bd1d56 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -352,7 +352,7 @@ void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, i delete [] ReturnDist; } -int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np){ +int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np, int width){ /* * Generate a memory optimized layout * id[n] == 0 implies that site n should be ignored (treat as a mask) @@ -391,28 +391,26 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis n = k*Nx*Ny+j*Nx+i; if (id[n] > 0){ // Counts for the six faces - if (i==1) Map(n)=idx++; - else if (j==1) Map(n)=idx++; - else if (k==1) Map(n)=idx++; - else if (i==Nx-2) Map(n)=idx++; - else if (j==Ny-2) Map(n)=idx++; - else if (k==Nz-2) Map(n)=idx++; + if (i>0 && i<=width) Map(n)=idx++; + else if (j>0 && j<=width)) Map(n)=idx++; + else if (k>0 && k<=width)) Map(n)=idx++; + else if (i>Nx-width-1 && iNy-width-1 && jNz-width-1 && k 0 ){ diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 2e8eef1a..dc199639 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -295,7 +295,7 @@ public: int FirstInterior(); int LastInterior(); - int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np); + int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np, int width); void 
SendD3Q19AA(double *dist); void RecvD3Q19AA(double *dist); // void BiSendD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd); From 624ec5a14591edb6b486fc1a62517226d1a794f9 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Wed, 30 Sep 2020 15:10:18 -0400 Subject: [PATCH 065/205] memory optimized layout with halo width --- common/ScaLBL.cpp | 6 +++--- models/ColorModel.cpp | 2 +- models/DFHModel.cpp | 2 +- models/FreeLeeModel.cpp | 2 +- models/GreyscaleColorModel.cpp | 2 +- models/GreyscaleModel.cpp | 2 +- models/MRTModel.cpp | 2 +- tests/TestBubbleDFH.cpp | 2 +- tests/TestColorGrad.cpp | 3 +-- tests/TestColorGradDFH.cpp | 2 +- tests/TestColorMassBounceback.cpp | 2 +- tests/TestCommD3Q19.cpp | 2 +- tests/TestFluxBC.cpp | 2 +- tests/TestForceMoments.cpp | 2 +- tests/TestMRT.cpp | 2 +- tests/TestMap.cpp | 2 +- tests/TestPressVel.cpp | 2 +- tests/TestWideHalo.cpp | 2 +- 18 files changed, 20 insertions(+), 21 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index a2bd1d56..28264d17 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -391,9 +391,9 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis n = k*Nx*Ny+j*Nx+i; if (id[n] > 0){ // Counts for the six faces - if (i>0 && i<=width) Map(n)=idx++; - else if (j>0 && j<=width)) Map(n)=idx++; - else if (k>0 && k<=width)) Map(n)=idx++; + if (i>0 && i<=width) Map(n)=idx++; + else if (j>0 && j<=width) Map(n)=idx++; + else if (k>0 && k<=width) Map(n)=idx++; else if (i>Nx-width-1 && iNy-width-1 && jNz-width-1 && kMemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... 
diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 4eb03bea..1f796f54 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -205,7 +205,7 @@ void ScaLBL_DFHModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index e6525bf4..547885b8 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -206,7 +206,7 @@ void ScaLBL_FreeLeeModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,2); MPI_Barrier(comm); //........................................................................... diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index d97c844d..736012ce 100644 --- a/models/GreyscaleColorModel.cpp +++ b/models/GreyscaleColorModel.cpp @@ -641,7 +641,7 @@ void ScaLBL_GreyscaleColorModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... 
diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 85832a4b..e61bdec8 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -313,7 +313,7 @@ void ScaLBL_GreyscaleModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index acfb8821..8b18b238 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -170,7 +170,7 @@ void ScaLBL_MRTModel::Create(){ if (rank==0) printf ("Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... // MAIN VARIABLES ALLOCATED HERE diff --git a/tests/TestBubbleDFH.cpp b/tests/TestBubbleDFH.cpp index a8ba0cde..0841311e 100644 --- a/tests/TestBubbleDFH.cpp +++ b/tests/TestBubbleDFH.cpp @@ -249,7 +249,7 @@ int main(int argc, char **argv) if (rank==0) printf ("Set up memory efficient layout Npad=%i \n",Npad); IntArray Map(Nx,Ny,Nz); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... 
diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index d2f1d6de..f7808f93 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -1,4 +1,3 @@ - //************************************************************************* // Lattice Boltzmann Simulator for Single Phase Flow in Porous Media // James E. McCLure @@ -177,7 +176,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); MPI_Barrier(comm); //......................device distributions................................. diff --git a/tests/TestColorGradDFH.cpp b/tests/TestColorGradDFH.cpp index d6376d82..cd18e401 100644 --- a/tests/TestColorGradDFH.cpp +++ b/tests/TestColorGradDFH.cpp @@ -104,7 +104,7 @@ int main(int argc, char **argv) int *neighborList; IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); MPI_Barrier(comm); //......................device distributions................................. diff --git a/tests/TestColorMassBounceback.cpp b/tests/TestColorMassBounceback.cpp index c05c245e..678b4dfb 100644 --- a/tests/TestColorMassBounceback.cpp +++ b/tests/TestColorMassBounceback.cpp @@ -169,7 +169,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); Npad=Np+32; neighborList= new int[18*Npad]; - Np=ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np=ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); MPI_Barrier(comm); //......................device distributions................................. 
diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index e1fa821f..f74eac31 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -284,7 +284,7 @@ int main(int argc, char **argv) auto neighborList= new int[18*Npad]; IntArray Map(Nx,Ny,Nz); Map.fill(-2); - Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); MPI_Barrier(comm); int neighborSize=18*Np*sizeof(int); //......................device distributions................................. diff --git a/tests/TestFluxBC.cpp b/tests/TestFluxBC.cpp index 020bbd89..6eac35d2 100644 --- a/tests/TestFluxBC.cpp +++ b/tests/TestFluxBC.cpp @@ -88,7 +88,7 @@ int main (int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); MPI_Barrier(comm); //......................device distributions................................. diff --git a/tests/TestForceMoments.cpp b/tests/TestForceMoments.cpp index 1fb1e0a4..54324103 100644 --- a/tests/TestForceMoments.cpp +++ b/tests/TestForceMoments.cpp @@ -183,7 +183,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); if (rank == 0) PrintNeighborList(neighborList,Np, rank); diff --git a/tests/TestMRT.cpp b/tests/TestMRT.cpp index 30f46689..2b10411e 100644 --- a/tests/TestMRT.cpp +++ b/tests/TestMRT.cpp @@ -705,7 +705,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); + ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np,1); MPI_Barrier(comm); //......................device distributions................................. 
diff --git a/tests/TestMap.cpp b/tests/TestMap.cpp index a47c0d9e..ff0ba811 100644 --- a/tests/TestMap.cpp +++ b/tests/TestMap.cpp @@ -93,7 +93,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); MPI_Barrier(comm); // Check the neighborlist diff --git a/tests/TestPressVel.cpp b/tests/TestPressVel.cpp index e655ced9..f66c1d2c 100644 --- a/tests/TestPressVel.cpp +++ b/tests/TestPressVel.cpp @@ -132,7 +132,7 @@ int main(int argc, char **argv) int *neighborList; IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); MPI_Barrier(comm); //......................device distributions................................. diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp index d2f1d6de..d5bb218c 100644 --- a/tests/TestWideHalo.cpp +++ b/tests/TestWideHalo.cpp @@ -177,7 +177,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np); + ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,2); MPI_Barrier(comm); //......................device distributions................................. 
From 5ed82a54f46afecb1db05a83ee0ab7425a878b38 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 30 Sep 2020 15:14:28 -0400 Subject: [PATCH 066/205] fix failing commm test --- common/ScaLBL.cpp | 6 +++--- tests/TestCommD3Q19.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index a2bd1d56..f0beb25b 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -394,9 +394,9 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis if (i>0 && i<=width) Map(n)=idx++; else if (j>0 && j<=width)) Map(n)=idx++; else if (k>0 && k<=width)) Map(n)=idx++; - else if (i>Nx-width-1 && iNy-width-1 && jNz-width-1 && kNx-width-2 && iNy-width-2 && jNz-width-2 && k Date: Wed, 30 Sep 2020 15:22:37 -0400 Subject: [PATCH 067/205] update Memory Optimized Layout -tests passing --- common/ScaLBL.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 09fef5d4..ecc9647f 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -392,8 +392,8 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis if (id[n] > 0){ // Counts for the six faces if (i>0 && i<=width) Map(n)=idx++; - else if (j>0 && j<=width)) Map(n)=idx++; - else if (k>0 && k<=width)) Map(n)=idx++; + else if (j>0 && j<=width) Map(n)=idx++; + else if (k>0 && k<=width) Map(n)=idx++; else if (i>Nx-width-2 && iNy-width-2 && jNz-width-2 && k Date: Thu, 1 Oct 2020 16:27:46 -0400 Subject: [PATCH 068/205] save the work;upgrade output data writing by writing single file instead of decomposed data --- common/Domain.cpp | 91 +++++++++++++++++++++++++++++++++++++++++++++ common/Domain.h | 1 + models/IonModel.cpp | 13 ++++--- models/IonModel.h | 1 + 4 files changed, 101 insertions(+), 5 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index fc797a8e..9cf9a1ca 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -1433,3 +1433,94 @@ void 
Domain::ReadFromFile(const std::string& Filename,const std::string& Datatyp //Comm.barrier(); MPI_Barrier(Comm); } + +void Domain::AggregateLabels( const std::string& filename, DoubleArray &UserData ){ + + int nx = Nx; + int ny = Ny; + int nz = Nz; + + int npx = nprocx(); + int npy = nprocy(); + int npz = nprocz(); + + int ipx = iproc(); + int ipy = jproc(); + int ipz = kproc(); + + int nprocs = nprocx()*nprocy()*nprocz(); + + int full_nx = npx*(nx-2); + int full_ny = npy*(ny-2); + int full_nz = npz*(nz-2); + int local_size = (nx-2)*(ny-2)*(nz-2); + unsigned long int full_size = long(full_nx)*long(full_ny)*long(full_nz); + + double *LocalID; + LocalID = new double [local_size]; + + //printf("aggregate labels: local size=%i, global size = %i",local_size, full_size); + // assign the ID for the local sub-region + for (int k=1; kAggregateLabels(OutputFilename,PhaseField); } } diff --git a/models/IonModel.h b/models/IonModel.h index 995898bd..0f527c3a 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -85,6 +85,7 @@ private: char LocalRankString[8]; char LocalRankFilename[40]; char LocalRestartFile[40]; + char OutputFilename[200]; //int rank,nprocs; void LoadParams(std::shared_ptr db0); From cc654bb54b02b11f23530c600bf3b36e0ba776ec Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 1 Oct 2020 16:36:16 -0400 Subject: [PATCH 069/205] adding wide halo gradient test --- common/ScaLBL.h | 100 +---------------------------------------- tests/TestWideHalo.cpp | 29 ++++++------ 2 files changed, 18 insertions(+), 111 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index dc199639..65211440 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -77,104 +77,6 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int f extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz, double *Poros,double *Perm, double 
*Velocity,double Den,double *Pressure); -// GREYSCALE FREE-ENERGY MODEL (Two-component) - -//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleFE(double *dist, double *Aq, double *Bq, double *Den, -// double *DenGradA, double *DenGradB, double *SolidForce, int start, int finish, int Np, -// double tauA,double tauB,double tauA_eff,double tauB_eff,double rhoA,double rhoB,double Gsc, double Gx, double Gy, double Gz, -// double *Poros,double *Perm, double *Velocity,double *Pressure); -// -//extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleFE(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, -// double *DenGradA, double *DenGradB, double *SolidForce, int start, int finish, int Np, -// double tauA,double tauB,double tauA_eff,double tauB_eff,double rhoA,double rhoB,double Gsc, double Gx, double Gy, double Gz, -// double *Poros,double *Perm, double *Velocity,double *Pressure); -// -//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleFEChem(double *dist, double *Cq, double *Phi, double *SolidForce, int start, int finish, int Np, -// double tauA,double tauB,double tauA_eff,double tauB_eff,double rhoA,double rhoB,double gamma,double kappaA,double kappaB,double lambdaA,double lambdaB, -// double Gx, double Gy, double Gz, -// double *Poros,double *Perm, double *Velocity,double *Pressure,double *PressureGrad,double *PressTensorGrad,double *PhiLap); -// -//extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleFEChem(int *neighborList, double *dist, double *Cq, double *Phi, double *SolidForce, int start, int finish, int Np, -// double tauA,double tauB,double tauA_eff,double tauB_eff,double rhoA,double rhoB,double gamma,double kappaA,double kappaB,double lambdaA,double lambdaB, -// double Gx, double Gy, double Gz, -// double *Poros,double *Perm, double *Velocity,double *Pressure,double *PressureGrad,double *PressTensorGrad,double *PhiLap); -// -//extern "C" void ScaLBL_D3Q7_GreyscaleFE_Init(double *Den, double *Cq, double *PhiLap, double gamma, double kappaA, double kappaB, double 
lambdaA, double lambdaB, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_GreyscaleFE_IMRT_Init(double *dist, double *Den, double rhoA, double rhoB, int Np); -// -//extern "C" void ScaLBL_D3Q7_AAodd_GreyscaleFEDensity(int *NeighborList, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q7_AAeven_GreyscaleFEDensity(double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q7_AAodd_GreyscaleFEPhi(int *NeighborList, double *Cq, double *Phi, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q7_AAeven_GreyscaleFEPhi(double *Cq, double *Phi, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_GreyscaleFE_Gradient(int *neighborList, double *Den, double *DenGrad, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_GreyscaleFE_Laplacian(int *neighborList, double *Den, double *DenLap, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_GreyscaleFE_Pressure(double *dist, double *Den, double *Porosity,double *Velocity, -// double *Pressure, double rhoA,double rhoB, int Np); -// -//extern "C" void ScaLBL_D3Q19_GreyscaleFE_PressureTensor(int *neighborList, double *Phi,double *Pressure, double *PressTensor, double *PhiLap, -// double kappaA,double kappaB,double lambdaA,double lambdaB, int start, int finish, int Np); - -// GREYSCALE SHAN-CHEN MODEL (Two-component) - -//extern "C" void ScaLBL_D3Q19_GreyscaleSC_Init(int *Map, double *distA, double *distB, double *DenA, double *DenB, int Np); -// -//extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleSC_Density(int *NeighborList, int *Map, double *distA, double *distB, double *DenA, double *DenB, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleSC_Density(int *Map, double *distA, double *distB, double *DenA, double *DenB, int start, int finish, int Np); -// -//extern "C" void 
ScaLBL_D3Q19_AAodd_GreyscaleSC_MRT(int *neighborList, int *Mpa, double *distA, double *distB, double *DenA,double *DenB, double *DenGradA, double *DenGradB, -// double *SolidForceA, double *SolidForceB, double *Poros,double *Perm, double *Velocity,double *Pressure, -// double tauA,double tauB,double tauA_eff,double tauB_eff, double Gsc, double Gx, double Gy, double Gz, -// int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleSC_MRT(int *Map,double *distA, double *distB, double *DenA,double *DenB, double *DenGradA, double *DenGradB, -// double *SolidForceA, double *SolidForceB, double *Poros,double *Perm, double *Velocity,double *Pressure, -// double tauA,double tauB,double tauA_eff,double tauB_eff, double Gsc, double Gx, double Gy, double Gz, -// int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleSC_BGK(int *neighborList, int *Map, double *distA, double *distB, double *DenA, double *DenB, double *DenGradA, double *DenGradB, -// double *SolidForceA, double *SolidForceB, double *Poros,double *Perm, double *Velocity,double *Pressure, -// double tauA,double tauB,double tauA_eff,double tauB_eff, double Gsc, double Gx, double Gy, double Gz, -// int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleSC_BGK(int *Map, double *distA, double *distB, double *DenA, double *DenB, double *DenGradA, double *DenGradB, -// double *SolidForceA, double *SolidForceB, double *Poros,double *Perm, double *Velocity,double *Pressure, -// double tauA,double tauB,double tauA_eff,double tauB_eff, double Gsc, double Gx, double Gy, double Gz, -// int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_GreyscaleSC_Gradient(int *neighborList, int *Map, double *Den, double *DenGrad, int strideY, int strideZ,int start, int finish, int Np); -// -//extern "C" void ScaLBL_GreyscaleSC_BC_z(int *list, int *Map, double *DenA, double *DenB, double vA, double vB, int count); -// -//extern "C" void 
ScaLBL_GreyscaleSC_BC_Z(int *list, int *Map, double *DenA, double *DenB, double vA, double vB, int count); -// -//extern "C" void ScaLBL_GreyscaleSC_AAeven_Pressure_BC_z(int *list, double *distA, double *distB, double dinA, double dinB, int count, int N); -// -//extern "C" void ScaLBL_GreyscaleSC_AAeven_Pressure_BC_Z(int *list, double *distA, double *distB, double doutA, double doutB, int count, int N); -// -//extern "C" void ScaLBL_GreyscaleSC_AAodd_Pressure_BC_z(int *neighborList, int *list, double *distA, double *distB, double dinA, double dinB, int count, int N); -// -//extern "C" void ScaLBL_GreyscaleSC_AAodd_Pressure_BC_Z(int *neighborList, int *list, double *distA, double *distB, double doutA, double doutB, int count, int N); - -// GREYSCALE COLOR MODEL (Two-component) -//extern "C" void ScaLBL_D3Q19_GreyscaleColor_Init(double *dist, double *Porosity, int Np); - -//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, -// double *ColorGrad,double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, -// double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, -// double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); -// -//extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, -// double *ColorGrad,double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel, -// double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, -// double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, @@ -211,6 +113,8 @@ extern "C" void 
ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *Phi, double *ColorGrad, int start, int finish, int Np, int Nx, int Ny, int Nz); +extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz); + extern "C" void ScaLBL_PhaseField_Init(int *Map, double *Phi, double *Den, double *Aq, double *Bq, int start, int finish, int Np); // Density functional hydrodynamics LBM diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp index d5bb218c..b6dd970b 100644 --- a/tests/TestWideHalo.cpp +++ b/tests/TestWideHalo.cpp @@ -168,6 +168,7 @@ int main(int argc, char **argv) //Create a second communicator based on the regular data layout ScaLBL_Communicator ScaLBL_Comm_Regular(Dm); ScaLBL_Communicator ScaLBL_Comm(Dm); + ScaLBLWideHalo_Communicator WideHalo(Dm); // LBM variables if (rank==0) printf ("Set up the neighborlist \n"); @@ -197,20 +198,19 @@ int main(int argc, char **argv) //........................................................................... 
// Update GPU data structures if (rank==0) printf ("Setting up device map and neighbor list \n"); - int *TmpMap; - TmpMap=new int[Np*sizeof(int)]; - for (k=1; k Date: Thu, 1 Oct 2020 16:41:17 -0400 Subject: [PATCH 070/205] adding wide halo gradient test --- common/ScaLBL.h | 1 + tests/TestWideHalo.cpp | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 65211440..998ad74b 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -10,6 +10,7 @@ #ifndef ScalLBL_H #define ScalLBL_H #include "common/Domain.h" +#include "common/WideHalo.h" extern "C" int ScaLBL_SetDevice(int rank); diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp index b6dd970b..b78426e0 100644 --- a/tests/TestWideHalo.cpp +++ b/tests/TestWideHalo.cpp @@ -218,9 +218,9 @@ int main(int argc, char **argv) ScaLBL_CopyToDevice(Phi, PhaseLabel, N*sizeof(double)); //........................................................................... - Nxh = Nx+2; - Nyh = Ny+2; - Nzh = Nz+2; + int Nxh = Nx+2; + int Nyh = Ny+2; + int Nzh = Nz+2; ScaLBL_D3Q19_MixedGradient(dvcMap, Phi, ColorGrad, 0, Np, Np, Nxh, Nyh, Nzh); double *COLORGRAD; From d40de38c488da9c1f37f2cbed9e0cc5fab05edae Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 5 Oct 2020 11:03:35 -0400 Subject: [PATCH 071/205] save the work;results needs to be validated --- models/IonModel.cpp | 23 +++++++++++------ models/IonModel.h | 1 + models/PoissonSolver.cpp | 54 ++++++++++++++++++++++++++++++++-------- models/PoissonSolver.h | 3 +++ models/StokesModel.cpp | 26 +++++++++++++++++++ models/StokesModel.h | 2 ++ 6 files changed, 92 insertions(+), 17 deletions(-) diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 0d977416..24c4d8bd 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -787,23 +787,32 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ } void ScaLBL_IonModel::getIonConcentration(int timestep){ - + //This function wirte out the data in a 
normal layout (by aggregating all decomposed domains) DoubleArray PhaseField(Nx,Ny,Nz); for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); IonConcentration_LB_to_Phys(PhaseField); - //FILE *OUTFILE; - //sprintf(LocalRankFilename,"Ion%02i_Time_%i.%05i.raw",ic+1,timestep,rank); - //OUTFILE = fopen(LocalRankFilename,"wb"); - //fwrite(PhaseField.data(),8,N,OUTFILE); - //fclose(OUTFILE); - sprintf(OutputFilename,"Ion%02i_Time_%i.raw",ic+1,timestep); Mask->AggregateLabels(OutputFilename,PhaseField); } +} +void ScaLBL_IonModel::getIonConcentration_debug(int timestep){ + //This function write out decomposed data + DoubleArray PhaseField(Nx,Ny,Nz); + for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + IonConcentration_LB_to_Phys(PhaseField); + + FILE *OUTFILE; + sprintf(LocalRankFilename,"Ion%02i_Time_%i.%05i.raw",ic+1,timestep,rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); + } } void ScaLBL_IonModel::IonConcentration_LB_to_Phys(DoubleArray &Den_reg){ diff --git a/models/IonModel.h b/models/IonModel.h index 0f527c3a..5a568182 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -31,6 +31,7 @@ public: void Initialize(); void Run(double *Velocity, double *ElectricField); void getIonConcentration(int timestep); + void getIonConcentration_debug(int timestep); void DummyFluidVelocity(); void DummyElectricField(); double CalIonDenConvergence(vector &ci_avg_previous); diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index fccb1feb..3e11de0a 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -544,21 +544,55 @@ void ScaLBL_Poisson::DummyChargeDensity(){ delete [] ChargeDensity_host; } -void ScaLBL_Poisson::getElectricPotential(int timestep){ +void ScaLBL_Poisson::getElectricPotential_debug(int timestep){ + //This function write out decomposed data + DoubleArray 
PhaseField(Nx,Ny,Nz); + //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); + ScaLBL_CopyToHost(PhaseField.data(),Psi,sizeof(double)*Nx*Ny*Nz); + //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + FILE *OUTFILE; + sprintf(LocalRankFilename,"Electric_Potential_Time_%i.%05i.raw",timestep,rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); +} - DoubleArray PhaseField(Nx,Ny,Nz); - //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); - ScaLBL_CopyToHost(PhaseField.data(),Psi,sizeof(double)*Nx*Ny*Nz); - //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - FILE *OUTFILE; - sprintf(LocalRankFilename,"Electric_Potential_Time_%i.%05i.raw",timestep,rank); - OUTFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,OUTFILE); - fclose(OUTFILE); +void ScaLBL_Poisson::getElectricPotential(int timestep){ + //This function wirte out the data in a normal layout (by aggregating all decomposed domains) + DoubleArray PhaseField(Nx,Ny,Nz); + //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); + ScaLBL_CopyToHost(PhaseField.data(),Psi,sizeof(double)*Nx*Ny*Nz); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + + sprintf(OutputFilename,"Electric_Potential_Time_%i.raw",timestep); + Mask->AggregateLabels(OutputFilename,PhaseField); } void ScaLBL_Poisson::getElectricField(int timestep){ + DoubleArray PhaseField(Nx,Ny,Nz); + + ScaLBL_Comm->RegularLayout(Map,&ElectricField[0*Np],PhaseField); + ElectricField_LB_to_Phys(PhaseField); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + sprintf(OutputFilename,"ElectricField_X_Time_%i.raw",timestep); + Mask->AggregateLabels(OutputFilename,PhaseField); + + ScaLBL_Comm->RegularLayout(Map,&ElectricField[1*Np],PhaseField); + ElectricField_LB_to_Phys(PhaseField); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + sprintf(OutputFilename,"ElectricField_Y_Time_%i.raw",timestep); + Mask->AggregateLabels(OutputFilename,PhaseField); + + ScaLBL_Comm->RegularLayout(Map,&ElectricField[2*Np],PhaseField); + 
ElectricField_LB_to_Phys(PhaseField); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + sprintf(OutputFilename,"ElectricField_Z_Time_%i.raw",timestep); + Mask->AggregateLabels(OutputFilename,PhaseField); +} + +void ScaLBL_Poisson::getElectricField_debug(int timestep){ + //ScaLBL_D3Q7_Poisson_getElectricField(fq,ElectricField,tau,Np); //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 921963ed..dfd098d5 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -29,7 +29,9 @@ public: void Initialize(); void Run(double *ChargeDensity); void getElectricPotential(int timestep); + void getElectricPotential_debug(int timestep); void getElectricField(int timestep); + void getElectricField_debug(int timestep); void DummyChargeDensity();//for debugging //bool Restart,pBC; @@ -76,6 +78,7 @@ private: char LocalRankString[8]; char LocalRankFilename[40]; char LocalRestartFile[40]; + char OutputFilename[200]; //int rank,nprocs; void LoadParams(std::shared_ptr db0); diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 09528567..964baaae 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -375,6 +375,32 @@ void ScaLBL_StokesModel::getVelocity(int timestep){ ScaLBL_D3Q19_Momentum(fq, Velocity, Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + DoubleArray PhaseField(Nx,Ny,Nz); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + Velocity_LB_to_Phys(PhaseField); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + sprintf(OutputFilename,"Velocity_X_Time_%i.raw",timestep); + Mask->AggregateLabels(OutputFilename,PhaseField); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + Velocity_LB_to_Phys(PhaseField); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + sprintf(OutputFilename,"Velocity_Y_Time_%i.raw",timestep); + Mask->AggregateLabels(OutputFilename,PhaseField); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + Velocity_LB_to_Phys(PhaseField); + 
ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + sprintf(OutputFilename,"Velocity_Z_Time_%i.raw",timestep); + Mask->AggregateLabels(OutputFilename,PhaseField); +} + +void ScaLBL_StokesModel::getVelocity_debug(int timestep){ + //get velocity in physical unit [m/sec] + ScaLBL_D3Q19_Momentum(fq, Velocity, Np); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + DoubleArray PhaseField(Nx,Ny,Nz); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); Velocity_LB_to_Phys(PhaseField); diff --git a/models/StokesModel.h b/models/StokesModel.h index 346d75c3..b7ad345e 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -32,6 +32,7 @@ public: void Run_Lite(double *ChargeDensity, double *ElectricField); void VelocityField(); void getVelocity(int timestep); + void getVelocity_debug(int timestep); double CalVelocityConvergence(double& flow_rate_previous,double *ChargeDensity, double *ElectricField); bool Restart,pBC; @@ -79,6 +80,7 @@ private: char LocalRankString[8]; char LocalRankFilename[40]; char LocalRestartFile[40]; + char OutputFilename[200]; //int rank,nprocs; void LoadParams(std::shared_ptr db0); From 8b095b24ada5970a3b051f7b7ed79c3f8213ec6b Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 5 Oct 2020 14:36:26 -0400 Subject: [PATCH 072/205] added mix gradient --- cpu/MixedGradient.cpp | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 cpu/MixedGradient.cpp diff --git a/cpu/MixedGradient.cpp b/cpu/MixedGradient.cpp new file mode 100644 index 00000000..8aa7c53f --- /dev/null +++ b/cpu/MixedGradient.cpp @@ -0,0 +1,48 @@ +/* Implement Mixed Gradient (Lee et al. 
JCP 2016)*/ + +extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) +{ + static int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, + {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0}, + {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, + {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; + + int i,j,k,n,N; + int np,np2,nm; // neighbors + double v,vp,vp2,vm; // values at neighbors + double grad; + for (int idx=start; idx Date: Mon, 5 Oct 2020 14:36:50 -0400 Subject: [PATCH 073/205] fixing build --- common/ScaLBL.h | 1 - common/WideHalo.h | 5 +++-- tests/TestWideHalo.cpp | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 998ad74b..65211440 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -10,7 +10,6 @@ #ifndef ScalLBL_H #define ScalLBL_H #include "common/Domain.h" -#include "common/WideHalo.h" extern "C" int ScaLBL_SetDevice(int rank); diff --git a/common/WideHalo.h b/common/WideHalo.h index 24429e3f..601eda13 100644 --- a/common/WideHalo.h +++ b/common/WideHalo.h @@ -106,8 +106,9 @@ private: } } } - ScaLBL_AllocateZeroCopy((void **) &dvcList, count*sizeof(int)); // Allocate device memory - ScaLBL_CopyToZeroCopy(dvcList,List,count*sizeof(int)); + size_t numbytes=count*sizeof(int); + ScaLBL_AllocateZeroCopy((void **) &dvcList, numbytes); // Allocate device memory + ScaLBL_CopyToZeroCopy(dvcList,List,numbytes); return count; } diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp index b78426e0..cc29a15d 100644 --- a/tests/TestWideHalo.cpp +++ b/tests/TestWideHalo.cpp @@ -7,6 +7,7 @@ #include #include #include "common/ScaLBL.h" +#include "common/WideHalo.h" #include "common/MPI_Helpers.h" using namespace std; @@ -168,7 +169,7 @@ int main(int argc, char **argv) //Create a second communicator based on the regular data layout ScaLBL_Communicator ScaLBL_Comm_Regular(Dm); ScaLBL_Communicator ScaLBL_Comm(Dm); - ScaLBLWideHalo_Communicator 
WideHalo(Dm); + ScaLBLWideHalo_Communicator WideHalo(Dm,2); // LBM variables if (rank==0) printf ("Set up the neighborlist \n"); From db5579530de88836f84a026689682ff29bb1051f Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Tue, 6 Oct 2020 14:05:26 -0400 Subject: [PATCH 074/205] mix gradient compiles --- cpu/MixedGradient.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpu/MixedGradient.cpp b/cpu/MixedGradient.cpp index 8aa7c53f..841dbdf1 100644 --- a/cpu/MixedGradient.cpp +++ b/cpu/MixedGradient.cpp @@ -29,7 +29,7 @@ extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradie vp = Phi[np]; vp2 = Phi[np2]; vm = Phi[nm]; - grad += 0.25*(5.0*vp1-vp2-3.0*v-vm); + grad += 0.25*(5.0*vp-vp2-3.0*v-vm); } for (int q=6; q<18; q++){ int iqx = D3Q19[q][0]; @@ -41,7 +41,7 @@ extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradie vp = Phi[np]; vp2 = Phi[np2]; vm = Phi[nm]; - grad += 0.125*(5.0*vp1-vp2-3.0*v-vm); + grad += 0.125*(5.0*vp-vp2-3.0*v-vm); } Gradient[n] = grad; } From 5109c08bc378c9a965b445f062b2beeb38a0ac65 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Wed, 7 Oct 2020 12:31:49 -0400 Subject: [PATCH 075/205] Initial hip addition --- CMakeLists.txt | 26 +- {gpu => cuda}/BGK.cu | 0 {gpu => cuda}/Color.cu | 0 {gpu => cuda}/CudaExtras.cu | 0 {gpu => cuda}/D3Q19.cu | 0 {gpu => cuda}/D3Q7.cu | 0 {gpu => cuda}/Extras.cu | 0 {gpu => cuda}/MRT.cu | 0 {gpu => cuda}/dfh.cu | 0 {gpu => cuda}/exe/CMakeLists.txt | 0 {gpu => cuda}/exe/lb1_MRT-swap.cu | 0 {gpu => cuda}/exe/lb1_MRT.cu | 0 {gpu => cuda}/exe/lb1_MRT_mpi.cpp | 0 {gpu => cuda}/exe/lb1_MRT_mpi.cu | 0 {gpu => cuda}/exe/lb2_Color.cpp | 0 {gpu => cuda}/exe/lb2_Color.cu | 0 {gpu => cuda}/exe/lb2_Color_mpi.cpp | 0 {gpu => cuda}/exe/lb2_Color_pBC_wia_mpi.cpp | 0 hip/BGK.hip | 311 ++ hip/CMakeLists.txt | 9 + hip/Color.hip | 4131 +++++++++++++++++++ hip/CudaExtras.hip | 34 + hip/D3Q19.hip | 2645 ++++++++++++ hip/D3Q7.hip | 246 ++ hip/Extras.hip | 62 + 
hip/MRT.hip | 310 ++ hip/dfh.hip | 1508 +++++++ 27 files changed, 9278 insertions(+), 4 deletions(-) rename {gpu => cuda}/BGK.cu (100%) rename {gpu => cuda}/Color.cu (100%) rename {gpu => cuda}/CudaExtras.cu (100%) rename {gpu => cuda}/D3Q19.cu (100%) rename {gpu => cuda}/D3Q7.cu (100%) rename {gpu => cuda}/Extras.cu (100%) rename {gpu => cuda}/MRT.cu (100%) rename {gpu => cuda}/dfh.cu (100%) rename {gpu => cuda}/exe/CMakeLists.txt (100%) rename {gpu => cuda}/exe/lb1_MRT-swap.cu (100%) rename {gpu => cuda}/exe/lb1_MRT.cu (100%) rename {gpu => cuda}/exe/lb1_MRT_mpi.cpp (100%) rename {gpu => cuda}/exe/lb1_MRT_mpi.cu (100%) rename {gpu => cuda}/exe/lb2_Color.cpp (100%) rename {gpu => cuda}/exe/lb2_Color.cu (100%) rename {gpu => cuda}/exe/lb2_Color_mpi.cpp (100%) rename {gpu => cuda}/exe/lb2_Color_pBC_wia_mpi.cpp (100%) create mode 100644 hip/BGK.hip create mode 100644 hip/CMakeLists.txt create mode 100644 hip/Color.hip create mode 100644 hip/CudaExtras.hip create mode 100644 hip/D3Q19.hip create mode 100644 hip/D3Q7.hip create mode 100644 hip/Extras.hip create mode 100644 hip/MRT.hip create mode 100644 hip/dfh.hip diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e7eeaea..33528b62 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,7 +104,7 @@ IF ( USE_DOXYGEN ) ADD_DEPENDENCIES( doc latex_docs doxygen ) ELSE() SET( USE_DOXYGEN 0 ) - ENDIF() + ENDIF()lbpm-wia ENDIF() @@ -123,11 +123,26 @@ IF ( USE_CUDA ) ADD_DEFINITIONS( -DUSE_CUDA ) ENABLE_LANGUAGE( CUDA ) ELSEIF ( USE_HIP ) - FIND_PACKAGE( HIP ) - MESSAGE( FATAL_ERROR "STOP" ) + IF ( NOT DEFINED HIP_PATH ) + IF ( NOT DEFINED ENV{HIP_PATH} ) + SET( HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed" ) + ELSE() + SET( HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed" ) + ENDIF() + ENDIF() + SET( CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH} ) + FIND_PACKAGE( HIP REQUIRED ) + FIND_PACKAGE( CUDA QUIET ) + MESSAGE( "HIP Found") + MESSAGE( " HIP version: 
${HIP_VERSION_STRING}") + MESSAGE( " HIP platform: ${HIP_PLATFORM}") + MESSAGE( " HIP Include Path: ${HIP_INCLUDE_DIRS}") + MESSAGE( " HIP Libraries: ${HIP_LIBRARIES}") + ADD_DEFINITIONS( -DUSE_HIP ) ENDIF() + # Configure external packages IF ( NOT ONLY_BUILD_DOCS ) CONFIGURE_MPI() # MPI must be before other libraries @@ -161,7 +176,10 @@ IF ( NOT ONLY_BUILD_DOCS ) ADD_PACKAGE_SUBDIRECTORY( StackTrace ) ADD_PACKAGE_SUBDIRECTORY( models ) IF ( USE_CUDA ) - ADD_PACKAGE_SUBDIRECTORY( gpu ) + ADD_PACKAGE_SUBDIRECTORY( cuda ) + ELSEIF ( USE_HIP ) + ADD_SUBDIRECTORY( gpu ) + SET( LBPM_LIBRARIES lbpm-hip lbpm-wia ) ELSE() ADD_PACKAGE_SUBDIRECTORY( cpu ) ENDIF() diff --git a/gpu/BGK.cu b/cuda/BGK.cu similarity index 100% rename from gpu/BGK.cu rename to cuda/BGK.cu diff --git a/gpu/Color.cu b/cuda/Color.cu similarity index 100% rename from gpu/Color.cu rename to cuda/Color.cu diff --git a/gpu/CudaExtras.cu b/cuda/CudaExtras.cu similarity index 100% rename from gpu/CudaExtras.cu rename to cuda/CudaExtras.cu diff --git a/gpu/D3Q19.cu b/cuda/D3Q19.cu similarity index 100% rename from gpu/D3Q19.cu rename to cuda/D3Q19.cu diff --git a/gpu/D3Q7.cu b/cuda/D3Q7.cu similarity index 100% rename from gpu/D3Q7.cu rename to cuda/D3Q7.cu diff --git a/gpu/Extras.cu b/cuda/Extras.cu similarity index 100% rename from gpu/Extras.cu rename to cuda/Extras.cu diff --git a/gpu/MRT.cu b/cuda/MRT.cu similarity index 100% rename from gpu/MRT.cu rename to cuda/MRT.cu diff --git a/gpu/dfh.cu b/cuda/dfh.cu similarity index 100% rename from gpu/dfh.cu rename to cuda/dfh.cu diff --git a/gpu/exe/CMakeLists.txt b/cuda/exe/CMakeLists.txt similarity index 100% rename from gpu/exe/CMakeLists.txt rename to cuda/exe/CMakeLists.txt diff --git a/gpu/exe/lb1_MRT-swap.cu b/cuda/exe/lb1_MRT-swap.cu similarity index 100% rename from gpu/exe/lb1_MRT-swap.cu rename to cuda/exe/lb1_MRT-swap.cu diff --git a/gpu/exe/lb1_MRT.cu b/cuda/exe/lb1_MRT.cu similarity index 100% rename from gpu/exe/lb1_MRT.cu rename to 
cuda/exe/lb1_MRT.cu diff --git a/gpu/exe/lb1_MRT_mpi.cpp b/cuda/exe/lb1_MRT_mpi.cpp similarity index 100% rename from gpu/exe/lb1_MRT_mpi.cpp rename to cuda/exe/lb1_MRT_mpi.cpp diff --git a/gpu/exe/lb1_MRT_mpi.cu b/cuda/exe/lb1_MRT_mpi.cu similarity index 100% rename from gpu/exe/lb1_MRT_mpi.cu rename to cuda/exe/lb1_MRT_mpi.cu diff --git a/gpu/exe/lb2_Color.cpp b/cuda/exe/lb2_Color.cpp similarity index 100% rename from gpu/exe/lb2_Color.cpp rename to cuda/exe/lb2_Color.cpp diff --git a/gpu/exe/lb2_Color.cu b/cuda/exe/lb2_Color.cu similarity index 100% rename from gpu/exe/lb2_Color.cu rename to cuda/exe/lb2_Color.cu diff --git a/gpu/exe/lb2_Color_mpi.cpp b/cuda/exe/lb2_Color_mpi.cpp similarity index 100% rename from gpu/exe/lb2_Color_mpi.cpp rename to cuda/exe/lb2_Color_mpi.cpp diff --git a/gpu/exe/lb2_Color_pBC_wia_mpi.cpp b/cuda/exe/lb2_Color_pBC_wia_mpi.cpp similarity index 100% rename from gpu/exe/lb2_Color_pBC_wia_mpi.cpp rename to cuda/exe/lb2_Color_pBC_wia_mpi.cpp diff --git a/hip/BGK.hip b/hip/BGK.hip new file mode 100644 index 00000000..f3e746af --- /dev/null +++ b/hip/BGK.hip @@ -0,0 +1,311 @@ +#include + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + int n; + // conserved momemnts + double rho,ux,uy,uz,uu; + // non-conserved moments + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; 
+ f6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; + + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; + + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + uu = 1.5*(ux*ux+uy*uy+uz*uz); + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + + // q = 7 + dist[nr8] = f7*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); + + // q = 8 + dist[nr7] = f8*(1.0-rlx) + + 
rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); + + // q = 9 + dist[nr10] = f9*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); + + // q = 10 + dist[nr9] = f10*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); + + // q = 11 + dist[nr12] = f11*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); + + // q = 12 + dist[nr11] = f12*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); + + // q = 13 + dist[nr14] = f13*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); + + // q= 14 + dist[nr13] = f14*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); + + // q = 15 + dist[nr16] = f15*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); + + // q = 16 + dist[nr15] = f16*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz); + + // q = 17 + dist[nr18] = f17*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); + } + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ + + dvc_ScaLBL_D3Q19_AAeven_BGK<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, 
double rlx, double Fx, double Fy, double Fz){ + dvc_ScaLBL_D3Q19_AAodd_BGK<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",hipGetErrorString(err)); + } +} diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt new file mode 100644 index 00000000..38ef7c27 --- /dev/null +++ b/hip/CMakeLists.txt @@ -0,0 +1,9 @@ +SET( HIP_SEPERABLE_COMPILATION ON ) +SET_SOURCE_FILES_PROPERTIES( BGK.hip Color.hip CudaExtras.hip D3Q19.hip D3Q7.hip dfh.hip Extras.hip MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) +HIP_ADD_LIBRARY( lbpm-hip BGK.hip Color.hip CudaExtras.hip D3Q19.hip D3Q7.hip dfh.hip Extras.hip MRT.hip SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) +TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so ) +TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip ) +ADD_DEPENDENCIES( lbpm-hip copy-include ) + + + diff --git a/hip/Color.hip b/hip/Color.hip new file mode 100644 index 00000000..b802ab1f --- /dev/null +++ b/hip/Color.hip @@ -0,0 +1,4131 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz) +{ + //int i,j,k; + int n,N; + char id; + N = Nx*Ny*Nz; + + int S = N/NBLOCKS/NTHREADS + 1; + for (int s=0; s 0){ + + // Retrieve the color gradient + nx = ColorGrad[n]; + ny = ColorGrad[N+n]; + nz = ColorGrad[2*N+n]; + //...........Normalize the Color Gradient................................. + C = sqrt(nx*nx+ny*ny+nz*nz); + nx = nx/C; + ny = ny/C; + nz = nz/C; + //......No color gradient at z-boundary if pressure BC are set............. + // if (pBC && k==0) nx = ny = nz = 0.f; + // if (pBC && k==Nz-1) nx = ny = nz = 0.f; + //........................................................................ 
+ // READ THE DISTRIBUTIONS + // (read from opposite array due to previous swap operation) + //........................................................................ + f2 = distodd[n]; + f4 = distodd[N+n]; + f6 = distodd[2*N+n]; + f8 = distodd[3*N+n]; + f10 = distodd[4*N+n]; + f12 = distodd[5*N+n]; + f14 = distodd[6*N+n]; + f16 = distodd[7*N+n]; + f18 = distodd[8*N+n]; + //........................................................................ + f0 = disteven[n]; + f1 = disteven[N+n]; + f3 = disteven[2*N+n]; + f5 = disteven[3*N+n]; + f7 = disteven[4*N+n]; + f9 = disteven[5*N+n]; + f11 = disteven[6*N+n]; + f13 = disteven[7*N+n]; + f15 = disteven[8*N+n]; + f17 = disteven[9*N+n]; + //........................................................................ + // PERFORM RELAXATION PROCESS + //........................................................................ + //....................compute the moments............................................... + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17); + m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14; + jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18; + jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18; + m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18); + m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17); + m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13; + m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13; + m13 = f8+f7-f10-f9; + m14 = f16+f15-f18-f17; + m15 = f12+f11-f14-f13; + m16 = f7-f8+f9-f10-f11+f12-f13+f14; + m17 = -f7+f8+f9-f10+f15-f16+f17-f18; + m18 = f11-f12-f13+f14-f15+f16+f17-f18; + //..........Toelke, Fruediger et. al. 
2006............... + if (C == 0.0) nx = ny = nz = 1.0; + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( -0.5*((jy*jy-jz*jz)/rho) - m12); + m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //.................inverse transformation...................................................... 
+ f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2; + f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10); + f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10); + f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); + f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); + f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); + f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); + f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12+0.25*m13+0.125*(m16-m17); + f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12+0.25*m13+0.125*(m17-m16); + f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12-0.25*m13+0.125*(m16+m17); + f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12-0.25*m13-0.125*(m16+m17); + f11 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8) + 
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12+0.25*m15+0.125*(m18-m16); + f12 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12+0.25*m15+0.125*(m16-m18); + f13 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12-0.25*m15-0.125*(m16+m18); + f14 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12-0.25*m15+0.125*(m16+m18); + f15 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8) + -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18); + f16 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8) + -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17); + f17 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8) + -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18); + f18 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6) + -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18); + //....................................................................................................... 
+ // incorporate external force + f1 += 0.16666666*Fx; + f2 -= 0.16666666*Fx; + f3 += 0.16666666*Fy; + f4 -= 0.16666666*Fy; + f5 += 0.16666666*Fz; + f6 -= 0.16666666*Fz; + f7 += 0.08333333333*(Fx+Fy); + f8 -= 0.08333333333*(Fx+Fy); + f9 += 0.08333333333*(Fx-Fy); + f10 -= 0.08333333333*(Fx-Fy); + f11 += 0.08333333333*(Fx+Fz); + f12 -= 0.08333333333*(Fx+Fz); + f13 += 0.08333333333*(Fx-Fz); + f14 -= 0.08333333333*(Fx-Fz); + f15 += 0.08333333333*(Fy+Fz); + f16 -= 0.08333333333*(Fy+Fz); + f17 += 0.08333333333*(Fy-Fz); + f18 -= 0.08333333333*(Fy-Fz); + //*********** WRITE UPDATED VALUES TO MEMORY ****************** + // Write the updated distributions + //....EVEN..................................... + disteven[n] = f0; + disteven[N+n] = f2; + disteven[2*N+n] = f4; + disteven[3*N+n] = f6; + disteven[4*N+n] = f8; + disteven[5*N+n] = f10; + disteven[6*N+n] = f12; + disteven[7*N+n] = f14; + disteven[8*N+n] = f16; + disteven[9*N+n] = f18; + //....ODD...................................... + distodd[n] = f1; + distodd[N+n] = f3; + distodd[2*N+n] = f5; + distodd[3*N+n] = f7; + distodd[4*N+n] = f9; + distodd[5*N+n] = f11; + distodd[6*N+n] = f13; + distodd[7*N+n] = f15; + distodd[8*N+n] = f17; + + //...Store the Velocity.......................... + Velocity[n] = jx; + Velocity[N+n] = jy; + Velocity[2*N+n] = jz; + /* Velocity[3*n] = jx; + Velocity[3*n+1] = jy; + Velocity[3*n+2] = jz; + */ //...Store the Color Gradient.................... + // ColorGrad[3*n] = nx*C; + // ColorGrad[3*n+1] = ny*C; + // ColorGrad[3*n+2] = nz*C; + //............................................... 
+ //*************************************************************** + } // check if n is in the solid + } // loop over n +} + +__global__ void +__launch_bounds__(512,2) +dvc_ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad, + double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB, + double alpha, double beta, double Fx, double Fy, double Fz) +{ + + int i,j,k,n,nn,N; + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + // additional variables needed for computations + double rho,jx,jy,jz,C,nx,ny,nz; + char id; + + N = Nx*Ny*Nz; + + int S = N/NBLOCKS/NTHREADS + 1; + for (int s=0; s 0){ + + //.......Back out the 3-D indices for node n.............. + k = n/(Nx*Ny); + j = (n-Nx*Ny*k)/Nx; + i = n-Nx*Ny*k-Nx*j; + //........................................................................ + //........Get 1-D index for this thread.................... + // n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x; + //........................................................................ + // COMPUTE THE COLOR GRADIENT + //........................................................................ + //.................Read Phase Indicator Values............................ + //........................................................................ + nn = n-1; // neighbor index (get convention) + if (i-1<0) nn += Nx; // periodic BC along the x-boundary + f1 = phi[nn]; // get neighbor for phi - 1 + //........................................................................ 
+ nn = n+1; // neighbor index (get convention) + if (!(i+10)) delta=0; + a1 = na*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nb*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = na*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nb*(0.1111111111111111*(1-4.5*ux))+delta; + + A_odd[n] = a1; + A_even[N+n] = a2; + B_odd[n] = b1; + B_even[N+n] = b2; + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*na*nb*nab*0.1111111111111111*ny; + if (!(na*nb*nab>0)) delta=0; + a1 = na*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nb*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = na*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nb*(0.1111111111111111*(1-4.5*uy))+delta; + + A_odd[N+n] = a1; + A_even[2*N+n] = a2; + B_odd[N+n] = b1; + B_even[2*N+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*na*nb*nab*0.1111111111111111*nz; + if (!(na*nb*nab>0)) delta=0; + a1 = na*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nb*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = na*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nb*(0.1111111111111111*(1-4.5*uz))+delta; + + A_odd[2*N+n] = a1; + A_even[3*N+n] = a2; + B_odd[2*N+n] = b1; + B_even[3*N+n] = b2; + + } + } +} + +//************************************************************************* +__global__ void dvc_DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, + double beta, int Nx, int Ny, int Nz, bool pBC) +{ + char id; + + int idx; + int in,jn,kn,n,nn,N; + int q,Cqx,Cqy,Cqz; + // int sendLoc; + + double na,nb; // density values + double ux,uy,uz; // flow velocity + double nx,ny,nz,C; // color gradient components + double a1,a2,b1,b2; + double sp,delta; + double feq[6]; // equilibrium distributions + // Set of Discrete velocities for the D3Q19 Model + int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}}; + N = Nx*Ny*Nz; + + int S = N/NBLOCKS/NTHREADS + 1; + for (int s=0; s 0 && na+nb > 0.0){ + //.......Back out 
the 3-D indices for node n.............. + int k = n/(Nx*Ny); + int j = (n-Nx*Ny*k)/Nx; + int i = n-Nx*Ny*k-Nx*j; + //.....Load the Color gradient......... + nx = ColorGrad[n]; + ny = ColorGrad[N+n]; + nz = ColorGrad[2*N+n]; + C = sqrt(nx*nx+ny*ny+nz*nz); + if (C == 0.0) C=1.0; + nx = nx/C; + ny = ny/C; + nz = nz/C; + //....Load the flow velocity........... + ux = Velocity[n]; + uy = Velocity[N+n]; + uz = Velocity[2*N+n]; + //....Instantiate the density distributions + // Generate Equilibrium Distributions and stream + // Stationary value - distribution 0 + // Den[2*n] += 0.3333333333333333*na; + // Den[2*n+1] += 0.3333333333333333*nb; + Den[2*n] += 0.3333333333333333*na; + Den[2*n+1] += 0.3333333333333333*nb; + // Non-Stationary equilibrium distributions + feq[0] = 0.1111111111111111*(1+3*ux); + feq[1] = 0.1111111111111111*(1-3*ux); + feq[2] = 0.1111111111111111*(1+3*uy); + feq[3] = 0.1111111111111111*(1-3*uy); + feq[4] = 0.1111111111111111*(1+3*uz); + feq[5] = 0.1111111111111111*(1-3*uz); + // Construction and streaming for the components + for (idx=0; idx<3; idx++){ + // Distribution index + q = 2*idx; + // Associated discrete velocity + Cqx = D3Q7[idx][0]; + Cqy = D3Q7[idx][1]; + Cqz = D3Q7[idx][2]; + // Generate the Equilibrium Distribution + a1 = na*feq[q]; + b1 = nb*feq[q]; + a2 = na*feq[q+1]; + b2 = nb*feq[q+1]; + // Recolor the distributions + if (C > 0.0){ + sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); + //if (idx > 2) sp = 0.7071067811865475*sp; + //delta = sp*min( min(a1,a2), min(b1,b2) ); + delta = na*nb/(na+nb)*0.1111111111111111*sp; + //if (a1>0 && b1>0){ + a1 += beta*delta; + a2 -= beta*delta; + b1 -= beta*delta; + b2 += beta*delta; + } + + // .......Get the neighbor node.............. 
+ //nn = n + Stride[idx]; + in = i+Cqx; + jn = j+Cqy; + kn = k+Cqz; + + // Adjust for periodic BC, if necessary + // if (in<0) in+= Nx; + // if (jn<0) jn+= Ny; + // if (kn<0) kn+= Nz; + // if (!(in 0){ + // Get the density value (Streaming already performed) + Na = Den[n]; + Nb = Den[N+n]; + Phi[n] = (Na-Nb)/(Na+Nb); + } + } + } + //................................................................... +} + +__global__ void dvc_ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice) +{ + int n = Slice*Nx*Ny + blockIdx.x*blockDim.x + threadIdx.x; + if (n < (Slice+1)*Nx*Ny){ + Phi[n] = value; + } +} + + + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, + double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + int ijk,nn,n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi,tau,rho0,rlx_setA,rlx_setB; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = 
nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... 
+ + } + } +} + + +__global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,ijk,nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + //int nr15,nr16,nr17,nr18; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi,tau,rho0,rlx_setA,rlx_setB; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; 
+ m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + //nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + 
//nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq 
= dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + //........................................................................ + //..............carry out relaxation process.............................. + //..........Toelke, Fruediger et. al. 2006................................ + if (C == 0.0) nx = ny = nz = 0.0; + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //.................inverse transformation...................................................... 
+ + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + 
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + + // write the velocity + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + + // Instantiate mass transport distributions + // Stationary value - distribution 0 + nAB = 1.0/(nA+nB); + Aq[n] 
= 0.3333333333333333*nA; + Bq[n] = 0.3333333333333333*nB; + + //............................................... + // q = 0,2,4 + // Cq = {1,0,0}, {0,1,0}, {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nx; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + //nread = neighborList[n+Np]; + Aq[nr2] = a1; + Bq[nr2] = b1; + // q=2 + //nread = neighborList[n]; + Aq[nr1] = a2; + Bq[nr1] = b2; + + //............................................... + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + //nread = neighborList[n+3*Np]; + Aq[nr4] = a1; + Bq[nr4] = b1; + // q = 4 + //nread = neighborList[n+2*Np]; + Aq[nr3] = a2; + Bq[nr3] = b2; + + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + //nread = neighborList[n+5*Np]; + Aq[nr6] = a1; + Bq[nr6] = b1; + // q = 6 + //nread = neighborList[n+4*Np]; + Aq[nr5] = a2; + Bq[nr5] = b2; + //............................................... 
+ } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAodd_ColorMomentum(int *neighborList, double *dist, double *Den, + double *Velocity, double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n,nread; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double nA,nB; // number density + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi,tau,rho0,rlx_setA,rlx_setB; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + //fp = dist[10*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // f2 = dist[10*Np+n]; + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + //fq = dist[Np+n]; + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + //fq = dist[11*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; 
// neighbor 3 + fq = dist[nread]; + //fq = dist[2*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + //fq = dist[12*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + //fq = dist[3*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + //fq = dist[13*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + //fq = dist[4*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + //fq = dist[14*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + //fq = dist[5*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + //fq = dist[15*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = 
neighborList[n+11*Np]; + fq = dist[nread]; + //fq = dist[6*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + //fq = dist[16*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + //fq = dist[7*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + //........................................................................ + //..............carry out relaxation process.............................. + //..........Toelke, Fruediger et. al. 
2006................................ + if (C == 0.0) nx = ny = nz = 0.0; + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //.................inverse transformation...................................................... 
+ + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + 
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + + // write the velocity + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_ColorMomentum(double *dist, double *Den, double *Velocity, + 
double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int start, int finish, int Np){ + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double nA,nB; // number density + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi,tau,rho0,rlx_setA,rlx_setB; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... 
+ // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... + + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAodd_ColorMass(int *neighborList, double *Aq, double *Bq, double *Den, + double *Velocity, double *ColorGrad, double beta, int start, int finish, int Np){ + + int n,nread; + // non-conserved moments + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + nread = neighborList[n+Np]; + Aq[nread] = a1; + Bq[nread] = b1; + // q=2 + nread = neighborList[n]; + Aq[nread] = a2; + Bq[nread] = b2; + + //............................................... + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + nread = neighborList[n+3*Np]; + Aq[nread] = a1; + Bq[nread] = b1; + // q = 4 + nread = neighborList[n+2*Np]; + Aq[nread] = a2; + Bq[nread] = b2; + + //............................................... 
+ // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + nread = neighborList[n+5*Np]; + Aq[nread] = a1; + Bq[nread] = b1; + // q = 6 + nread = neighborList[n+4*Np]; + Aq[nread] = a2; + Bq[nread] = b2; + //............................................... + } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, + double *Den, double *Phi, int start, int finish, int Np){ + int idx,n,nread; + double fq,nA,nB; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 1.f){ + nA = 1.0; nB = 0.f; + } + else if (phi < -1.f){ + nB = 1.0; nA = 0.f; + } + else{ + nA=0.5*(phi+1.f); + nB=0.5*(1.f-phi); + } + Den[idx] = nA; + Den[Np+idx] = nB; + + Aq[idx]=0.3333333333333333*nA; + Aq[Np+idx]=0.1111111111111111*nA; + Aq[2*Np+idx]=0.1111111111111111*nA; + Aq[3*Np+idx]=0.1111111111111111*nA; + Aq[4*Np+idx]=0.1111111111111111*nA; + Aq[5*Np+idx]=0.1111111111111111*nA; + Aq[6*Np+idx]=0.1111111111111111*nA; + + Bq[idx]=0.3333333333333333*nB; + Bq[Np+idx]=0.1111111111111111*nB; + Bq[2*Np+idx]=0.1111111111111111*nB; + Bq[3*Np+idx]=0.1111111111111111*nB; + Bq[4*Np+idx]=0.1111111111111111*nB; + Bq[5*Np+idx]=0.1111111111111111*nB; + Bq[6*Np+idx]=0.1111111111111111*nB; + } + } +} + +extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){ + int GRID = Nx*Ny / 512 + 1; + dvc_ScaLBL_SetSlice_z<<>>(Phi,value,Nx,Ny,Nz,Slice); +} + +extern "C" void ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz){ + dvc_ScaLBL_Color_Init<<>>(ID, Den, Phi, das, dbs, Nx, Ny, Nz); +} + +extern "C" void ScaLBL_Color_InitDistance(char *ID, double *Den, double *Phi, double *Distance, + double das, double dbs, double 
beta, double xp, int Nx, int Ny, int Nz){ + + dvc_ScaLBL_Color_InitDistance<<>>(ID, Den, Phi, Distance, das, dbs, beta, xp, Nx, Ny, Nz); +} + +extern "C" void ScaLBL_D3Q19_ColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz){ + dvc_ScaLBL_D3Q19_ColorGradient<<>>(ID, phi, ColorGrad, Nx, Ny, Nz); +} + +extern "C" void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad, + double *Velocity, int Nx, int Ny, int Nz,double rlx_setA, double rlx_setB, + double alpha, double beta, double Fx, double Fy, double Fz, bool pBC){ + dvc_ColorCollide<<>>( ID, disteven, distodd, ColorGrad,Velocity, Nx, Ny, Nz,rlx_setA, rlx_setB, + alpha, beta, Fx, Fy, Fz, pBC); + +} + +extern "C" void ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad, + double *Velocity, int Nx, int Ny, int Nz,double rlx_setA, double rlx_setB, + double alpha, double beta, double Fx, double Fy, double Fz){ + dvc_ScaLBL_D3Q19_ColorCollide<<>>(ID, disteven, distodd, phi, ColorGrad, Velocity, Nx, Ny, Nz, rlx_setA, rlx_setB, + alpha, beta, Fx, Fy, Fz); + +} + +extern "C" void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, + double beta, int Nx, int Ny, int Nz, bool pBC){ + + dvc_DensityStreamD3Q7<<>>(ID, Den, Copy, Phi, ColorGrad, Velocity, beta, Nx, Ny, Nz, pBC); +} + +extern "C" void ScaLBL_ComputePhaseField(char *ID, double *Phi, double *Den, int N){ + dvc_ScaLBL_ComputePhaseField<<>>(ID, Phi, Den, N); +} +extern "C" void ScaLBL_D3Q7_ColorCollideMass(char *ID, double *A_even, double *A_odd, double *B_even, double *B_odd, + double *Den, double *Phi, double *ColorGrad, double *Velocity, double beta, int N, bool pBC){ + dvc_ScaLBL_D3Q7_ColorCollideMass<<>>(ID, A_even, A_odd, B_even, B_odd, Den, Phi, ColorGrad, Velocity, beta, N, pBC); +} +// Pressure Boundary Conditions Functions + +extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, 
double *Bq, double *Den, double *Phi, + double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_Color, hipFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAeven_Color<<>>(Map, dist, Aq, Bq, Den, Phi, Vel, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Color: %s \n",hipGetErrorString(err)); + } + +} + +extern "C" void ScaLBL_D3Q19_AAodd_Color(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_Color, hipFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAodd_Color<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, Vel, + rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Color: %s \n",hipGetErrorString(err)); + } + hipProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *NeighborList, int *Map, double *Aq, double *Bq, + double *Den, double *Phi, int start, int finish, int Np){ + + dvc_ScaLBL_D3Q7_AAodd_PhaseField<<>>(NeighborList, Map, Aq, Bq, Den, Phi, start, finish, Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_PhaseField: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi, + int start, int finish, int Np){ + + dvc_ScaLBL_D3Q7_AAeven_PhaseField<<>>(Map, Aq, Bq, Den, 
Phi, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_PhaseField: %s \n",hipGetErrorString(err)); + } + +} + +extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *Phi, double *ColorGrad, int start, int finish, int Np, + int Nx, int Ny, int Nz){ + + int strideY=Nx; + int strideZ=Nx*Ny; + dvc_ScaLBL_D3Q19_Gradient<<>>(Map, Phi, ColorGrad, start, finish, Np, strideY, strideZ); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_ColorGrad: %s \n",hipGetErrorString(err)); + } + +} + +extern "C" void ScaLBL_PhaseField_Init(int *Map, double *Phi, double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + dvc_ScaLBL_PhaseField_Init<<>>(Map, Phi, Den, Aq, Bq, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_PhaseField_Init: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_ColorMomentum(double *dist, double *Den, double *Vel, + double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_ColorMomentum, hipFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAeven_ColorMomentum<<>>(dist, Den, Vel, ColorGrad, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_ColorMomentum: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_ColorMomentum(int *d_neighborList, double *dist, double *Den, double *Vel, + double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_ColorMomentum, hipFuncCachePreferL1); + + 
dvc_ScaLBL_D3Q19_AAodd_ColorMomentum<<>>(d_neighborList, dist, Den, Vel, ColorGrad, + rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_ColorMomentum: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_ColorMass(double *Aq, double *Bq, double *Den, double *Vel, + double *ColorGrad, double beta, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_ColorMass, hipFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAeven_ColorMass<<>>(Aq, Bq, Den, Vel, ColorGrad, beta, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Color: %s \n",hipGetErrorString(err)); + } + +} + +extern "C" void ScaLBL_D3Q19_AAodd_ColorMass(int *d_neighborList, double *Aq, double *Bq, double *Den, double *Vel, + double *ColorGrad, double beta, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_ColorMass, hipFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAodd_ColorMass<<>>(d_neighborList, Aq, Bq, Den, Vel, ColorGrad, beta, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Color: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Color_BC_z(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Color_BC_z<<>>(list, Map, Phi, Den, vA, vB, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_Color_BC_z: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Color_BC_Z(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Color_BC_Z<<>>(list, Map, Phi, Den, vA, vB, count, Np); + hipError_t err = hipGetLastError(); + if 
(hipSuccess != err){ + printf("CUDA error in ScaLBL_Color_BC_Z: %s \n",hipGetErrorString(err)); + } +} + + + diff --git a/hip/CudaExtras.hip b/hip/CudaExtras.hip new file mode 100644 index 00000000..5be72e5a --- /dev/null +++ b/hip/CudaExtras.hip @@ -0,0 +1,34 @@ +// Basic hip functions callable from C/C++ code +#include "hip/hip_runtime.h" + +extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size){ + hipMalloc(address,size); + hipMemset(*address,0,size); +} + +extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size){ + hipMemcpy(dest,source,size,hipMemcpyHostToDevice); +} + + +extern "C" void dvc_CopyToHost(void* dest, void* source, size_t size){ + hipMemcpy(dest,source,size,hipMemcpyDeviceToHost); +} + +extern "C" void dvc_Barrier(){ + hipDeviceSynchronize(); +} +/* +#if __CUDA_ARCH__ < 600 +__device__ double atomicAdd(double* address, double val) { +unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } + while (assumed != old); return __longlong_as_double(old); +} + +#endif +*/ diff --git a/hip/D3Q19.hip b/hip/D3Q19.hip new file mode 100644 index 00000000..1bcfa04d --- /dev/null +++ b/hip/D3Q19.hip @@ -0,0 +1,2645 @@ +#include +#include "hip/hip_runtime.h" +#include + +#define NBLOCKS 1024 +#define NTHREADS 256 + +/* +1. constants that are known at compile time should be defined using preprocessor macros (e.g. #define) or via C/C++ const variables at global/file scope. +2. Usage of __constant__ memory may be beneficial for programs who use certain values that don't change for the duration of the kernel and for which certain access patterns are present (e.g. all threads access the same value at the same time). 
This is not better or faster than constants that satisfy the requirements of item 1 above. +3. If the number of choices to be made by a program are relatively small in number, and these choices affect kernel execution, one possible approach for additional compile-time optimization would be to use templated code/kernels + */ + +__constant__ __device__ double mrt_V1=0.05263157894736842; +__constant__ __device__ double mrt_V2=0.012531328320802; +__constant__ __device__ double mrt_V3=0.04761904761904762; +__constant__ __device__ double mrt_V4=0.004594820384294068; +__constant__ __device__ double mrt_V5=0.01587301587301587; +__constant__ __device__ double mrt_V6=0.0555555555555555555555555; +__constant__ __device__ double mrt_V7=0.02777777777777778; +__constant__ __device__ double mrt_V8=0.08333333333333333; +__constant__ __device__ double mrt_V9=0.003341687552213868; +__constant__ __device__ double mrt_V10=0.003968253968253968; +__constant__ __device__ double mrt_V11=0.01388888888888889; +__constant__ __device__ double mrt_V12=0.04166666666666666; + + +// functionality for parallel reduction in Flux BC routines -- probably should be re-factored to another location +// functions copied from https://devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/ + +//__shared__ double Transform[722]= +// {}; + +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 +#else +__device__ double atomicAdd(double* address, double val) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val+__longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif + +using namespace cooperative_groups; +__device__ double reduce_sum(thread_group g, double *temp, double val) +{ + int lane = g.thread_rank(); + + // Each iteration halves the number of active threads + // Each 
thread adds its partial sum[i] to sum[lane+i] + for (int i = g.size() / 2; i > 0; i /= 2) + { + temp[lane] = val; + g.sync(); // wait for all threads to store + if(lane 0; offset /= 2) + val += __shfl_down_sync(0xFFFFFFFF, val, offset, 32); + return val; +} + +__inline__ __device__ +double blockReduceSum(double val) { + + static __shared__ double shared[32]; // Shared mem for 32 partial sums + int lane = threadIdx.x % warpSize; + int wid = threadIdx.x / warpSize; + + val = warpReduceSum(val); // Each warp performs partial reduction + + if (lane==0) shared[wid]=val; // Write reduced value to shared memory + + __syncthreads(); // Wait for all partial reductions + + //read from shared memory only if that warp existed + val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; + + if (wid==0) val = warpReduceSum(val); //Final reduce within first warp + + return val; +} + +__global__ void deviceReduceKernel(double *in, double* out, int N) { + double sum = 0; + //reduce multiple elements per thread + for (int i = blockIdx.x * blockDim.x + threadIdx.x; + i < N; + i += blockDim.x * gridDim.x) { + sum += in[i]; + } + sum = blockReduceSum(sum); + if (threadIdx.x==0) + out[blockIdx.x]=sum; +} + +__global__ void dvc_ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){ + //.................................................................................... + // Pack distribution q into the send buffer for the listed lattice sites + // dist may be even or odd distributions stored by stream layout + //.................................................................................... 
+ int idx,n; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx 0 ){ + f_even[n] = 0.3333333333333333; + f_odd[n] = 0.055555555555555555; //double(100*n)+1.f; + f_even[N+n] = 0.055555555555555555; //double(100*n)+2.f; + f_odd[N+n] = 0.055555555555555555; //double(100*n)+3.f; + f_even[2*N+n] = 0.055555555555555555; //double(100*n)+4.f; + f_odd[2*N+n] = 0.055555555555555555; //double(100*n)+5.f; + f_even[3*N+n] = 0.055555555555555555; //double(100*n)+6.f; + f_odd[3*N+n] = 0.0277777777777778; //double(100*n)+7.f; + f_even[4*N+n] = 0.0277777777777778; //double(100*n)+8.f; + f_odd[4*N+n] = 0.0277777777777778; //double(100*n)+9.f; + f_even[5*N+n] = 0.0277777777777778; //double(100*n)+10.f; + f_odd[5*N+n] = 0.0277777777777778; //double(100*n)+11.f; + f_even[6*N+n] = 0.0277777777777778; //double(100*n)+12.f; + f_odd[6*N+n] = 0.0277777777777778; //double(100*n)+13.f; + f_even[7*N+n] = 0.0277777777777778; //double(100*n)+14.f; + f_odd[7*N+n] = 0.0277777777777778; //double(100*n)+15.f; + f_even[8*N+n] = 0.0277777777777778; //double(100*n)+16.f; + f_odd[8*N+n] = 0.0277777777777778; //double(100*n)+17.f; + f_even[9*N+n] = 0.0277777777777778; //double(100*n)+18.f; + } + else{ + for(int q=0; q<9; q++){ + f_even[q*N+n] = -1.0; + f_odd[q*N+n] = -1.0; + } + f_even[9*N+n] = -1.0; + } + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np) +{ + int n; + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + //fp = dist[10*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // f2 = dist[10*Np+n]; + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + //fq = dist[Np+n]; + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = 
neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + //fq = dist[11*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + //fq = dist[2*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + //fq = dist[12*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + //fq = dist[3*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + //fq = dist[13*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + //fq = dist[4*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + //fq = dist[14*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + //fq = dist[5*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + 
fq = dist[nread]; + //fq = dist[15*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + //fq = dist[6*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + //fq = dist[16*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + //fq = dist[7*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 
-= fq; + m18 -= fq; + + //..............incorporate external force................................................ + //..............carry out relaxation process............................................... + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9); + m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + //.................inverse transformation...................................................... 
+ + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+ + mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+ + mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+ + mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + 
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + + } + } +} + + +//__launch_bounds__(512,1) +__global__ void +dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) { + + int n; + double fq; + // conserved momemnts + double 
rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 0){ + //.......Back out the 3-D indices for node n.............. + k = n/(Nx*Ny); + j = (n-Nx*Ny*k)/Nx; + i = n-Nx*Ny*k-Nx*j; + //........................................................................ + // Retrieve even distributions from the local node (swap convention) + // f0 = disteven[n]; // Does not particupate in streaming + f1 = distodd[n]; + f3 = distodd[N+n]; + f5 = distodd[2*N+n]; + f7 = distodd[3*N+n]; + f9 = distodd[4*N+n]; + f11 = distodd[5*N+n]; + f13 = distodd[6*N+n]; + f15 = distodd[7*N+n]; + f17 = distodd[8*N+n]; + //........................................................................ + + //........................................................................ + // Retrieve odd distributions from neighboring nodes (swap convention) + //........................................................................ + nn = n+1; // neighbor index (pull convention) + if (!(i+1 0.0){ + distodd[n] = f2; + disteven[N+nn] = f1; + } + //} + //........................................................................ + nn = n+Nx; // neighbor index (pull convention) + if (!(j+1 0.0){ + distodd[N+n] = f4; + disteven[2*N+nn] = f3; + // } + } + //........................................................................ + nn = n+Nx*Ny; // neighbor index (pull convention) + if (!(k+1 0.0){ + distodd[2*N+n] = f6; + disteven[3*N+nn] = f5; + // } + } + //........................................................................ + nn = n+Nx+1; // neighbor index (pull convention) + if (!(i+1 0.0){ + distodd[3*N+n] = f8; + disteven[4*N+nn] = f7; + // } + } + //........................................................................ 
+ nn = n-Nx+1; // neighbor index (pull convention) + if (!(i+1 0.0){ + distodd[4*N+n] = f10; + disteven[5*N+nn] = f9; + // } + } + //........................................................................ + nn = n+Nx*Ny+1; // neighbor index (pull convention) + if (!(i+1 0.0){ + distodd[5*N+n] = f12; + disteven[6*N+nn] = f11; + // } + } + //........................................................................ + nn = n-Nx*Ny+1; // neighbor index (pull convention) + if (!(i+1 0.0){ + distodd[6*N+n] = f14; + disteven[7*N+nn] = f13; + // } + } + //........................................................................ + nn = n+Nx*Ny+Nx; // neighbor index (pull convention) + if (!(j+1 0.0){ + distodd[7*N+n] = f16; + disteven[8*N+nn] = f15; + // } + } + //........................................................................ + nn = n-Nx*Ny+Nx; // neighbor index (pull convention) + if (!(j+1 0.0){ + distodd[8*N+n] = f18; + disteven[9*N+nn] = f17; + // } + } + //........................................................................ 
+ + } + } + } +} + + +__global__ void dvc_ScaLBL_D3Q19_Momentum(double *dist, double *vel, int N) +{ + int n; + // distributions + double f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double vx,vy,vz; + + int S = N/NBLOCKS/NTHREADS + 1; + for (int s=0; s 0 ){ + f_even[n] = 0 + 0.01*0; + f_odd[n] = 0+ 0.01*1; //double(100*n)+1.f; + f_even[N+n] = 1+ 0.01*2; //double(100*n)+2.f; + f_odd[N+n] = 1+ 0.01*3; //double(100*n)+3.f; + f_even[2*N+n] = 2+ 0.01*4; //double(100*n)+4.f; + f_odd[2*N+n] = 2+ 0.01*5; //double(100*n)+5.f; + f_even[3*N+n] = 3+ 0.01*6; //double(100*n)+6.f; + f_odd[3*N+n] = 3+ 0.01*7; //double(100*n)+7.f; + f_even[4*N+n] = 4+ 0.01*8; //double(100*n)+8.f; + f_odd[4*N+n] = 4+ 0.01*9; //double(100*n)+9.f; + f_even[5*N+n] = 5+ 0.01*10; //double(100*n)+10.f; + f_odd[5*N+n] = 5+ 0.01*11; //double(100*n)+11.f; + f_even[6*N+n] = 6+ 0.01*12; //double(100*n)+12.f; + f_odd[6*N+n] = 6+ 0.01*13; //double(100*n)+13.f; + f_even[7*N+n] = 7+ 0.01*14; //double(100*n)+14.f; + f_odd[7*N+n] = 7+ 0.01*15; //double(100*n)+15.f; + f_even[8*N+n] = 8+ 0.01*16; //double(100*n)+16.f; + f_odd[8*N+n] = 8+ 0.01*17; //double(100*n)+17.f; + f_even[9*N+n] = 9+ 0.01*18; //double(100*n)+18.f; + } + else{ + for(int q=0; q<9; q++){ + f_even[q*N+n] = -1.0; + f_odd[q*N+n] = -1.0; + } + f_even[9*N+n] = -1.0; + } + } + } +} + + +//************************************************************************* + +//extern "C" void ScaLBL_D3Q19_MapRecv(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count, +// int *d3q19_recvlist, int Nx, int Ny, int Nz){ +// int GRID = count / 512 + 1; +// dvc_ScaLBL_D3Q19_Unpack <<>>(q, Cqx, Cqy, Cqz, list, start, count, d3q19_recvlist, Nx, Ny, Nz); +//} + +extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Pack <<>>(q, list, start, count, sendbuf, dist, N); +} + +extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, 
int start, int count, double *recvbuf, double *dist, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Unpack <<>>(q, list, start, count, recvbuf, dist, N); +} +//************************************************************************* + +extern "C" void ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np){ + dvc_ScaLBL_D3Q19_AA_Init<<>>(f_even, f_odd, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AA_Init: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Init(double *dist, int Np){ + dvc_ScaLBL_D3Q19_Init<<>>(dist, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AA_Init: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz){ + dvc_ScaLBL_D3Q19_Swap<<>>(ID, disteven, distodd, Nx, Ny, Nz); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, double *distodd, int Np) +{ + + const int Q = 9; + // hipStream_t streams[Q]; + // Launch the swap operation as different kernels + for (int q=0; q>>(neighborList, disteven, distodd, Np, q); + } + // cpu should wait for all kernels to finish (to avoid launch of dependent kernels) + //hipDeviceSynchronize(); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) { + hipFuncSetCacheConfig(dvc_ScaLBL_AAeven_Compact, hipFuncCachePreferL1); + dvc_ScaLBL_AAeven_Compact<<>>(ID, d_dist, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void 
ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) { + hipFuncSetCacheConfig(dvc_ScaLBL_AAodd_Compact, hipFuncCachePreferL1); + dvc_ScaLBL_AAodd_Compact<<>>(ID,d_neighborList, d_dist,Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Momentum(double *dist, double *vel, int Np){ + + dvc_ScaLBL_D3Q19_Momentum<<>>(dist, vel, Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Velocity: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Pressure(double *fq, double *Pressure, int Np){ + dvc_ScaLBL_D3Q19_Pressure<<< NBLOCKS,NTHREADS >>>(fq, Pressure, Np); +} + +extern "C" void ScaLBL_D3Q19_Velocity_BC_z(double *disteven, double *distodd, double uz,int Nx, int Ny, int Nz){ + int GRID = Nx*Ny / 512 + 1; + dvc_D3Q19_Velocity_BC_z<<>>(disteven,distodd, uz, Nx, Ny, Nz); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Velocity_BC_z: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Velocity_BC_Z(double *disteven, double *distodd, double uz, int Nx, int Ny, int Nz, int outlet){ + int GRID = Nx*Ny / 512 + 1; + dvc_D3Q19_Velocity_BC_Z<<>>(disteven, distodd, uz, Nx, Ny, Nz, outlet); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Velocity_BC_Z: %s \n",hipGetErrorString(err)); + } +} + +extern "C" double ScaLBL_D3Q19_Flux_BC_z(double *disteven, double *distodd, double flux,int Nx, int Ny, int Nz){ + + int GRID = Nx*Ny / 512 + 1; + + // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 + if (Nx*Ny > 512*512){ + printf("WARNING (ScaLBL_D3Q19_Flux_BC_z): CUDA reduction operation may fail if Nx*Ny > 512*512"); + } + + // Allocate memory to store the sums + double din; + double sum[1]; + double *dvcsum; + int sharedBytes = 
NTHREADS*sizeof(double); + hipMalloc((void **)&dvcsum,sizeof(double)*Nx*Ny); + hipMemset(dvcsum,0,sizeof(double)*Nx*Ny); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z (memory allocation): %s \n",hipGetErrorString(err)); + } + + // compute the local flux and store the result + dvc_D3Q19_Flux_BC_z<<>>(disteven, distodd, flux, dvcsum, Nx, Ny, Nz); + + err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z (flux calculation, step 1): %s \n",hipGetErrorString(err)); + } + + // Now read the total flux + hipMemcpy(&sum[0],dvcsum,sizeof(double),hipMemcpyDeviceToHost); + din=sum[0]; + + err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Flux_BC_z (flux calculation, step 2): %s \n",hipGetErrorString(err)); + } + + // free the memory needed for reduction + hipFree(dvcsum); + + return din; +} + + +extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_z(int *list, double *dist, double din, int count, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_z<<>>(list, dist, din, count, N); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Pressure_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, double dout, int count, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z<<>>(list, dist, dout, count, N); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Pressure_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z<<>>(neighborList, list, dist, din, count, N); + hipError_t err = hipGetLastError(); + if 
(hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Pressure_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_Z(int *neighborList, int *list, double *dist, double dout, int count, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_Z<<>>(neighborList, list, dist, dout, count, N); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Pressure_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + + +extern "C" double ScaLBL_D3Q19_AAeven_Flux_BC_z(int *list, double *dist, double flux, double area, + int count, int N){ + + int GRID = count / 512 + 1; + + // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 + if (count > 512*512){ + printf("WARNING (ScaLBL_D3Q19_Flux_BC_Z): CUDA reduction operation may fail if count > 512*512"); + } + + // Allocate memory to store the sums + double din; + double sum[1]; + double *dvcsum; + hipMalloc((void **)&dvcsum,sizeof(double)*count); + hipMemset(dvcsum,0,sizeof(double)*count); + int sharedBytes = 512*sizeof(double); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (memory allocation): %s \n",hipGetErrorString(err)); + } + + // compute the local flux and store the result + dvc_ScaLBL_D3Q19_AAeven_Flux_BC_z<<>>(list, dist, flux, area, dvcsum, count, N); + err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (kernel): %s \n",hipGetErrorString(err)); + } + + // Now read the total flux + hipMemcpy(&sum[0],dvcsum,sizeof(double),hipMemcpyDeviceToHost); + din=sum[0]; + err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_Flux_BC_z (reduction): %s \n",hipGetErrorString(err)); + } + + // free the memory needed for reduction + hipFree(dvcsum); + + return din; +} + +extern "C" double ScaLBL_D3Q19_AAodd_Flux_BC_z(int *neighborList, int 
*list, double *dist, double flux, + double area, int count, int N){ + + int GRID = count / 512 + 1; + + // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 + if (count > 512*512){ + printf("WARNING (ScaLBL_D3Q19_AAodd_Flux_BC_z): CUDA reduction operation may fail if count > 512*512"); + } + + // Allocate memory to store the sums + double din; + double sum[1]; + double *dvcsum; + hipMalloc((void **)&dvcsum,sizeof(double)*count); + hipMemset(dvcsum,0,sizeof(double)*count); + int sharedBytes = 512*sizeof(double); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (memory allocation): %s \n",hipGetErrorString(err)); + } + + // compute the local flux and store the result + dvc_ScaLBL_D3Q19_AAodd_Flux_BC_z<<>>(neighborList, list, dist, flux, area, dvcsum, count, N); + err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (kernel): %s \n",hipGetErrorString(err)); + } + // Now read the total flux + hipMemcpy(&sum[0],dvcsum,sizeof(double),hipMemcpyDeviceToHost); + din=sum[0]; + err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_Flux_BC_z (reduction): %s \n",hipGetErrorString(err)); + } + + // free the memory needed for reduction + hipFree(dvcsum); + + return din; +} + +extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, double flux, int Nx, int Ny, int Nz, int outlet){ + + int GRID = Nx*Ny / 512 + 1; + + // IMPORTANT -- this routine may fail if Nx*Ny > 512*512 + if (Nx*Ny > 512*512){ + printf("WARNING (ScaLBL_D3Q19_Flux_BC_Z): CUDA reduction operation may fail if Nx*Ny > 512*512"); + } + + // Allocate memory to store the sums + double dout; + double sum[1]; + double *dvcsum; + hipMalloc((void **)&dvcsum,sizeof(double)*Nx*Ny); + hipMemset(dvcsum,0,sizeof(double)*Nx*Ny); + + // compute the local flux and store the result + dvc_D3Q19_Flux_BC_Z<<>>(disteven, distodd, flux, dvcsum, Nx, Ny, Nz, 
outlet); + + // Now read the total flux + hipMemcpy(&sum[0],dvcsum,sizeof(double),hipMemcpyDeviceToHost); + + // free the memory needed for reduction + + dout = sum[0]; + + hipFree(dvcsum); + + return dout; + +} + +extern "C" double deviceReduce(double *in, double* out, int N) { + int threads = 512; + int blocks = min((N + threads - 1) / threads, 1024); + + double sum = 0.f; + deviceReduceKernel<<>>(in, out, N); + deviceReduceKernel<<<1, 1024>>>(out, out, blocks); + return sum; +} + +// +//extern "C" void ScaLBL_D3Q19_Pressure_BC_Z(int *list, double *dist, double dout, int count, int Np){ +// int GRID = count / 512 + 1; +// dvc_ScaLBL_D3Q19_Pressure_BC_Z<<>>(disteven, distodd, dout, Nx, Ny, Nz, outlet); +//} + +extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, + double Fy, double Fz){ + + dvc_ScaLBL_AAeven_MRT<<>>(dist,start,finish,Np,rlx_setA,rlx_setB,Fx,Fy,Fz); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_MRT: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborlist, double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, + double Fy, double Fz){ + + dvc_ScaLBL_AAodd_MRT<<>>(neighborlist,dist,start,finish,Np,rlx_setA,rlx_setB,Fx,Fy,Fz); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_MRT: %s \n",hipGetErrorString(err)); + } +} + diff --git a/hip/D3Q7.hip b/hip/D3Q7.hip new file mode 100644 index 00000000..16863fec --- /dev/null +++ b/hip/D3Q7.hip @@ -0,0 +1,246 @@ +// GPU Functions for D3Q7 Lattice Boltzmann Methods + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, double *Data, int N){ + //.................................................................................... 
+ // Pack distribution q into the send buffer for the listed lattice sites + // dist may be even or odd distributions stored by stream layout + //.................................................................................... + int idx,n; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx 0){ + value = Den[n]; + f_even[n] = 0.3333333333333333*value; + f_odd[n] = 0.1111111111111111*value; //double(100*n)+1.f; + f_even[N+n] = 0.1111111111111111*value; //double(100*n)+2.f; + f_odd[N+n] = 0.1111111111111111*value; //double(100*n)+3.f; + f_even[2*N+n] = 0.1111111111111111*value; //double(100*n)+4.f; + f_odd[2*N+n] = 0.1111111111111111*value; //double(100*n)+5.f; + f_even[3*N+n] = 0.1111111111111111*value; //double(100*n)+6.f; + } + else{ + for(int q=0; q<3; q++){ + f_even[q*N+n] = -1.0; + f_odd[q*N+n] = -1.0; + } + f_even[3*N+n] = -1.0; + } + } + } +} + +//************************************************************************* +__global__ void dvc_ScaLBL_D3Q7_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz) +{ + int i,j,k,n,nn,N; + // distributions + double f1,f2,f3,f4,f5,f6; + char id; + N = Nx*Ny*Nz; + + int S = N/NBLOCKS/NTHREADS + 1; + for (int s=0; s 0){ + //.......Back out the 3-D indices for node n.............. + k = n/(Nx*Ny); + j = (n-Nx*Ny*k)/Nx; + i = n-Nx*Ny*k-Nx*j; + //........................................................................ + // Retrieve even distributions from the local node (swap convention) + // f0 = disteven[n]; // Does not particupate in streaming + f1 = distodd[n]; + f3 = distodd[N+n]; + f5 = distodd[2*N+n]; + //........................................................................ + + //........................................................................ + // Retrieve odd distributions from neighboring nodes (swap convention) + //........................................................................ 
+ nn = n+1; // neighbor index (pull convention) + if (!(i+1 0 ){ + // Read the distributions + f0 = disteven[n]; + f2 = disteven[N+n]; + f4 = disteven[2*N+n]; + f6 = disteven[3*N+n]; + f1 = distodd[n]; + f3 = distodd[N+n]; + f5 = distodd[2*N+n]; + // Compute the density + Den[n] = f0+f1+f2+f3+f4+f5+f6; + } + } + } +} + +extern "C" void ScaLBL_D3Q7_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_Unpack <<>>(q, list, start, count, recvbuf, dist, N); +} + +extern "C" void ScaLBL_Scalar_Pack(int *list, int count, double *sendbuf, double *Data, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Scalar_Pack <<>>(list, count, sendbuf, Data, N); +} + +extern "C" void ScaLBL_Scalar_Unpack(int *list, int count, double *recvbuf, double *Data, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Scalar_Unpack <<>>(list, count, recvbuf, Data, N); +} +extern "C" void ScaLBL_PackDenD3Q7(int *list, int count, double *sendbuf, int number, double *Data, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_PackDenD3Q7 <<>>(list, count, sendbuf, number, Data, N); +} + +extern "C" void ScaLBL_UnpackDenD3Q7(int *list, int count, double *recvbuf, int number, double *Data, int N){ + int GRID = count / 512 + 1; + dvc_ScaLBL_UnpackDenD3Q7 <<>>(list, count, recvbuf, number, Data, N); +} + +extern "C" void ScaLBL_D3Q7_Init(char *ID, double *f_even, double *f_odd, double *Den, int Nx, int Ny, int Nz){ + dvc_ScaLBL_D3Q7_Init <<>>(ID, f_even, f_odd, Den, Nx, Ny, Nz); +} + +extern "C" void ScaLBL_D3Q7_Swap(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz){ + dvc_ScaLBL_D3Q7_Swap <<>>(ID, disteven, distodd, Nx, Ny, Nz); +} + +extern "C" void ScaLBL_D3Q7_Density(char *ID, double *disteven, double *distodd, double *Den, + int Nx, int Ny, int Nz){ + dvc_ScaLBL_D3Q7_Density <<>>(ID, disteven, distodd, Den, Nx, Ny, Nz); +} + diff --git a/hip/Extras.hip b/hip/Extras.hip new file mode 100644 index 
00000000..6d2a65e3 --- /dev/null +++ b/hip/Extras.hip @@ -0,0 +1,62 @@ +// Basic hip functions callable from C/C++ code +#include "hip/hip_runtime.h" +#include + +extern "C" int ScaLBL_SetDevice(int rank){ + int n_devices; + //int local_rank = atoi(getenv("MV2_COMM_WORLD_LOCAL_RANK")); + hipGetDeviceCount(&n_devices); + //int device = local_rank % n_devices; + int device = rank % n_devices; + hipSetDevice(device); + if (rank < n_devices) printf("MPI rank=%i will use GPU ID %i / %i \n",rank,device,n_devices); + return device; +} + +extern "C" void ScaLBL_AllocateDeviceMemory(void** address, size_t size){ + hipMalloc(address,size); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("Error in hipMalloc: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_FreeDeviceMemory(void* pointer){ + hipFree(pointer); +} + +extern "C" void ScaLBL_CopyToDevice(void* dest, const void* source, size_t size){ + hipMemcpy(dest,source,size,hipMemcpyHostToDevice); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("Error in hipMemcpy (host->device): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_AllocateZeroCopy(void** address, size_t size){ + //hipMallocHost(address,size); + hipMalloc(address,size); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("Error in hipMallocHost: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_CopyToZeroCopy(void* dest, const void* source, size_t size){ + hipMemcpy(dest,source,size,hipMemcpyHostToDevice); + hipError_t err = hipGetLastError(); + //memcpy(dest, source, size); + +} + +extern "C" void ScaLBL_CopyToHost(void* dest, const void* source, size_t size){ + hipMemcpy(dest,source,size,hipMemcpyDeviceToHost); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("Error in hipMemcpy (device->host): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_DeviceBarrier(){ + hipDeviceSynchronize(); +} diff --git 
a/hip/MRT.hip b/hip/MRT.hip new file mode 100644 index 00000000..671e2801 --- /dev/null +++ b/hip/MRT.hip @@ -0,0 +1,310 @@ +//************************************************************************* +// CUDA kernels for single-phase ScaLBL_D3Q19_MRT code +// James McClure +//************************************************************************* +#include "hip/hip_runtime.h" + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz) +{ + int n,N; + N = Nx*Ny*Nz; + int S = N/NBLOCKS/NTHREADS + 1; + for (int s=0; s 0){ + f_even[n] = 0.3333333333333333; + f_odd[n] = 0.055555555555555555; //double(100*n)+1.f; + f_even[N+n] = 0.055555555555555555; //double(100*n)+2.f; + f_odd[N+n] = 0.055555555555555555; //double(100*n)+3.f; + f_even[2*N+n] = 0.055555555555555555; //double(100*n)+4.f; + f_odd[2*N+n] = 0.055555555555555555; //double(100*n)+5.f; + f_even[3*N+n] = 0.055555555555555555; //double(100*n)+6.f; + f_odd[3*N+n] = 0.0277777777777778; //double(100*n)+7.f; + f_even[4*N+n] = 0.0277777777777778; //double(100*n)+8.f; + f_odd[4*N+n] = 0.0277777777777778; //double(100*n)+9.f; + f_even[5*N+n] = 0.0277777777777778; //double(100*n)+10.f; + f_odd[5*N+n] = 0.0277777777777778; //double(100*n)+11.f; + f_even[6*N+n] = 0.0277777777777778; //double(100*n)+12.f; + f_odd[6*N+n] = 0.0277777777777778; //double(100*n)+13.f; + f_even[7*N+n] = 0.0277777777777778; //double(100*n)+14.f; + f_odd[7*N+n] = 0.0277777777777778; //double(100*n)+15.f; + f_even[8*N+n] = 0.0277777777777778; //double(100*n)+16.f; + f_odd[8*N+n] = 0.0277777777777778; //double(100*n)+17.f; + f_even[9*N+n] = 0.0277777777777778; //double(100*n)+18.f; + } + else{ + for(int q=0; q<9; q++){ + f_even[q*N+n] = -1.0; + f_odd[q*N+n] = -1.0; + } + f_even[9*N+n] = -1.0; + } + } + } +} + +__global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, double *vel, int Nx, int Ny, int Nz) +{ + int n,N; + // distributions + double 
f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + double vx,vy,vz; + + N = Nx*Ny*Nz; + int S = N/NBLOCKS/NTHREADS + 1; + // S - number of threadblocks per grid block + for (int s=0; s 0){ + //........................................................................ + // Registers to store the distributions + //........................................................................ + f2 = disteven[N+n]; + f4 = disteven[2*N+n]; + f6 = disteven[3*N+n]; + f8 = disteven[4*N+n]; + f10 = disteven[5*N+n]; + f12 = disteven[6*N+n]; + f14 = disteven[7*N+n]; + f16 = disteven[8*N+n]; + f18 = disteven[9*N+n]; + //........................................................................ + f1 = distodd[n]; + f3 = distodd[1*N+n]; + f5 = distodd[2*N+n]; + f7 = distodd[3*N+n]; + f9 = distodd[4*N+n]; + f11 = distodd[5*N+n]; + f13 = distodd[6*N+n]; + f15 = distodd[7*N+n]; + f17 = distodd[8*N+n]; + //.................Compute the velocity................................... + vx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + vy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + vz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + //..................Write the velocity..................................... + vel[n] = vx; + vel[N+n] = vy; + vel[2*N+n] = vz; + //........................................................................ 
+ + } + } + } +} + +//************************************************************************* +__global__ void +__launch_bounds__(512,2) +D3Q19_MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz, + double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) +{ + + int n,N; + // distributions + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; + double f10,f11,f12,f13,f14,f15,f16,f17,f18; + + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + + N = Nx*Ny*Nz; + + char id; + int S = N/NBLOCKS/NTHREADS + 1; + // S - number of threadblocks per grid block + for (int s=0; s 0){ + //........................................................................ + // Registers to store the distributions - read based on swap convention + //........................................................................ + f2 = distodd[n]; + f4 = distodd[N+n]; + f6 = distodd[2*N+n]; + f8 = distodd[3*N+n]; + f10 = distodd[4*N+n]; + f12 = distodd[5*N+n]; + f14 = distodd[6*N+n]; + f16 = distodd[7*N+n]; + f18 = distodd[8*N+n]; + //........................................................................ + f0 = disteven[n]; + f1 = disteven[N+n]; + f3 = disteven[2*N+n]; + f5 = disteven[3*N+n]; + f7 = disteven[4*N+n]; + f9 = disteven[5*N+n]; + f11 = disteven[6*N+n]; + f13 = disteven[7*N+n]; + f15 = disteven[8*N+n]; + f17 = disteven[9*N+n]; + //........................................................................ + //....................compute the moments............................................... 
+ rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17); + m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14; + jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18; + jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18; + m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18); + m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17); + m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13; + m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13; + m13 = f8+f7-f10-f9; + m14 = f16+f15-f18-f17; + m15 = f12+f11-f14-f13; + m16 = f7-f8+f9-f10-f11+f12-f13+f14; + m17 = -f7+f8+f9-f10+f15-f16+f17-f18; + m18 = f11-f12-f13+f14-f15+f16+f17-f18; + //..............incorporate external force................................................ + //jx += 0.5*Fx; + //jy += 0.5*Fy; + //jz += 0.5*Fz; + //..............carry out relaxation process............................................... 
+ m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho) - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) - m9); + m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //.................inverse transformation...................................................... + f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2; + f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jx-m4)+0.05555555555555555*(m9-m10); + f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m4-jx)+0.05555555555555555*(m9-m10); + f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); + f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); + f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); + f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 + +0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); + f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + 
+0.04166666666666666*m12+0.25*m13+0.125*(m16-m17); + f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12+0.25*m13+0.125*(m17-m16); + f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12-0.25*m13+0.125*(m16+m17); + f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4) + +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 + +0.04166666666666666*m12-0.25*m13-0.125*(m16+m17); + f11 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12+0.25*m15+0.125*(m18-m16); + f12 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12+0.25*m15+0.125*(m16-m18); + f13 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12-0.25*m15-0.125*(m16+m18); + f14 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4) + +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 + -0.04166666666666666*m12-0.25*m15+0.125*(m16+m18); + f15 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8) + -0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18); + f16 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8) + 
-0.05555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17); + f17 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8) + -0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18); + f18 = 0.05263157894736842*rho+0.003341687552213868*m1 + +0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6) + -0.05555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18); + //....................................................................................................... + // incorporate external force + f1 += 0.16666666*Fx; + f2 -= 0.16666666*Fx; + f3 += 0.16666666*Fy; + f4 -= 0.16666666*Fy; + f5 += 0.16666666*Fz; + f6 -= 0.16666666*Fz; + f7 += 0.08333333333*(Fx+Fy); + f8 -= 0.08333333333*(Fx+Fy); + f9 += 0.08333333333*(Fx-Fy); + f10 -= 0.08333333333*(Fx-Fy); + f11 += 0.08333333333*(Fx+Fz); + f12 -= 0.08333333333*(Fx+Fz); + f13 += 0.08333333333*(Fx-Fz); + f14 -= 0.08333333333*(Fx-Fz); + f15 += 0.08333333333*(Fy+Fz); + f16 -= 0.08333333333*(Fy+Fz); + f17 += 0.08333333333*(Fy-Fz); + f18 -= 0.08333333333*(Fy-Fz); + //....................................................................................................... + // Write data based on un-swapped convention + disteven[n] = f0; + disteven[N+n] = f2; + disteven[2*N+n] = f4; + disteven[3*N+n] = f6; + disteven[4*N+n] = f8; + disteven[5*N+n] = f10; + disteven[6*N+n] = f12; + disteven[7*N+n] = f14; + disteven[8*N+n] = f16; + disteven[9*N+n] = f18; + + distodd[n] = f1; + distodd[N+n] = f3; + distodd[2*N+n] = f5; + distodd[3*N+n] = f7; + distodd[4*N+n] = f9; + distodd[5*N+n] = f11; + distodd[6*N+n] = f13; + distodd[7*N+n] = f15; + distodd[8*N+n] = f17; + //....................................................................................................... 
+ } + } + } +} + +extern "C" void ScaLBL_D3Q19_MRT(char *ID, double *f_even, double *f_odd, double rlxA, double rlxB, double Fx, double Fy, double Fz,int Nx, int Ny, int Nz) +{ + D3Q19_MRT <<< NBLOCKS,NTHREADS>>> (ID, f_even, f_odd, Nx, Ny, Nz, rlxA, rlxB, Fx, Fy, Fz); +} + + diff --git a/hip/dfh.hip b/hip/dfh.hip new file mode 100644 index 00000000..37f91498 --- /dev/null +++ b/hip/dfh.hip @@ -0,0 +1,1508 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 +#else +__device__ double atomicAdd(double* address, double val) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val+__longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif + +__global__ void dvc_ScaLBL_Gradient_Unpack(double weight, double Cqx, double Cqy, double Cqz, + int *list, int start, int count, double *recvbuf, double *phi, double *grad, int N){ + //.................................................................................... + // Unpack distribution from the recv buffer + // Distribution q matche Cqx, Cqy, Cqz + // swap rule means that the distributions in recvbuf are OPPOSITE of q + // dist may be even or odd distributions stored by stream layout + //.................................................................................... 
+ int n,idx; + double value, tmp; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx 0.f){ + nA = 1.0; nB = 0.f; + } + else{ + nB = 1.0; nA = 0.f; + } + Den[idx] = nA; + Den[Np+idx] = nB; + + Aq[idx]=0.3333333333333333*nA; + Aq[Np+idx]=0.1111111111111111*nA; + Aq[2*Np+idx]=0.1111111111111111*nA; + Aq[3*Np+idx]=0.1111111111111111*nA; + Aq[4*Np+idx]=0.1111111111111111*nA; + Aq[5*Np+idx]=0.1111111111111111*nA; + Aq[6*Np+idx]=0.1111111111111111*nA; + + Bq[idx]=0.3333333333333333*nB; + Bq[Np+idx]=0.1111111111111111*nB; + Bq[2*Np+idx]=0.1111111111111111*nB; + Bq[3*Np+idx]=0.1111111111111111*nB; + Bq[4*Np+idx]=0.1111111111111111*nB; + Bq[5*Np+idx]=0.1111111111111111*nB; + Bq[6*Np+idx]=0.1111111111111111*nB; + } + } +} + + +// LBM based on density functional hydrodynamics +__global__ void dvc_ScaLBL_D3Q19_AAeven_DFH(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Phi, + double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int start, int finish, int Np){ + int nn,n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi,tau,rho0,rlx_setA,rlx_setB; + double force_x,force_y,force_z; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double 
mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... 
+ } + } +} + + +__global__ void dvc_ScaLBL_D3Q19_AAodd_DFH(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n,nn,nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + //int nr15,nr16,nr17,nr18; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi,tau,rho0,rlx_setA,rlx_setB; + double force_x,force_y,force_z; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 
+= 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + //nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + 
// q=11 + //nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = 
dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + //........................................................................ + //..............carry out relaxation process.............................. + //..........Toelke, Fruediger et. al. 2006................................ + if (C == 0.0) nx = ny = nz = 0.0; + m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + + // assign force with wetting BC + force_x = alpha*(nA-nB)*SolidForce[n] + Fx; + force_y = alpha*(nA-nB)*SolidForce[n+Np] + Fy; + force_z = alpha*(nA-nB)*SolidForce[n+2*Np] + Fz; + + //.................inverse transformation...................................................... 
+ // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*force_x; + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*force_x; + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*force_y; + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*force_y; + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*force_z; + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*force_z; + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(force_x+force_y); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(force_x+force_y); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(force_x-force_y); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(force_x-force_y); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // 
q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(force_x+force_z); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(force_x+force_z); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(force_x-force_z); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(force_x-force_z); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(force_y+force_z); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(force_y+force_z); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(force_y-force_z); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(force_y-force_z); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + + // write the velocity + ux = (jx + force_x) / rho0; + uy = (jy + force_y) / rho0; + uz = (jz + force_z) / rho0; + //Velocity[n] = ux; + 
//Velocity[Np+n] = uy; + //Velocity[2*Np+n] = uz; + + // Instantiate mass transport distributions + // Stationary value - distribution 0 + nAB = 1.0/(nA+nB); + Aq[n] = 0.3333333333333333*nA; + Bq[n] = 0.3333333333333333*nB; + + //............................................... + // q = 0,2,4 + // Cq = {1,0,0}, {0,1,0}, {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nx; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + //nread = neighborList[n+Np]; + Aq[nr2] = a1; + Bq[nr2] = b1; + // q=2 + //nread = neighborList[n]; + Aq[nr1] = a2; + Bq[nr1] = b2; + + //............................................... + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + //nread = neighborList[n+3*Np]; + Aq[nr4] = a1; + Bq[nr4] = b1; + // q = 4 + //nread = neighborList[n+2*Np]; + Aq[nr3] = a2; + Bq[nr3] = b2; + + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + //nread = neighborList[n+5*Np]; + Aq[nr6] = a1; + Bq[nr6] = b1; + // q = 6 + //nread = neighborList[n+4*Np]; + Aq[nr5] = a2; + Bq[nr5] = b2; + //............................................... 
+ } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_DFH(int *neighborList, double *Aq, double *Bq, + double *Den, double *Phi, int start, int finish, int Np){ + int n,nread; + double fq,nA,nB; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(weight, Cqx, Cqy, Cqz, list, start, count, recvbuf, phi, grad, N); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_Gradient_Unpack: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_DFH_Init(double *Phi, double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + dvc_ScaLBL_DFH_Init<<>>(Phi, Den, Aq, Bq, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_DFH_Init: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_DFH(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Phi, + double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_DFH, hipFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAeven_DFH<<>>(neighborList, dist, Aq, Bq, Den, Phi, Gradient, SolidForce, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_DFH: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_DFH(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, + double Fx, double Fy, double Fz, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_DFH, hipFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAodd_DFH<<>>(neighborList,dist, Aq, Bq, Den, Phi, Gradient, + SolidForce, rhoA, rhoB, 
tauA, tauB, alpha, beta, Fx, Fy, Fz, start, finish, Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_DFH: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_DFH(int *NeighborList, double *Aq, double *Bq, + double *Den, double *Phi, int start, int finish, int Np){ + + dvc_ScaLBL_D3Q7_AAodd_DFH<<>>(NeighborList, Aq, Bq, Den, Phi, start, finish, Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_DFH: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_DFH(double *Aq, double *Bq, double *Den, double *Phi, + int start, int finish, int Np){ + + dvc_ScaLBL_D3Q7_AAeven_DFH<<>>(Aq, Bq, Den, Phi, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_DFH: %s \n",hipGetErrorString(err)); + } + +} + +extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *neighborList, double *Phi, double *ColorGrad, int start, int finish, int Np){ + + dvc_ScaLBL_D3Q19_Gradient_DFH<<>>(neighborList, Phi, ColorGrad, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_Gradient_DFH: %s \n",hipGetErrorString(err)); + } + +} From 34b75e2ccfdea633fd0f2d2d709e1df5b96776c2 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Wed, 7 Oct 2020 13:36:42 -0400 Subject: [PATCH 076/205] Getting HIP compiling on Summit --- CMakeLists.txt | 6 +++--- analysis/runAnalysis.cpp | 6 ++---- common/Utilities.cpp | 22 +++++++++++++--------- common/Utilities.h | 2 +- hip/{BGK.hip => BGK.cu} | 1 + hip/CMakeLists.txt | 10 +++++----- hip/{Color.hip => Color.cu} | 12 ++++++------ hip/{CudaExtras.hip => CudaExtras.cu} | 0 hip/{D3Q19.hip => D3Q19.cu} | 6 +++--- hip/{D3Q7.hip => D3Q7.cu} | 0 hip/{Extras.hip => Extras.cu} | 0 hip/{MRT.hip => MRT.cu} | 0 hip/{dfh.hip => dfh.cu} | 4 ++-- tests/lbpm_color_simulator.cpp | 11 
+++++++++-- 14 files changed, 45 insertions(+), 35 deletions(-) rename hip/{BGK.hip => BGK.cu} (99%) rename hip/{Color.hip => Color.cu} (99%) rename hip/{CudaExtras.hip => CudaExtras.cu} (100%) rename hip/{D3Q19.hip => D3Q19.cu} (99%) rename hip/{D3Q7.hip => D3Q7.cu} (100%) rename hip/{Extras.hip => Extras.cu} (100%) rename hip/{MRT.hip => MRT.cu} (100%) rename hip/{dfh.hip => dfh.cu} (99%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 33528b62..8d479391 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,7 +104,7 @@ IF ( USE_DOXYGEN ) ADD_DEPENDENCIES( doc latex_docs doxygen ) ELSE() SET( USE_DOXYGEN 0 ) - ENDIF()lbpm-wia + ENDIF() ENDIF() @@ -112,7 +112,7 @@ ENDIF() ADD_CUSTOM_TARGET( build-test ) ADD_CUSTOM_TARGET( build-examples ) ADD_CUSTOM_TARGET( check COMMAND make test ) -ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu example common IO threadpool StackTrace ) +ADD_DISTCLEAN( analysis null_timer tests liblbpm-wia.* cpu gpu cuda hip example common IO threadpool StackTrace ) # Check for CUDA @@ -178,7 +178,7 @@ IF ( NOT ONLY_BUILD_DOCS ) IF ( USE_CUDA ) ADD_PACKAGE_SUBDIRECTORY( cuda ) ELSEIF ( USE_HIP ) - ADD_SUBDIRECTORY( gpu ) + ADD_SUBDIRECTORY( hip ) SET( LBPM_LIBRARIES lbpm-hip lbpm-wia ) ELSE() ADD_PACKAGE_SUBDIRECTORY( cpu ) diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 83153f6c..9c811752 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -678,10 +678,8 @@ void runAnalysis::createThreads( const std::string& method, int N_threads ) // Check if we have thread support int thread_support; MPI_Query_thread( &thread_support ); - if ( thread_support < MPI_THREAD_MULTIPLE ) { - std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl; - return; - } + if ( thread_support < MPI_THREAD_MULTIPLE && N_threads > 0 ) + std::cerr << "Warning: Failed to start MPI with necessary thread support, errors may occur\n"; // Create the threads 
const auto cores = d_tpool.getProcessAffinity(); if ( N_threads == 0 ) { diff --git a/common/Utilities.cpp b/common/Utilities.cpp index 723b34f8..4c156a57 100644 --- a/common/Utilities.cpp +++ b/common/Utilities.cpp @@ -37,7 +37,7 @@ static std::mutex Utilities_mutex; /**************************************************************************** * Function to perform the default startup/shutdown sequences * ****************************************************************************/ -void Utilities::startup( int argc, char **argv ) +void Utilities::startup( int argc, char **argv, bool multiple ) { NULL_USE( argc ); NULL_USE( argv ); @@ -46,15 +46,19 @@ void Utilities::startup( int argc, char **argv ) Utilities::setenv( "MKL_NUM_THREADS", "1" ); // Start MPI #ifdef USE_MPI - int provided; - MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); - if ( provided < MPI_THREAD_MULTIPLE ) { - int rank; - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - if ( rank == 0 ) - std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl; + if ( multiple ) { + int provided; + MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); + if ( provided < MPI_THREAD_MULTIPLE ) { + int rank; + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + if ( rank == 0 ) + std::cerr << "Warning: Failed to start MPI with necessary thread support, thread support will be disabled" << std::endl; + } + StackTrace::globalCallStackInitialize( MPI_COMM_WORLD ); + } else { + MPI_Init( &argc, &argv ); } - StackTrace::globalCallStackInitialize( MPI_COMM_WORLD ); #endif // Set the error handlers Utilities::setAbortBehavior( true, 3 ); diff --git a/common/Utilities.h b/common/Utilities.h index da579966..8dcd8c8e 100644 --- a/common/Utilities.h +++ b/common/Utilities.h @@ -31,7 +31,7 @@ using StackTrace::Utilities::sleep_s; * \param argc argc from main * \param argv argv from main */ -void startup( int argc, char **argv ); +void startup( int argc, 
char **argv, bool multiple=true ); /*! * \brief Stop MPI, error handlers diff --git a/hip/BGK.hip b/hip/BGK.cu similarity index 99% rename from hip/BGK.hip rename to hip/BGK.cu index f3e746af..df1c7dd3 100644 --- a/hip/BGK.hip +++ b/hip/BGK.cu @@ -1,4 +1,5 @@ #include +#include "hip/hip_runtime.h" #define NBLOCKS 1024 #define NTHREADS 256 diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 38ef7c27..f63cf035 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,9 +1,9 @@ SET( HIP_SEPERABLE_COMPILATION ON ) -SET_SOURCE_FILES_PROPERTIES( BGK.hip Color.hip CudaExtras.hip D3Q19.hip D3Q7.hip dfh.hip Extras.hip MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) -HIP_ADD_LIBRARY( lbpm-hip BGK.hip Color.hip CudaExtras.hip D3Q19.hip D3Q7.hip dfh.hip Extras.hip MRT.hip SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) -TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so ) -TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip ) -ADD_DEPENDENCIES( lbpm-hip copy-include ) +SET_SOURCE_FILES_PROPERTIES( BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) +HIP_ADD_LIBRARY( lbpm-hip BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.cu SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) +#TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so ) +#TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip ) +#ADD_DEPENDENCIES( lbpm-hip copy-include ) diff --git a/hip/Color.hip b/hip/Color.cu similarity index 99% rename from hip/Color.hip rename to hip/Color.cu index b802ab1f..95623e56 100644 --- a/hip/Color.hip +++ b/hip/Color.cu @@ -3986,7 +3986,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, do double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, 
double Fz, int strideY, int strideZ, int start, int finish, int Np){ - hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_Color, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAeven_Color, hipFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAeven_Color<<>>(Map, dist, Aq, Bq, Den, Phi, Vel, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); @@ -4001,7 +4001,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *d_neighborList, int *Map, double * double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_Color, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAodd_Color, hipFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAodd_Color<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, Vel, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); @@ -4060,7 +4060,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_ColorMomentum(double *dist, double *Den, dou double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ - hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_ColorMomentum, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAeven_ColorMomentum, hipFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAeven_ColorMomentum<<>>(dist, Den, Vel, ColorGrad, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, start, finish, Np); @@ -4074,7 +4074,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_ColorMomentum(int *d_neighborList, double *di double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ - hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_ColorMomentum, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) 
dvc_ScaLBL_D3Q19_AAodd_ColorMomentum, hipFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAodd_ColorMomentum<<>>(d_neighborList, dist, Den, Vel, ColorGrad, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, start, finish, Np); @@ -4087,7 +4087,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_ColorMomentum(int *d_neighborList, double *di extern "C" void ScaLBL_D3Q19_AAeven_ColorMass(double *Aq, double *Bq, double *Den, double *Vel, double *ColorGrad, double beta, int start, int finish, int Np){ - hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_ColorMass, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAeven_ColorMass, hipFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAeven_ColorMass<<>>(Aq, Bq, Den, Vel, ColorGrad, beta, start, finish, Np); hipError_t err = hipGetLastError(); @@ -4100,7 +4100,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_ColorMass(double *Aq, double *Bq, double *De extern "C" void ScaLBL_D3Q19_AAodd_ColorMass(int *d_neighborList, double *Aq, double *Bq, double *Den, double *Vel, double *ColorGrad, double beta, int start, int finish, int Np){ - hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_ColorMass, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAodd_ColorMass, hipFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAodd_ColorMass<<>>(d_neighborList, Aq, Bq, Den, Vel, ColorGrad, beta, start, finish, Np); hipError_t err = hipGetLastError(); diff --git a/hip/CudaExtras.hip b/hip/CudaExtras.cu similarity index 100% rename from hip/CudaExtras.hip rename to hip/CudaExtras.cu diff --git a/hip/D3Q19.hip b/hip/D3Q19.cu similarity index 99% rename from hip/D3Q19.hip rename to hip/D3Q19.cu index 1bcfa04d..20d93d64 100644 --- a/hip/D3Q19.hip +++ b/hip/D3Q19.cu @@ -1,6 +1,6 @@ #include -#include "hip/hip_runtime.h" #include +#include "hip/hip_runtime.h" #define NBLOCKS 1024 #define NTHREADS 256 @@ -2355,7 +2355,7 @@ extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, d } extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double 
*d_dist, int Np) { - hipFuncSetCacheConfig(dvc_ScaLBL_AAeven_Compact, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_AAeven_Compact, hipFuncCachePreferL1); dvc_ScaLBL_AAeven_Compact<<>>(ID, d_dist, Np); hipError_t err = hipGetLastError(); if (hipSuccess != err){ @@ -2364,7 +2364,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) } extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) { - hipFuncSetCacheConfig(dvc_ScaLBL_AAodd_Compact, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_AAodd_Compact, hipFuncCachePreferL1); dvc_ScaLBL_AAodd_Compact<<>>(ID,d_neighborList, d_dist,Np); hipError_t err = hipGetLastError(); if (hipSuccess != err){ diff --git a/hip/D3Q7.hip b/hip/D3Q7.cu similarity index 100% rename from hip/D3Q7.hip rename to hip/D3Q7.cu diff --git a/hip/Extras.hip b/hip/Extras.cu similarity index 100% rename from hip/Extras.hip rename to hip/Extras.cu diff --git a/hip/MRT.hip b/hip/MRT.cu similarity index 100% rename from hip/MRT.hip rename to hip/MRT.cu diff --git a/hip/dfh.hip b/hip/dfh.cu similarity index 99% rename from hip/dfh.hip rename to hip/dfh.cu index 37f91498..0139e1ac 100644 --- a/hip/dfh.hip +++ b/hip/dfh.cu @@ -1450,7 +1450,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_DFH(int *neighborList, double *dist, double double *Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ - hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_DFH, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAeven_DFH, hipFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAeven_DFH<<>>(neighborList, dist, Aq, Bq, Den, Phi, Gradient, SolidForce, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, start, finish, Np); @@ -1464,7 +1464,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_DFH(int *neighborList, double *dist, double * double *Phi, double 
*Gradient, double *SolidForce, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ - hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_DFH, hipFuncCachePreferL1); + hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAodd_DFH, hipFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAodd_DFH<<>>(neighborList,dist, Aq, Bq, Den, Phi, Gradient, SolidForce, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, start, finish, Np); diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index cef13189..76312a14 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -23,8 +23,13 @@ int main(int argc, char **argv) { + // Load the input database + auto db = std::make_shared( argv[1] ); + // Initialize MPI and error handlers - Utilities::startup( argc, argv ); + auto multiple = db->getWithDefault( "MPI_THREAD_MULTIPLE", true ); + Utilities::startup( argc, argv, multiple ); + Utilities::MPI::changeProfileLevel( 1 ); { // Limit scope so variables that contain communicators will free before MPI_Finialize @@ -60,7 +65,9 @@ int main(int argc, char **argv) //ColorModel.WriteDebug(); PROFILE_STOP("Main"); - PROFILE_SAVE("lbpm_color_simulator",1); + auto file = db->getWithDefault( "TimerFile", "lbpm_color_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE(file,level); // **************************************************** comm.barrier(); From 523f96abddfc51c0f2425e29f568e0bdc30e1b10 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 8 Oct 2020 11:03:42 -0400 Subject: [PATCH 077/205] Fixing compile errors without MPI --- analysis/runAnalysis.cpp | 5 ++--- tests/BlobAnalyzeParallel.cpp | 4 ++-- tests/BlobIdentify.cpp | 4 ++-- tests/BlobIdentifyParallel.cpp | 4 ++-- tests/ColorToBinary.cpp | 4 ++-- tests/ComponentLabel.cpp | 4 ++-- tests/GenerateSphereTest.cpp | 7 ++----- tests/TestBlobAnalyze.cpp | 4 ++-- tests/TestBlobIdentify.cpp | 4 
++-- tests/TestBlobIdentifyCorners.cpp | 2 +- tests/TestBubble.cpp | 2 +- tests/TestBubbleDFH.cpp | 5 ++--- tests/TestColorBubble.cpp | 7 ++----- tests/TestColorGrad.cpp | 4 ++-- tests/TestColorGradDFH.cpp | 7 ++----- tests/TestColorMassBounceback.cpp | 4 ++-- tests/TestColorSquareTube.cpp | 14 +++++--------- tests/TestCommD3Q19.cpp | 4 ++-- tests/TestDatabase.cpp | 5 ++--- tests/TestFluxBC.cpp | 5 ++--- tests/TestForceD3Q19.cpp | 4 ++-- tests/TestForceMoments.cpp | 7 ++----- tests/TestInterfaceSpeed.cpp | 6 ++---- tests/TestMRT.cpp | 4 ++-- tests/TestMap.cpp | 7 ++----- tests/TestMassConservationD3Q7.cpp | 7 ++----- tests/TestMicroCTReader.cpp | 4 ++-- tests/TestMomentsD3Q19.cpp | 5 ++--- tests/TestNetcdf.cpp | 4 ++-- tests/TestPoiseuille.cpp | 7 ++----- tests/TestPressVel.cpp | 7 ++----- tests/TestSegDist.cpp | 5 ++--- tests/TestSubphase.cpp | 5 ++--- tests/TestTopo3D.cpp | 5 ++--- tests/TestTorus.cpp | 5 ++--- tests/TestTorusEvolve.cpp | 4 ++-- tests/TestTwoPhase.cpp | 4 ++-- tests/TestWriter.cpp | 4 ++-- tests/convertIO.cpp | 4 ++-- tests/hello_world.cpp | 5 ++--- tests/lb2_CMT_wia.cpp | 4 ++-- tests/lb2_Color_blob_wia_mpi.cpp | 4 ++-- tests/lbpm_BGK_simulator.cpp | 4 ++-- tests/lbpm_captube_pp.cpp | 4 ++-- tests/lbpm_color_macro_simulator.cpp | 2 +- tests/lbpm_dfh_simulator.cpp | 8 ++++---- tests/lbpm_disc_pp.cpp | 4 ++-- tests/lbpm_inkbottle_pp.cpp | 4 ++-- tests/lbpm_juanes_bench_disc_pp.cpp | 4 ++-- tests/lbpm_minkowski_scalar.cpp | 5 ++--- tests/lbpm_morph_pp.cpp | 5 ++--- tests/lbpm_morphdrain_pp.cpp | 5 ++--- tests/lbpm_morphopen_pp.cpp | 5 ++--- tests/lbpm_nondarcy_simulator.cpp | 4 ++-- tests/lbpm_nonnewtonian_simulator.cpp | 2 +- tests/lbpm_permeability_simulator.cpp | 7 ++----- tests/lbpm_plates_pp.cpp | 4 ++-- tests/lbpm_porenetwork_pp.cpp | 4 ++-- tests/lbpm_random_pp.cpp | 4 ++-- tests/lbpm_refine_pp.cpp | 5 ++--- tests/lbpm_segmented_decomp.cpp | 4 ++-- tests/lbpm_segmented_pp.cpp | 4 ++-- tests/lbpm_sphere_pp.cpp | 4 ++-- 
tests/lbpm_squaretube_pp.cpp | 4 ++-- tests/lbpm_uCT_maskfilter.cpp | 4 ++-- tests/lbpm_uCT_pp.cpp | 4 ++-- tests/testCommunication.cpp | 4 ++-- tests/test_MPI.cpp | 5 +---- tests/test_dcel_minkowski.cpp | 5 ++--- tests/test_dcel_tri_normal.cpp | 5 ++--- 70 files changed, 140 insertions(+), 193 deletions(-) diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 9c811752..c09b71c2 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -676,9 +676,8 @@ void runAnalysis::createThreads( const std::string& method, int N_threads ) if ( method == "none" ) return; // Check if we have thread support - int thread_support; - MPI_Query_thread( &thread_support ); - if ( thread_support < MPI_THREAD_MULTIPLE && N_threads > 0 ) + auto thread_support = Utilities::MPI::queryThreadSupport(); + if ( thread_support != Utilities::MPI::ThreadSupport::MULTIPLE && N_threads > 0 ) std::cerr << "Warning: Failed to start MPI with necessary thread support, errors may occur\n"; // Create the threads const auto cores = d_tpool.getProcessAffinity(); diff --git a/tests/BlobAnalyzeParallel.cpp b/tests/BlobAnalyzeParallel.cpp index 773309f9..2889844e 100644 --- a/tests/BlobAnalyzeParallel.cpp +++ b/tests/BlobAnalyzeParallel.cpp @@ -100,7 +100,7 @@ inline void WriteBlobStates(TwoPhase TCAT, double D, double porosity){ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -482,7 +482,7 @@ int main(int argc, char **argv) PROFILE_STOP("main"); PROFILE_SAVE("BlobIdentifyParallel",false); comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/BlobIdentify.cpp b/tests/BlobIdentify.cpp index 39a59954..2680ca6f 100644 --- a/tests/BlobIdentify.cpp +++ b/tests/BlobIdentify.cpp @@ -150,7 +150,7 @@ void readRankData( int proc, int nx, int ny, int nz, DoubleArray& Phase, DoubleA int main(int 
argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); printf("-----------------------------------------------------------\n"); printf("Labeling Blobs from Two-Phase Lattice Boltzmann Simulation \n"); @@ -318,7 +318,7 @@ int main(int argc, char **argv) FILE *BLOBS = fopen("Blobs.dat","wb"); fwrite(GlobalBlobID.data(),4,Nx*Ny*Nz,BLOBS); fclose(BLOBS); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/BlobIdentifyParallel.cpp b/tests/BlobIdentifyParallel.cpp index b8929a11..6508873d 100644 --- a/tests/BlobIdentifyParallel.cpp +++ b/tests/BlobIdentifyParallel.cpp @@ -47,7 +47,7 @@ void readRankData( int proc, int nx, int ny, int nz, DoubleArray& Phase, DoubleA int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -129,7 +129,7 @@ int main(int argc, char **argv) PROFILE_SAVE("BlobIdentifyParallel",false); #endif comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/ColorToBinary.cpp b/tests/ColorToBinary.cpp index fae156d1..feb76658 100644 --- a/tests/ColorToBinary.cpp +++ b/tests/ColorToBinary.cpp @@ -114,7 +114,7 @@ inline void ReadFromRank(char *FILENAME, DoubleArray &Phase, int nx, int ny, int int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -276,7 +276,7 @@ int main(int argc, char **argv) // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/ComponentLabel.cpp b/tests/ComponentLabel.cpp index 624ce8f4..c62c020e 100644 --- a/tests/ComponentLabel.cpp +++ b/tests/ComponentLabel.cpp @@ -119,7 +119,7 @@ inline void 
ReadFromRank(char *FILENAME, DoubleArray &Phase, DoubleArray &Pressu int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -433,7 +433,7 @@ int main(int argc, char **argv) */ // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/GenerateSphereTest.cpp b/tests/GenerateSphereTest.cpp index 9e4cdfda..e28042d7 100644 --- a/tests/GenerateSphereTest.cpp +++ b/tests/GenerateSphereTest.cpp @@ -296,7 +296,7 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -474,8 +474,5 @@ int main(int argc, char **argv) fclose(IDFILE); //...................................................................... 
} - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); } diff --git a/tests/TestBlobAnalyze.cpp b/tests/TestBlobAnalyze.cpp index 19360fe3..24163e4f 100644 --- a/tests/TestBlobAnalyze.cpp +++ b/tests/TestBlobAnalyze.cpp @@ -127,7 +127,7 @@ inline void WriteBlobStates(TwoPhase TCAT, double D, double porosity){ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -317,7 +317,7 @@ int main(int argc, char **argv) } // Limit scope so variables that contain communicators will free before MPI_Finialize comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/TestBlobIdentify.cpp b/tests/TestBlobIdentify.cpp index 7eb5c270..71b0f565 100644 --- a/tests/TestBlobIdentify.cpp +++ b/tests/TestBlobIdentify.cpp @@ -152,7 +152,7 @@ void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -410,7 +410,7 @@ int main(int argc, char **argv) PROFILE_STOP("main"); PROFILE_SAVE("TestBlobIdentify",false); comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return N_errors; } diff --git a/tests/TestBlobIdentifyCorners.cpp b/tests/TestBlobIdentifyCorners.cpp index 904e52e0..5ec0b55d 100644 --- a/tests/TestBlobIdentifyCorners.cpp +++ b/tests/TestBlobIdentifyCorners.cpp @@ -18,7 +18,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); int rank = comm.getRank(); int nprocs = comm.getSize(); /*if ( nprocs != 8 ) { diff --git a/tests/TestBubble.cpp b/tests/TestBubble.cpp index 
6eb74b37..8e414182 100644 --- a/tests/TestBubble.cpp +++ b/tests/TestBubble.cpp @@ -991,6 +991,6 @@ int main(int argc, char **argv) // **************************************************** comm.barrier(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/TestBubbleDFH.cpp b/tests/TestBubbleDFH.cpp index 3eecb13f..cd4f487b 100644 --- a/tests/TestBubbleDFH.cpp +++ b/tests/TestBubbleDFH.cpp @@ -27,8 +27,7 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - int provided_thread_support = -1; - MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); + Utilities::startup( argc, argv, true ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -548,7 +547,7 @@ int main(int argc, char **argv) // **************************************************** comm.barrier(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Finalize(); + Utilities::shutdown(); return check; } diff --git a/tests/TestColorBubble.cpp b/tests/TestColorBubble.cpp index 1f42a71e..46f7f195 100644 --- a/tests/TestColorBubble.cpp +++ b/tests/TestColorBubble.cpp @@ -65,7 +65,7 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -92,10 +92,7 @@ int main(int argc, char **argv) ColorModel.Run(); ColorModel.WriteDebug(); } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); return check; } diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index 2566f8c0..b407d01f 100644 --- a/tests/TestColorGrad.cpp 
+++ b/tests/TestColorGrad.cpp @@ -16,7 +16,7 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -257,7 +257,7 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** return check; diff --git a/tests/TestColorGradDFH.cpp b/tests/TestColorGradDFH.cpp index 02c0dc9d..2fd65e7d 100644 --- a/tests/TestColorGradDFH.cpp +++ b/tests/TestColorGradDFH.cpp @@ -26,7 +26,7 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -206,10 +206,7 @@ int main(int argc, char **argv) } } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); return check; } diff --git a/tests/TestColorMassBounceback.cpp b/tests/TestColorMassBounceback.cpp index 78508f9b..092e7541 100644 --- a/tests/TestColorMassBounceback.cpp +++ b/tests/TestColorMassBounceback.cpp @@ -16,7 +16,7 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -520,7 +520,7 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** return check; } diff --git a/tests/TestColorSquareTube.cpp b/tests/TestColorSquareTube.cpp index cf8a9566..d7dae23f 100644 --- 
a/tests/TestColorSquareTube.cpp +++ b/tests/TestColorSquareTube.cpp @@ -85,12 +85,12 @@ void InitializeSquareTube(ScaLBL_ColorModel &ColorModel){ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + Utilities::startup( argc, argv ); int check=0; { + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); if (rank == 0){ printf("********************************************************\n"); printf("Running Color Model: TestColor \n"); @@ -108,11 +108,7 @@ int main(int argc, char **argv) ColorModel.WriteDebug(); } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** - + Utilities::shutdown(); return check; } diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index 1ffa2465..12adbb73 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -164,7 +164,7 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -427,7 +427,7 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** return check; diff --git a/tests/TestDatabase.cpp b/tests/TestDatabase.cpp index ced704e2..c3341aab 100644 --- a/tests/TestDatabase.cpp +++ b/tests/TestDatabase.cpp @@ -17,7 +17,7 @@ // Main int main(int argc, char **argv) { - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); Utilities::setAbortBehavior(true,2); Utilities::setErrorHandlers(); @@ -66,8 +66,7 @@ int main(int argc, char 
**argv) // Finished PROFILE_SAVE("TestDatabase",true); - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return err; } diff --git a/tests/TestFluxBC.cpp b/tests/TestFluxBC.cpp index 0798a481..ef1c70af 100644 --- a/tests/TestFluxBC.cpp +++ b/tests/TestFluxBC.cpp @@ -17,7 +17,7 @@ std::shared_ptr loadInputs( int nprocs ) int main (int argc, char **argv) { - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -265,7 +265,6 @@ int main (int argc, char **argv) } // Finished - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return error; } diff --git a/tests/TestForceD3Q19.cpp b/tests/TestForceD3Q19.cpp index 31151584..7650c6f0 100644 --- a/tests/TestForceD3Q19.cpp +++ b/tests/TestForceD3Q19.cpp @@ -442,7 +442,7 @@ inline void MRT_Transform(double *dist, int Np, double Fx, double Fy, double Fz) int main (int argc, char **argv) { - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -541,5 +541,5 @@ int main (int argc, char **argv) printf("Fz = %f; Computed vz=%f \n",Fz,vel[2*Np+0]); comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); } diff --git a/tests/TestForceMoments.cpp b/tests/TestForceMoments.cpp index fab5fe68..a6c6569b 100644 --- a/tests/TestForceMoments.cpp +++ b/tests/TestForceMoments.cpp @@ -47,7 +47,7 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -326,10 +326,7 @@ int main(int argc, char **argv) } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); return check; } diff --git 
a/tests/TestInterfaceSpeed.cpp b/tests/TestInterfaceSpeed.cpp index d2c901df..67bf8f95 100644 --- a/tests/TestInterfaceSpeed.cpp +++ b/tests/TestInterfaceSpeed.cpp @@ -18,7 +18,7 @@ int main (int argc, char *argv[]) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); @@ -146,9 +146,7 @@ int main (int argc, char *argv[]) return toReturn; - // **************************************************** comm.barrier(); return 0; - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); } diff --git a/tests/TestMRT.cpp b/tests/TestMRT.cpp index e4acba99..89c9b549 100644 --- a/tests/TestMRT.cpp +++ b/tests/TestMRT.cpp @@ -489,7 +489,7 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -802,7 +802,7 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** return check; diff --git a/tests/TestMap.cpp b/tests/TestMap.cpp index 3ba403a7..c546bae3 100644 --- a/tests/TestMap.cpp +++ b/tests/TestMap.cpp @@ -27,7 +27,7 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int check=0; { @@ -191,10 +191,7 @@ int main(int argc, char **argv) delete [] TmpMap; } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); return check; } diff --git a/tests/TestMassConservationD3Q7.cpp b/tests/TestMassConservationD3Q7.cpp 
index 68183cd2..ba3f6018 100644 --- a/tests/TestMassConservationD3Q7.cpp +++ b/tests/TestMassConservationD3Q7.cpp @@ -67,7 +67,7 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -264,8 +264,5 @@ int main(int argc, char **argv) } } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); } diff --git a/tests/TestMicroCTReader.cpp b/tests/TestMicroCTReader.cpp index 52a5b9d3..d8609356 100644 --- a/tests/TestMicroCTReader.cpp +++ b/tests/TestMicroCTReader.cpp @@ -50,7 +50,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); UnitTest ut; // Run the tests @@ -62,7 +62,7 @@ int main(int argc, char **argv) int N_errors = ut.NumFailGlobal(); // Close MPI - MPI_Finalize(); + Utilities::shutdown(); return N_errors; } diff --git a/tests/TestMomentsD3Q19.cpp b/tests/TestMomentsD3Q19.cpp index 2660ed26..10413743 100644 --- a/tests/TestMomentsD3Q19.cpp +++ b/tests/TestMomentsD3Q19.cpp @@ -462,7 +462,7 @@ inline void MRT_Transform(double *dist, int Np) { int main (int argc, char **argv) { - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -539,7 +539,6 @@ int main (int argc, char **argv) error=count; // Finished - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return error; } diff --git a/tests/TestNetcdf.cpp b/tests/TestNetcdf.cpp index 3d0498d2..4ebe3af9 100644 --- a/tests/TestNetcdf.cpp +++ b/tests/TestNetcdf.cpp @@ -95,7 +95,7 @@ void load( const std::string& filename ) 
int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); const int rank = comm.getRank(); UnitTest ut; @@ -117,7 +117,7 @@ int main(int argc, char **argv) // Close MPI comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return N_errors; } diff --git a/tests/TestPoiseuille.cpp b/tests/TestPoiseuille.cpp index 744d292d..210cb357 100644 --- a/tests/TestPoiseuille.cpp +++ b/tests/TestPoiseuille.cpp @@ -48,7 +48,7 @@ void ParallelPlates(ScaLBL_MRTModel &MRT){ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -125,10 +125,7 @@ int main(int argc, char **argv) } } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); return check; } diff --git a/tests/TestPressVel.cpp b/tests/TestPressVel.cpp index c19bdcef..6e6ddef7 100644 --- a/tests/TestPressVel.cpp +++ b/tests/TestPressVel.cpp @@ -14,7 +14,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int check=0; @@ -188,10 +188,7 @@ int main(int argc, char **argv) } } } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); return check; } diff --git a/tests/TestSegDist.cpp b/tests/TestSegDist.cpp index ecb6d6b9..58f7a4a3 100644 --- a/tests/TestSegDist.cpp +++ b/tests/TestSegDist.cpp @@ -39,7 +39,7 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank 
= comm.getRank(); int nprocs = comm.getSize(); @@ -141,8 +141,7 @@ int main(int argc, char **argv) IO::writeData( "testSegDist", data, MPI_COMM_WORLD ); } - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/TestSubphase.cpp b/tests/TestSubphase.cpp index 9738812f..e6f566fa 100644 --- a/tests/TestSubphase.cpp +++ b/tests/TestSubphase.cpp @@ -26,7 +26,7 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -136,8 +136,7 @@ int main(int argc, char **argv) // Averages->Reduce(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/TestTopo3D.cpp b/tests/TestTopo3D.cpp index 948bb1d6..2a5eff97 100644 --- a/tests/TestTopo3D.cpp +++ b/tests/TestTopo3D.cpp @@ -26,7 +26,7 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -225,8 +225,7 @@ int main(int argc, char **argv) IO::writeData( timestep, visData, comm ); } // Limit scope so variables that contain communicators will free before MPI_Finialize - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/TestTorus.cpp b/tests/TestTorus.cpp index 5125ce92..d6a33adf 100644 --- a/tests/TestTorus.cpp +++ b/tests/TestTorus.cpp @@ -26,7 +26,7 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -164,8 +164,7 @@ int main(int argc, char 
**argv) // Averages->Reduce(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/TestTorusEvolve.cpp b/tests/TestTorusEvolve.cpp index 32cf7fd8..580581b5 100644 --- a/tests/TestTorusEvolve.cpp +++ b/tests/TestTorusEvolve.cpp @@ -26,7 +26,7 @@ std::shared_ptr loadInputs( int nprocs ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -157,7 +157,7 @@ int main(int argc, char **argv) } } // Limit scope so variables that contain communicators will free before MPI_Finialize comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/TestTwoPhase.cpp b/tests/TestTwoPhase.cpp index fa54d98d..5cbec956 100644 --- a/tests/TestTwoPhase.cpp +++ b/tests/TestTwoPhase.cpp @@ -17,7 +17,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -111,7 +111,7 @@ int main(int argc, char **argv) // **************************************************** comm.barrier(); } // Limit scope so Domain will free it's communicator - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/TestWriter.cpp b/tests/TestWriter.cpp index 37858202..97774c55 100644 --- a/tests/TestWriter.cpp +++ b/tests/TestWriter.cpp @@ -226,7 +226,7 @@ void testWriter( const std::string& format, std::vector& mes // Main int main(int argc, char **argv) { - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -387,7 +387,7 @@ int main(int argc, char **argv) PROFILE_SAVE("TestWriter",true); int N_errors = ut.NumFailGlobal(); 
comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return N_errors; } diff --git a/tests/convertIO.cpp b/tests/convertIO.cpp index 27605237..49034274 100644 --- a/tests/convertIO.cpp +++ b/tests/convertIO.cpp @@ -17,7 +17,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -83,7 +83,7 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("convertData",true); comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/hello_world.cpp b/tests/hello_world.cpp index 810d3a9c..3de56719 100644 --- a/tests/hello_world.cpp +++ b/tests/hello_world.cpp @@ -5,7 +5,7 @@ int main (int argc, char **argv) { - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -27,7 +27,6 @@ int main (int argc, char **argv) int error = 0; // Finished - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return error; } diff --git a/tests/lb2_CMT_wia.cpp b/tests/lb2_CMT_wia.cpp index 389bc8a8..d1a31e8c 100644 --- a/tests/lb2_CMT_wia.cpp +++ b/tests/lb2_CMT_wia.cpp @@ -138,7 +138,7 @@ extern "C" void CMT_ScaLBL_D3Q7_ColorCollideMass(char *ID, double *A_even, doubl int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); int n,N,Nx,Ny,Nz; @@ -359,6 +359,6 @@ int main(int argc, char **argv) // Close MPI comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/lb2_Color_blob_wia_mpi.cpp b/tests/lb2_Color_blob_wia_mpi.cpp index e3323612..7f8bf071 100644 --- a/tests/lb2_Color_blob_wia_mpi.cpp +++ b/tests/lb2_Color_blob_wia_mpi.cpp @@ -97,7 +97,7 @@ inline void ZeroHalo(double *Data, int Nx, int Ny, int Nz) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + 
Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -2794,6 +2794,6 @@ int main(int argc, char **argv) // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/lbpm_BGK_simulator.cpp b/tests/lbpm_BGK_simulator.cpp index 1ac61853..b24f5be1 100644 --- a/tests/lbpm_BGK_simulator.cpp +++ b/tests/lbpm_BGK_simulator.cpp @@ -25,7 +25,7 @@ int main(int argc, char **argv) { // Initialize MPI int rank,nprocs; - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -428,6 +428,6 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/lbpm_captube_pp.cpp b/tests/lbpm_captube_pp.cpp index b90ebb2a..90e849f9 100644 --- a/tests/lbpm_captube_pp.cpp +++ b/tests/lbpm_captube_pp.cpp @@ -25,7 +25,7 @@ std::shared_ptr loadInputs( ) int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -182,6 +182,6 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/lbpm_color_macro_simulator.cpp b/tests/lbpm_color_macro_simulator.cpp index c92b0c45..de6b286d 100644 --- a/tests/lbpm_color_macro_simulator.cpp +++ b/tests/lbpm_color_macro_simulator.cpp @@ -655,7 +655,7 @@ int main(int argc, char **argv) // **************************************************** comm.barrier(); } // Limit scope so 
variables that contain communicators will free before MPI_Finialize - MPI_Finalize(); + Utilities::shutdown(); } diff --git a/tests/lbpm_dfh_simulator.cpp b/tests/lbpm_dfh_simulator.cpp index 0d5902df..5916cab9 100644 --- a/tests/lbpm_dfh_simulator.cpp +++ b/tests/lbpm_dfh_simulator.cpp @@ -24,12 +24,12 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - int provided_thread_support = -1; - MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); - if ( rank==0 && provided_thread_supportAggregateLabels(filename2); } - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); } diff --git a/tests/lbpm_morphdrain_pp.cpp b/tests/lbpm_morphdrain_pp.cpp index d3c5a428..a8e24273 100644 --- a/tests/lbpm_morphdrain_pp.cpp +++ b/tests/lbpm_morphdrain_pp.cpp @@ -23,7 +23,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); { @@ -201,6 +201,5 @@ int main(int argc, char **argv) Mask->AggregateLabels( filename2 ); } - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); } diff --git a/tests/lbpm_morphopen_pp.cpp b/tests/lbpm_morphopen_pp.cpp index a6209240..6afb8722 100644 --- a/tests/lbpm_morphopen_pp.cpp +++ b/tests/lbpm_morphopen_pp.cpp @@ -23,7 +23,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); { @@ -203,6 +203,5 @@ int main(int argc, char **argv) Mask->AggregateLabels(filename2); } - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); } diff --git a/tests/lbpm_nondarcy_simulator.cpp b/tests/lbpm_nondarcy_simulator.cpp index a25fef69..db11b90a 100644 --- a/tests/lbpm_nondarcy_simulator.cpp +++ 
b/tests/lbpm_nondarcy_simulator.cpp @@ -78,7 +78,7 @@ int main(int argc, char **argv) else { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -570,7 +570,7 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } } diff --git a/tests/lbpm_nonnewtonian_simulator.cpp b/tests/lbpm_nonnewtonian_simulator.cpp index bea3a814..29dbbc23 100644 --- a/tests/lbpm_nonnewtonian_simulator.cpp +++ b/tests/lbpm_nonnewtonian_simulator.cpp @@ -825,7 +825,7 @@ int main(int argc, char **argv) NULL_USE(RESTART_INTERVAL); } comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); //**************************************************** } diff --git a/tests/lbpm_permeability_simulator.cpp b/tests/lbpm_permeability_simulator.cpp index eb5e6d4b..7e5c8303 100644 --- a/tests/lbpm_permeability_simulator.cpp +++ b/tests/lbpm_permeability_simulator.cpp @@ -24,7 +24,7 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -50,8 +50,5 @@ int main(int argc, char **argv) MRT.Run(); MRT.VelocityField(); } - // **************************************************** - comm.barrier(); - MPI_Finalize(); - // **************************************************** + Utilities::shutdown(); } diff --git a/tests/lbpm_plates_pp.cpp b/tests/lbpm_plates_pp.cpp index 37191979..be019dba 100644 --- a/tests/lbpm_plates_pp.cpp +++ b/tests/lbpm_plates_pp.cpp @@ -14,7 +14,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = 
comm.getSize(); @@ -195,6 +195,6 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/lbpm_porenetwork_pp.cpp b/tests/lbpm_porenetwork_pp.cpp index 1715811f..b1bfa5e3 100644 --- a/tests/lbpm_porenetwork_pp.cpp +++ b/tests/lbpm_porenetwork_pp.cpp @@ -14,7 +14,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -287,6 +287,6 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/lbpm_random_pp.cpp b/tests/lbpm_random_pp.cpp index 8318f50f..56b5570d 100644 --- a/tests/lbpm_random_pp.cpp +++ b/tests/lbpm_random_pp.cpp @@ -52,7 +52,7 @@ inline void UnpackID(int *list, int count, char *recvbuf, char *ID){ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -404,6 +404,6 @@ int main(int argc, char **argv) fclose(ID); comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/lbpm_refine_pp.cpp b/tests/lbpm_refine_pp.cpp index 149ae673..35058ba1 100644 --- a/tests/lbpm_refine_pp.cpp +++ b/tests/lbpm_refine_pp.cpp @@ -16,7 +16,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -421,7 +421,6 @@ int main(int argc, char **argv) } - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git 
a/tests/lbpm_segmented_decomp.cpp b/tests/lbpm_segmented_decomp.cpp index 65b8576f..3058dad5 100644 --- a/tests/lbpm_segmented_decomp.cpp +++ b/tests/lbpm_segmented_decomp.cpp @@ -18,7 +18,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -365,5 +365,5 @@ int main(int argc, char **argv) } } comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); } diff --git a/tests/lbpm_segmented_pp.cpp b/tests/lbpm_segmented_pp.cpp index 484a11e2..b14f6930 100644 --- a/tests/lbpm_segmented_pp.cpp +++ b/tests/lbpm_segmented_pp.cpp @@ -115,7 +115,7 @@ double ReadFromBlock( char *ID, int iproc, int jproc, int kproc, int Nx, int Ny, int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -231,7 +231,7 @@ int main(int argc, char **argv) } comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/lbpm_sphere_pp.cpp b/tests/lbpm_sphere_pp.cpp index 0df11b96..a2966675 100644 --- a/tests/lbpm_sphere_pp.cpp +++ b/tests/lbpm_sphere_pp.cpp @@ -23,7 +23,7 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -212,6 +212,6 @@ int main(int argc, char **argv) // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/lbpm_squaretube_pp.cpp b/tests/lbpm_squaretube_pp.cpp index a4ee5f60..a2287c5d 100644 --- a/tests/lbpm_squaretube_pp.cpp +++ b/tests/lbpm_squaretube_pp.cpp @@ -14,7 +14,7 @@ int main(int argc, char **argv) { // Initialize MPI 
- MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -253,6 +253,6 @@ int main(int argc, char **argv) } // **************************************************** comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); // **************************************************** } diff --git a/tests/lbpm_uCT_maskfilter.cpp b/tests/lbpm_uCT_maskfilter.cpp index 857bc4e0..ee05585e 100644 --- a/tests/lbpm_uCT_maskfilter.cpp +++ b/tests/lbpm_uCT_maskfilter.cpp @@ -31,7 +31,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -446,7 +446,7 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_uCT_maskfilter",true); comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/lbpm_uCT_pp.cpp b/tests/lbpm_uCT_pp.cpp index 6e8d1bde..30bdb0c8 100644 --- a/tests/lbpm_uCT_pp.cpp +++ b/tests/lbpm_uCT_pp.cpp @@ -31,7 +31,7 @@ int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); @@ -457,7 +457,7 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_uCT_pp",true); comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return 0; } diff --git a/tests/testCommunication.cpp b/tests/testCommunication.cpp index 911ef1c5..e666a882 100644 --- a/tests/testCommunication.cpp +++ b/tests/testCommunication.cpp @@ -251,7 +251,7 @@ int testHalo( const Utilities::MPI& comm, int nprocx, int nprocy, int nprocz, in int main(int argc, char **argv) { // Initialize MPI - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = 
comm.getSize(); @@ -287,7 +287,7 @@ int main(int argc, char **argv) comm.barrier(); int N_errors_global = comm.sumReduce( N_errors ); comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); if ( rank==0 ) { if ( N_errors_global==0 ) std::cout << "All tests passed\n"; diff --git a/tests/test_MPI.cpp b/tests/test_MPI.cpp index c0cf35af..a0d404a0 100644 --- a/tests/test_MPI.cpp +++ b/tests/test_MPI.cpp @@ -1452,10 +1452,7 @@ int main( int argc, char *argv[] ) // Test splitByNode MPI_CLASS nodeComm = globalComm.splitByNode(); - int length; - char name[MPI_MAX_PROCESSOR_NAME]; - MPI_Get_processor_name( name, &length ); - std::string localName( name ); + std::string localName = MPI_CLASS::getNodeName(); std::vector globalStrings( globalComm.getSize() ); std::vector nodeStrings( nodeComm.getSize() ); globalComm.allGather( localName, &globalStrings[0] ); diff --git a/tests/test_dcel_minkowski.cpp b/tests/test_dcel_minkowski.cpp index 2669b522..821e7ca1 100644 --- a/tests/test_dcel_minkowski.cpp +++ b/tests/test_dcel_minkowski.cpp @@ -25,7 +25,7 @@ std::shared_ptr loadInputs( ) int main(int argc, char **argv) { - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); //int rank = comm.getRank(); //int nprocs = comm.getSize(); @@ -99,7 +99,6 @@ int main(int argc, char **argv) } PROFILE_SAVE("test_dcel_minkowski"); - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return toReturn; } diff --git a/tests/test_dcel_tri_normal.cpp b/tests/test_dcel_tri_normal.cpp index b6497140..34a4a639 100644 --- a/tests/test_dcel_tri_normal.cpp +++ b/tests/test_dcel_tri_normal.cpp @@ -25,7 +25,7 @@ std::shared_ptr loadInputs( ) int main(int argc, char **argv) { - MPI_Init(&argc,&argv); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int toReturn = 0; { @@ -136,7 +136,6 @@ int main(int argc, char **argv) if (count_check > 0) toReturn=2; else printf("Succeeded. 
\n"); } - comm.barrier(); - MPI_Finalize(); + Utilities::shutdown(); return toReturn; } From a53c15939ee74ff7e2b236f22e47c735a84c1aba Mon Sep 17 00:00:00 2001 From: Mark Allen Berrill Date: Thu, 8 Oct 2020 10:11:00 -0500 Subject: [PATCH 078/205] Fixing minor compile error with hip and AMD gpus --- hip/D3Q7.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/hip/D3Q7.cu b/hip/D3Q7.cu index 16863fec..1c0de271 100644 --- a/hip/D3Q7.cu +++ b/hip/D3Q7.cu @@ -1,4 +1,5 @@ // GPU Functions for D3Q7 Lattice Boltzmann Methods +#include "hip/hip_runtime.h" #define NBLOCKS 560 #define NTHREADS 128 From 8d1c07ac48a5fba9ddca4ed2533eed1c85306e68 Mon Sep 17 00:00:00 2001 From: Mark Allen Berrill Date: Thu, 8 Oct 2020 10:38:38 -0500 Subject: [PATCH 079/205] Fixing compile error --- hip/D3Q19.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hip/D3Q19.cu b/hip/D3Q19.cu index 20d93d64..13d4ab75 100644 --- a/hip/D3Q19.cu +++ b/hip/D3Q19.cu @@ -1,6 +1,6 @@ #include -#include #include "hip/hip_runtime.h" +#include "hip/hip_cooperative_groups.h" #define NBLOCKS 1024 #define NTHREADS 256 From 9b8dd50f78db9c1109f919471aaa59a6bf0a1c84 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Thu, 8 Oct 2020 13:17:15 -0400 Subject: [PATCH 080/205] wide halo compiles --- common/WideHalo.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 48f83cd9..ecc563f3 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -291,6 +291,11 @@ void ScaLBLWideHalo_Communicator::Send(double *data){ MPI_Irecv(recvbuf_XYz, recvCount_XYz,MPI_DOUBLE,rank_XYz,recvtag,MPI_COMM_SCALBL,&req2[25]); //................................................................................... } + + +ScaLBLWideHalo_Communicator::~ScaLBLWideHalo_Communicator() +{ +} void ScaLBLWideHalo_Communicator::Recv(double *data){ //................................................................................... 
From 21d9e6c9007c1bd54c11ca45651574608620a830 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 8 Oct 2020 22:13:28 -0400 Subject: [PATCH 081/205] save the work;wait for validation results --- tests/TestIonModel.cpp | 35 +++++++++++-------- tests/TestNernstPlanck.cpp | 35 +++++++++++-------- tests/TestPNP_Stokes.cpp | 35 +++++++++++-------- tests/TestPoissonSolver.cpp | 33 ++++++++++------- ...m_electrokinetic_SingleFluid_simulator.cpp | 35 +++++++++++-------- 5 files changed, 104 insertions(+), 69 deletions(-) diff --git a/tests/TestIonModel.cpp b/tests/TestIonModel.cpp index 32f8302b..0b57ff1c 100644 --- a/tests/TestIonModel.cpp +++ b/tests/TestIonModel.cpp @@ -9,6 +9,7 @@ #include "models/IonModel.h" #include "models/MultiPhysController.h" +#include "common/Utilities.h" using namespace std; @@ -18,24 +19,27 @@ using namespace std; int main(int argc, char **argv) { - // Initialize MPI - int provided_thread_support = -1; - MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - if ( rank==0 && provided_thread_support #include "models/PoissonSolver.h" +#include "common/Utilities.h" using namespace std; @@ -18,23 +19,26 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - int provided_thread_support = -1; - MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE,&provided_thread_support); - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - if ( rank==0 && provided_thread_support Date: Fri, 9 Oct 2020 12:52:04 -0400 Subject: [PATCH 082/205] Adding example configure script for HIP on Summit --- sample_scripts/config_summit_hip | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 sample_scripts/config_summit_hip diff --git a/sample_scripts/config_summit_hip b/sample_scripts/config_summit_hip new file mode 100755 
index 00000000..23d3d919 --- /dev/null +++ b/sample_scripts/config_summit_hip @@ -0,0 +1,32 @@ +module load gcc/7.4.0 +module load spectrum-mpi +module load cmake +module load cuda/9.2.148 +module load hip/1.5-cuda9 + +export TPL_DIR=/gpfs/alpine/eng101/proj-shared/Mark/install + +rm -rf CMake* + +cmake \ + -D CMAKE_BUILD_TYPE:STRING=Release \ + -D CMAKE_C_COMPILER:PATH=mpicc \ + -D CMAKE_CXX_COMPILER:PATH=mpic++ \ + -D CMAKE_CXX_STANDARD=14 \ + -D DISABLE_LTO=1 \ + -D USE_CUDA=0 \ + -D CMAKE_CUDA_FLAGS="-arch sm_70" \ + -D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/7.4.0/bin/gcc" \ + -D USE_HIP=1 \ + -D HIP_NVCC_OPTIONS="-arch sm_70" \ + -D LINK_LIBRARIES="/sw/summit/cuda/9.2.148/lib64/libcudart.so" \ + -D USE_MPI=1 \ + -D MPI_COMPILER:BOOL=TRUE \ + -D USE_NETCDF=0 \ + -D USE_SILO=1 \ + -D SILO_DIRECTORY=${TPL_DIR}/silo \ + -D HDF5_DIRECTORY=${TPL_DIR}/hdf5 \ +   -D USE_TIMER=1 \ + -D TIMER_DIRECTORY=${TPL_DIR}/timer \ + -D USE_DOXYGEN=0 \ + ${HOME}/repos/LBPM-WIA From a9bfe9d79d0456c748017c4040960e4d4d533869 Mon Sep 17 00:00:00 2001 From: Mark Allen Berrill Date: Fri, 9 Oct 2020 11:55:00 -0500 Subject: [PATCH 083/205] Adding sample script for HIP on poplar --- sample_scripts/config_poplar_hip | 41 ++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100755 sample_scripts/config_poplar_hip diff --git a/sample_scripts/config_poplar_hip b/sample_scripts/config_poplar_hip new file mode 100755 index 00000000..f1c3159c --- /dev/null +++ b/sample_scripts/config_poplar_hip @@ -0,0 +1,41 @@ +module load cmake +#module load llvm +module load mpich +module load hdf5 +module load rocm + +export HDF5_DIR=${HDF5DIR} +export SILO_DIR=/ccs/proj/csc380/mcclurej/install/silo/4.10.2/ + +# configure +rm -rf CMake* +cmake \ + -D CMAKE_BUILD_TYPE:STRING=Release \ + -D CMAKE_C_COMPILER:PATH=clang \ + -D CMAKE_CXX_COMPILER:PATH=clang++ \ + -D CMAKE_CXX_STANDARD=14 \ + -D USE_HIP=1 \ + -D LINK_LIBRARIES=/opt/rocm-3.8.0/hip/lib/libamdhip64.so \ + -D USE_CUDA=0 \ + 
-D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \ + -D CMAKE_CUDA_HOST_COMPILER="gcc" \ + -D USE_MPI=0 \ + -D USE_HDF5=1 \ + -D HDF5_DIRECTORY="$HDF5_DIR" \ + -D HDF5_LIB="$HDF5_DIR/lib/libhdf5.a" \ + -D USE_SILO=0 \ + -D SILO_LIB="$SILO_DIR/lib/libsiloh5.a" \ + -D SILO_DIRECTORY="$SILO_DIR" \ + -D USE_DOXYGEN:BOOL=false \ + -D USE_TIMER=0 \ + ~/repos/LBPM-WIA + + +# -D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \ +# -D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/6.4.0/bin/gcc" \ + +# MPI_THREAD_MULTIPLE=1 MV2_USE_RDMA_CM=0 MV2_USE_RDMA_CM= MV2_NUM_HCAS=1 MV2_USE_CUDA=1 MV2_ENABLE_AFFINITY=0 srun -n 2 -N 1 --cpu-bind=v -c 1 ./test_MPI + +# -D MPI_COMPILER:BOOL=TRUE \ +# -D MPIEXEC=mpirun \ +# -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ From 35447181dfeb81e0b3e8de854f7e1fcbcc313190 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Fri, 9 Oct 2020 15:19:59 -0400 Subject: [PATCH 084/205] merging multi-halo with electrokinetic --- common/ScaLBL.h | 10 ---------- models/IonModel.cpp | 2 +- models/PoissonSolver.cpp | 2 +- models/StokesModel.cpp | 2 +- 4 files changed, 3 insertions(+), 13 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 9992f226..39905ee6 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -268,13 +268,8 @@ public: int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np, int width); void SendD3Q19AA(double *dist); void RecvD3Q19AA(double *dist); -<<<<<<< HEAD -// void BiSendD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd); -// void BiRecvD3Q7(double *A_even, double *A_odd, double *B_even, double *B_odd); -======= void SendD3Q7AA(double *fq, int Component); void RecvD3Q7AA(double *fq, int Component); ->>>>>>> electrokinetic void BiSendD3Q7AA(double *Aq, double *Bq); void BiRecvD3Q7AA(double *Aq, double *Bq); void TriSendD3Q7AA(double *Aq, double *Bq, double *Cq); @@ -295,21 +290,16 @@ public: void D3Q19_Reflection_BC_z(double *fq); void 
D3Q19_Reflection_BC_Z(double *fq); double D3Q19_Flux_BC_z(int *neighborList, double *fq, double flux, int time); -<<<<<<< HEAD void GreyscaleSC_BC_z(int *Map, double *DenA, double *DenB, double vA, double vB); void GreyscaleSC_BC_Z(int *Map, double *DenA, double *DenB, double vA, double vB); void GreyscaleSC_Pressure_BC_z(int *neighborList, double *fqA, double *fqB, double dinA, double dinB, int time); void GreyscaleSC_Pressure_BC_Z(int *neighborList, double *fqA, double *fqB, double doutA, double doutB, int time); -// void TestSendD3Q19(double *f_even, double *f_odd); -// void TestRecvD3Q19(double *f_even, double *f_odd); -======= void D3Q7_Poisson_Potential_BC_z(int *neighborList, double *fq, double Vin, int time); void D3Q7_Poisson_Potential_BC_Z(int *neighborList, double *fq, double Vout, int time); void Poisson_D3Q7_BC_z(int *Map, double *Psi, double Vin); void Poisson_D3Q7_BC_Z(int *Map, double *Psi, double Vout); void D3Q7_Ion_Concentration_BC_z(int *neighborList, double *fq, double Cin, int time); void D3Q7_Ion_Concentration_BC_Z(int *neighborList, double *fq, double Cout, int time); ->>>>>>> electrokinetic // Debugging and unit testing functions void PrintD3Q19(); diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 24c4d8bd..eb9101fa 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -593,7 +593,7 @@ void ScaLBL_IonModel::Create(){ if (rank==0) printf ("LB Ion Solver: Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... 
// MAIN VARIABLES ALLOCATED HERE diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 3e11de0a..636b88d1 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -271,7 +271,7 @@ void ScaLBL_Poisson::Create(){ if (rank==0) printf ("LB-Poisson Solver: Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... // MAIN VARIABLES ALLOCATED HERE diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 964baaae..891ea480 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -278,7 +278,7 @@ void ScaLBL_StokesModel::Create(){ if (rank==0) printf ("LB Single-Fluid Solver: Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); MPI_Barrier(comm); //........................................................................... 
// MAIN VARIABLES ALLOCATED HERE From 29e486a1a078bb54dcbca7e29b924712cc07390d Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Mon, 12 Oct 2020 09:08:53 -0400 Subject: [PATCH 085/205] moving electrochem model tests to executables --- tests/CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index fd66d8e4..28a02749 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -35,8 +35,13 @@ ADD_LBPM_EXECUTABLE( GenerateSphereTest ) #ADD_LBPM_EXECUTABLE( BlobIdentifyParallel ) #ADD_LBPM_EXECUTABLE( convertIO ) #ADD_LBPM_EXECUTABLE( DataAggregator ) -#ADD_LBPM_EXECUTABLE( BlobAnalyzeParallel ) +#ADD_LBPM_EXECUTABLE( BlobAnalyzeParallel )( ADD_LBPM_EXECUTABLE( lbpm_minkowski_scalar ) +ADD_LBPM_EXECUTABLE( TestPoissonSolver ) +ADD_LBPM_EXECUTABLE( TestIonModel ) +ADD_LBPM_EXECUTABLE( TestNernstPlanck ) +ADD_LBPM_EXECUTABLE( TestPNP_Stokes ) + CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/cylindertest ${CMAKE_CURRENT_BINARY_DIR}/cylindertest COPYONLY ) @@ -48,10 +53,6 @@ ADD_LBPM_TEST( TestTorusEvolve ) ADD_LBPM_TEST( TestTopo3D ) ADD_LBPM_TEST( TestFluxBC ) ADD_LBPM_TEST( TestMap ) -ADD_LBPM_TEST( TestPoissonSolver ) -ADD_LBPM_TEST( TestIonModel ) -ADD_LBPM_TEST( TestNernstPlanck ) -ADD_LBPM_TEST( TestPNP_Stokes ) #ADD_LBPM_TEST( TestMRT ) ADD_LBPM_TEST( TestColorGrad ) ADD_LBPM_TEST( TestWideHalo ) From b5f9ad5a6c65826b097616a6977665bafc2385c7 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 15 Oct 2020 11:54:26 -0400 Subject: [PATCH 086/205] PoissonSolver: remove extra setSlice for halo layer in Initialize() --- models/PoissonSolver.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 3e11de0a..a84b0916 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -390,14 +390,15 @@ void ScaLBL_Poisson::Initialize(){ delete [] psi_host; //extra treatment for halo layer - if 
(BoundaryCondition==1){ - if (Dm->kproc()==0){ - ScaLBL_SetSlice_z(Psi,Vin,Nx,Ny,Nz,0); - } - if (Dm->kproc() == nprocz-1){ - ScaLBL_SetSlice_z(Psi,Vout,Nx,Ny,Nz,Nz-1); - } - } + //maybe not useful + //if (BoundaryCondition==1){ + // if (Dm->kproc()==0){ + // ScaLBL_SetSlice_z(Psi,Vin,Nx,Ny,Nz,0); + // } + // if (Dm->kproc() == nprocz-1){ + // ScaLBL_SetSlice_z(Psi,Vout,Nx,Ny,Nz,Nz-1); + // } + //} } void ScaLBL_Poisson::Run(double *ChargeDensity){ From 21a0ec8c0ba1ad852e0bd5158558e2d7dd437125 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 16 Oct 2020 12:50:28 -0400 Subject: [PATCH 087/205] add a routine to write convergence log for Poisson solver --- models/PoissonSolver.cpp | 55 ++++++++++++++++++++++++++++++++-------- models/PoissonSolver.h | 4 +++ 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index a84b0916..aca6ec95 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -8,7 +8,7 @@ ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),tolerance(0),h(0), epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), -chargeDen_dummy(0), +chargeDen_dummy(0),WriteLog(0), nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -36,6 +36,7 @@ void ScaLBL_Poisson::ReadParams(string filename){ Vin = 1.0; //Boundary-z (inlet) electric potential Vout = 1.0; //Boundary-Z (outlet) electric potential chargeDen_dummy = 1.0e-3;//For debugging;unit=[C/m^3] + WriteLog = 0; // LB-Poisson Model parameters if (electric_db->keyExists( "timestepMax" )){ @@ -53,6 +54,15 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( "DummyChargeDen" )){ chargeDen_dummy = electric_db->getScalar( "DummyChargeDen" ); } + if (electric_db->keyExists( "WriteLog" )){ + auto writelog = 
electric_db->getScalar( "WriteLog" ); + if (writelog !="True" && writelog !="False"){ + ERROR("Error: LB-Poisson Solver: WriteLog cannot be identified! Uesage: WriteLog is either True or False.\n"); + } + else if (writelog =="True"){ + WriteLog = 1; + } + } // Read solid boundary condition specific to Poisson equation BoundaryConditionSolid = 1; @@ -390,15 +400,14 @@ void ScaLBL_Poisson::Initialize(){ delete [] psi_host; //extra treatment for halo layer - //maybe not useful - //if (BoundaryCondition==1){ - // if (Dm->kproc()==0){ - // ScaLBL_SetSlice_z(Psi,Vin,Nx,Ny,Nz,0); - // } - // if (Dm->kproc() == nprocz-1){ - // ScaLBL_SetSlice_z(Psi,Vout,Nx,Ny,Nz,Nz-1); - // } - //} + if (BoundaryCondition==1){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Psi,Vin,Nx,Ny,Nz,0); + } + if (Dm->kproc() == nprocz-1){ + ScaLBL_SetSlice_z(Psi,Vout,Nx,Ny,Nz,Nz-1); + } + } } void ScaLBL_Poisson::Run(double *ChargeDensity){ @@ -457,6 +466,9 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ psi_avg_previous = psi_avg; } } + if(WriteLog==1){ + getConvergenceLog(timestep,error); + } //************************************************************************/ //stoptime = MPI_Wtime(); @@ -476,6 +488,29 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ } +void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){ + if (rank==0){ + bool WriteHeader=false; + TIMELOG = fopen("PoissonSolver_Convergence.csv","r"); + if (TIMELOG != NULL) + fclose(TIMELOG); + else + WriteHeader=true; + + TIMELOG = fopen("PoissonSolver_Convergence.csv","a+"); + if (WriteHeader) + { + fprintf(TIMELOG,"Timestep Error\n"); + fprintf(TIMELOG,"%i %.5g\n",timestep,error); + fflush(TIMELOG); + } + else { + fprintf(TIMELOG,"%i %.5g\n",timestep,error); + fflush(TIMELOG); + } + } +} + void ScaLBL_Poisson::SolveElectricPotentialAAodd(){ ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), 
ScaLBL_Comm->LastInterior(), Np); diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index dfd098d5..f4d2efd7 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -3,6 +3,7 @@ */ #include #include +#include #include #include #include @@ -45,6 +46,7 @@ public: double epsilon0,epsilon0_LB,epsilonR,epsilon_LB; double Vin, Vout; double chargeDen_dummy;//for debugging + short WriteLog; int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; @@ -79,6 +81,7 @@ private: char LocalRankFilename[40]; char LocalRestartFile[40]; char OutputFilename[200]; + FILE *TIMELOG; //int rank,nprocs; void LoadParams(std::shared_ptr db0); @@ -90,5 +93,6 @@ private: //void SolveElectricField(); void SolvePoissonAAodd(double *ChargeDensity); void SolvePoissonAAeven(double *ChargeDensity); + void getConvergenceLog(int timestep,double error); }; From 7cc2be826e60e6456fd5df507017004f3ce2f41f Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 18 Oct 2020 12:20:15 -0400 Subject: [PATCH 088/205] PoissonSolver: remove extra setSlice for halo layer in Initialize() --- models/PoissonSolver.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index aca6ec95..0ef13c84 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -400,14 +400,14 @@ void ScaLBL_Poisson::Initialize(){ delete [] psi_host; //extra treatment for halo layer - if (BoundaryCondition==1){ - if (Dm->kproc()==0){ - ScaLBL_SetSlice_z(Psi,Vin,Nx,Ny,Nz,0); - } - if (Dm->kproc() == nprocz-1){ - ScaLBL_SetSlice_z(Psi,Vout,Nx,Ny,Nz,Nz-1); - } - } + //if (BoundaryCondition==1){ + // if (Dm->kproc()==0){ + // ScaLBL_SetSlice_z(Psi,Vin,Nx,Ny,Nz,0); + // } + // if (Dm->kproc() == nprocz-1){ + // ScaLBL_SetSlice_z(Psi,Vout,Nx,Ny,Nz,Nz-1); + // } + //} } void ScaLBL_Poisson::Run(double *ChargeDensity){ From 69f319ab5c27baca55f535959c4c7d8dcf1d5741 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 5 Nov 2020 21:51:29 
-0500 Subject: [PATCH 089/205] work in progress; add CPU ion flux BC --- common/ScaLBL.cpp | 22 ++++ common/ScaLBL.h | 10 ++ cpu/D3Q7BC.cpp | 52 ++++++++ models/IonModel.cpp | 287 ++++++++++++++++++++++++++++---------------- models/IonModel.h | 7 +- 5 files changed, 272 insertions(+), 106 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 8b0abeba..e946e975 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -2109,3 +2109,25 @@ void ScaLBL_Communicator::D3Q7_Ion_Concentration_BC_Z(int *neighborList, double } } } + +void ScaLBL_Communicator::D3Q7_Ion_Flux_BC_z(int *neighborList, double *fq, double Cin, int time){ + if (kproc == 0) { + if (time%2==0){ + ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(dvcSendList_z, fq, Cin, sendCount_z, N); + } + else{ + ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(neighborList, dvcSendList_z, fq, Cin, sendCount_z, N); + } + } +} + +void ScaLBL_Communicator::D3Q7_Ion_Flux_BC_Z(int *neighborList, double *fq, double Cout, int time){ + if (kproc == nprocz-1){ + if (time%2==0){ + ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(dvcSendList_Z, fq, Cout, sendCount_Z, N); + } + else{ + ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(neighborList, dvcSendList_Z, fq, Cout, sendCount_Z, N); + } + } +} diff --git a/common/ScaLBL.h b/common/ScaLBL.h index edd601e8..4b34655d 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -214,6 +214,14 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, in extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double Cin, int count, int Np); + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double Cout, int count, int Np); + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np); + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, 
double *dist, double Cout, int count, int Np); + class ScaLBL_Communicator{ public: //...................................................................................... @@ -279,6 +287,8 @@ public: void Poisson_D3Q7_BC_Z(int *Map, double *Psi, double Vout); void D3Q7_Ion_Concentration_BC_z(int *neighborList, double *fq, double Cin, int time); void D3Q7_Ion_Concentration_BC_Z(int *neighborList, double *fq, double Cout, int time); + void D3Q7_Ion_Flux_BC_z(int *neighborList, double *fq, double Cin, int time); + void D3Q7_Ion_Flux_BC_Z(int *neighborList, double *fq, double Cout, int time); // Debugging and unit testing functions void PrintD3Q19(); diff --git a/cpu/D3Q7BC.cpp b/cpu/D3Q7BC.cpp index 161e6a5c..85012df0 100644 --- a/cpu/D3Q7BC.cpp +++ b/cpu/D3Q7BC.cpp @@ -223,3 +223,55 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, in dist[nr6] = f6; } } + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, int count, int Np){ + //NOTE: FluxIn is the inward flux + for (int idx=0; idx &num_iter){ IonDiffusivity.push_back(1.0e-9);//user-input diffusivity has physical unit [m^2/sec] IonValence.push_back(1);//algebraic valence charge IonConcentration.push_back(1.0e-3);//user-input ion concentration has physical unit [mol/m^3] - Cin.push_back(1.0e-3);//user-input inlet boundary ion concentration;unit [mol/m^3] - Cout.push_back(1.0e-3);//user-input outlet boundary ion concentration;unit [mol/m^3] //tau.push_back(0.5+k2_inv*time_conv/(h*1.0e-6)/(h*1.0e-6)*IonDiffusivity[0]); time_conv.push_back((tau[0]-0.5)/k2_inv*(h*h*1.0e-12)/IonDiffusivity[0]); fluidVelx_dummy = 0.0;//for debugging, unit [m/sec] @@ -162,54 +160,78 @@ void ScaLBL_IonModel::ReadParams(string filename,vector &num_iter){ } // Read boundary condition for ion transport // BC = 0: normal periodic BC - // BC = 1: fixed inlet and outlet ion concentration - BoundaryCondition = 0; - if (ion_db->keyExists( "BC" )){ - BoundaryCondition = 
ion_db->getScalar( "BC" ); + // BC = 1: fixed ion concentration; unit=[mol/m^3] + // BC = 2: fixed ion flux (inward flux); unit=[mol/m^2/sec] + BoundaryConditionInlet.push_back(0); + BoundaryConditionOutlet.push_back(0); + //Inlet + if (ion_db->keyExists( "BC_InletList" )){ + BoundaryConditionInlet = ion_db->getVector( "BC_InletList" ); + if (BoundaryConditionInlet.size()!=number_ion_species){ + ERROR("Error: number_ion_species and BC_InletList must be of the same length! \n"); + } + unsigned short int BC_inlet_min = *min_element(BoundaryConditionInlet.begin(),BoundaryConditionInlet.end()); + unsigned short int BC_inlet_max = *max_element(BoundaryConditionInlet.begin(),BoundaryConditionInlet.end()); + if (BC_inlet_min == 0 && BC_inlet_max>0){ + ERROR("Error: BC_InletList: mix of periodic, ion concentration and flux BC is not supported! \n"); + } + if (BC_inlet_min>0){ + //read in inlet values Cin + if (ion_db->keyExists("InletValueList")){ + Cin = ion_db->getVector( "InletValueList" ); + if (Cin.size()!=number_ion_species){ + ERROR("Error: number_ion_species and InletValueList must be the same length! \n"); + } + } + else { + ERROR("Error: Non-periodic BCs are specified but InletValueList cannot be found! \n"); + } + for (unsigned int i=0;ikeyExists("CinList")){ - Cin.clear(); - Cin = ion_db->getVector( "CinList" ); - if (Cin.size()!=number_ion_species){ - ERROR("Error: number_ion_species and CinList must be the same length! \n"); + //Outlet + if (ion_db->keyExists( "BC_OutletList" )){ + BoundaryConditionOutlet = ion_db->getVector( "BC_OutletList" ); + if (BoundaryConditionOutlet.size()!=number_ion_species){ + ERROR("Error: number_ion_species and BC_OutletList must be of the same length! 
\n"); + } + unsigned short int BC_outlet_min = *min_element(BoundaryConditionOutlet.begin(),BoundaryConditionOutlet.end()); + unsigned short int BC_outlet_max = *max_element(BoundaryConditionOutlet.begin(),BoundaryConditionOutlet.end()); + if (BC_outlet_min == 0 && BC_outlet_max>0){ + ERROR("Error: BC_OutletList: mix of periodic, ion concentration and flux BC is not supported! \n"); + } + if (BC_outlet_min>0){ + //read in outlet values Cout + if (ion_db->keyExists("OutletValueList")){ + Cout = ion_db->getVector( "OutletValueList" ); + if (Cout.size()!=number_ion_species){ + ERROR("Error: number_ion_species and OutletValueList must be the same length! \n"); + } } - else{ - for (int i=0; ikeyExists("CoutList")){ - Cout.clear(); - Cout = ion_db->getVector( "CoutList" ); - if (Cout.size()!=number_ion_species){ - ERROR("Error: number_ion_species and CoutList must be the same length! \n"); - } - else{ - for (int i=0; ikeyExists( "BC" )){ - BoundaryCondition = ion_db->getScalar( "BC" ); + // BC = 1: fixed ion concentration; unit=[mol/m^3] + // BC = 2: fixed ion flux (inward flux); unit=[mol/m^2/sec] + BoundaryConditionInlet.push_back(0); + BoundaryConditionOutlet.push_back(0); + //Inlet + if (ion_db->keyExists( "BC_InletList" )){ + BoundaryConditionInlet = ion_db->getVector( "BC_InletList" ); + if (BoundaryConditionInlet.size()!=number_ion_species){ + ERROR("Error: number_ion_species and BC_InletList must be of the same length! \n"); + } + unsigned short int BC_inlet_min = *min_element(BoundaryConditionInlet.begin(),BoundaryConditionInlet.end()); + unsigned short int BC_inlet_max = *max_element(BoundaryConditionInlet.begin(),BoundaryConditionInlet.end()); + if (BC_inlet_min == 0 && BC_inlet_max>0){ + ERROR("Error: BC_InletList: mix of periodic, ion concentration and flux BC is not supported! 
\n"); + } + if (BC_inlet_min>0){ + //read in inlet values Cin + if (ion_db->keyExists("InletValueList")){ + Cin = ion_db->getVector( "InletValueList" ); + if (Cin.size()!=number_ion_species){ + ERROR("Error: number_ion_species and InletValueList must be the same length! \n"); + } + } + else { + ERROR("Error: Non-periodic BCs are specified but InletValueList cannot be found! \n"); + } + for (unsigned int i=0;ikeyExists("CinList")){ - Cin.clear(); - Cin = ion_db->getVector( "CinList" ); - if (Cin.size()!=number_ion_species){ - ERROR("Error: number_ion_species and CinList must be the same length! \n"); + //Outlet + if (ion_db->keyExists( "BC_OutletList" )){ + BoundaryConditionOutlet = ion_db->getVector( "BC_OutletList" ); + if (BoundaryConditionOutlet.size()!=number_ion_species){ + ERROR("Error: number_ion_species and BC_OutletList must be of the same length! \n"); + } + unsigned short int BC_outlet_min = *min_element(BoundaryConditionOutlet.begin(),BoundaryConditionOutlet.end()); + unsigned short int BC_outlet_max = *max_element(BoundaryConditionOutlet.begin(),BoundaryConditionOutlet.end()); + if (BC_outlet_min == 0 && BC_outlet_max>0){ + ERROR("Error: BC_OutletList: mix of periodic, ion concentration and flux BC is not supported! \n"); + } + if (BC_outlet_min>0){ + //read in outlet values Cout + if (ion_db->keyExists("OutletValueList")){ + Cout = ion_db->getVector( "OutletValueList" ); + if (Cout.size()!=number_ion_species){ + ERROR("Error: number_ion_species and OutletValueList must be the same length! \n"); + } } - else{ - for (int i=0; ikeyExists("CoutList")){ - Cout.clear(); - Cout = ion_db->getVector( "CoutList" ); - if (Cout.size()!=number_ion_species){ - ERROR("Error: number_ion_species and CoutList must be the same length! 
\n"); - } - else{ - for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object MPI_Barrier(comm); - Dm->BoundaryCondition = BoundaryCondition; - Mask->BoundaryCondition = BoundaryCondition; + + unsigned short int BC_inlet_min = *min_element(BoundaryConditionInlet.begin(),BoundaryConditionInlet.end()); + unsigned short int BC_outlet_min = *min_element(BoundaryConditionOutlet.begin(),BoundaryConditionOutlet.end()); + if (BC_inlet_min==0 && BC_outlet_min==0){ + Dm->BoundaryCondition = 0; + Mask->BoundaryCondition = 0; + } + else if (BC_inlet_min>0 && BC_outlet_min>0){ + Dm->BoundaryCondition = 1; + Mask->BoundaryCondition = 1; + } + else { //i.e. periodic and non-periodic BCs are mixed + ERROR("Error: check the type of inlet and outlet boundary condition! Mixed periodic and non-periodic BCs are found. \n"); + } + Dm->CommInit(); MPI_Barrier(comm); @@ -714,12 +771,24 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); - ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + //--------------------------------------- Set boundary conditions -------------------------------------// + switch (BoundaryConditionInlet[i]){ + case 1: + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + break; + case 2: + ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + break; } - //-------------------------// + switch (BoundaryConditionOutlet[i]){ + case 1: + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], 
timestep); + break; + case 2: + ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + break; + } + //----------------------------------------------------------------------------------------------------// ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np], 0, ScaLBL_Comm->LastExterior(), Np); @@ -742,12 +811,24 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); - ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + //--------------------------------------- Set boundary conditions -------------------------------------// + switch (BoundaryConditionInlet[i]){ + case 1: + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + break; + case 2: + ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + break; } - //-------------------------// + switch (BoundaryConditionOutlet[i]){ + case 1: + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + break; + case 2: + ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + break; + } + //----------------------------------------------------------------------------------------------------// ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np], 0, ScaLBL_Comm->LastExterior(), Np); diff --git a/models/IonModel.h b/models/IonModel.h index 5a568182..59382002 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -39,7 +39,6 @@ public: //bool Restart,pBC; int timestep; vector timestepMax; - int BoundaryCondition; int BoundaryConditionSolid; 
double h;//domain resolution, unit [um/lu] double kb,electron_charge,T,Vt; @@ -49,11 +48,13 @@ public: double Ex_dummy,Ey_dummy,Ez_dummy; int number_ion_species; + vector BoundaryConditionInlet; + vector BoundaryConditionOutlet; vector IonDiffusivity;//User input unit [m^2/sec] vector IonValence; vector IonConcentration;//unit [mol/m^3] - vector Cin;//unit [mol/m^3] - vector Cout;//unit [mol/m^3] + vector Cin;//inlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec] + vector Cout;//outlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec] vector tau; vector time_conv; From f9c32855e5d51eaf45096707e6eb7d43b8c2fbfe Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sat, 7 Nov 2020 16:41:07 -0500 Subject: [PATCH 090/205] save the work; validation to be continued --- models/IonModel.cpp | 107 +++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 37 deletions(-) diff --git a/models/IonModel.cpp b/models/IonModel.cpp index ec004967..cfb69366 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -721,15 +721,6 @@ void ScaLBL_IonModel::Initialize(){ ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, IonValence[ic], ic, 0, ScaLBL_Comm->LastExterior(), Np); } - if (rank==0) printf("*****************************************************\n"); - if (rank==0) printf("LB Ion Transport Solver: \n"); - for (int i=0; iRecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); //--------------------------------------- Set boundary conditions -------------------------------------// - switch (BoundaryConditionInlet[i]){ - case 1: - ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); - break; - case 2: - ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); - break; + if (BoundaryConditionInlet[ic]>0){ + switch (BoundaryConditionInlet[ic]){ + case 1: + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + 
break; + case 2: + ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + break; + } } - switch (BoundaryConditionOutlet[i]){ - case 1: - ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); - break; - case 2: - ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); - break; + if (BoundaryConditionOutlet[ic]>0){ + switch (BoundaryConditionOutlet[ic]){ + case 1: + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + break; + case 2: + ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + break; + } } //----------------------------------------------------------------------------------------------------// ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np], 0, ScaLBL_Comm->LastExterior(), Np); @@ -812,21 +841,25 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); //--------------------------------------- Set boundary conditions -------------------------------------// - switch (BoundaryConditionInlet[i]){ - case 1: - ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); - break; - case 2: - ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); - break; + if (BoundaryConditionInlet[ic]>0){ + switch (BoundaryConditionInlet[ic]){ + case 1: + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + break; + case 2: + ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + break; + } } - switch (BoundaryConditionOutlet[i]){ - case 1: - ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); - break; - case 2: - ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); - break; + if (BoundaryConditionOutlet[ic]>0){ + switch 
(BoundaryConditionOutlet[ic]){ + case 1: + ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + break; + case 2: + ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + break; + } } //----------------------------------------------------------------------------------------------------// ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np], 0, ScaLBL_Comm->LastExterior(), Np); From e93b941d9edb7931e9950df654f8ecc2c3f88fd9 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 19 Nov 2020 13:17:31 -0500 Subject: [PATCH 091/205] save the work; fluxBC for Ion solver still does not fully agree COMSOL --- common/ScaLBL.cpp | 12 +++--- common/ScaLBL.h | 12 +++--- cpu/D3Q7BC.cpp | 102 +++++++++++++++++++++++++++++++++++++------- models/IonModel.cpp | 12 +++--- 4 files changed, 104 insertions(+), 34 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index e946e975..ca4e89b6 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -2110,24 +2110,24 @@ void ScaLBL_Communicator::D3Q7_Ion_Concentration_BC_Z(int *neighborList, double } } -void ScaLBL_Communicator::D3Q7_Ion_Flux_BC_z(int *neighborList, double *fq, double Cin, int time){ +void ScaLBL_Communicator::D3Q7_Ion_Flux_BC_z(int *neighborList, double *fq, double Cin, double tau, double *VelocityZ, int time){ if (kproc == 0) { if (time%2==0){ - ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(dvcSendList_z, fq, Cin, sendCount_z, N); + ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(dvcSendList_z, fq, Cin, tau, VelocityZ, sendCount_z, N); } else{ - ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(neighborList, dvcSendList_z, fq, Cin, sendCount_z, N); + ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(neighborList, dvcSendList_z, fq, Cin, tau, VelocityZ, sendCount_z, N); } } } -void ScaLBL_Communicator::D3Q7_Ion_Flux_BC_Z(int *neighborList, double *fq, double Cout, int time){ +void ScaLBL_Communicator::D3Q7_Ion_Flux_BC_Z(int *neighborList, double *fq, double Cout, double tau, double *VelocityZ, int time){ 
if (kproc == nprocz-1){ if (time%2==0){ - ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(dvcSendList_Z, fq, Cout, sendCount_Z, N); + ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(dvcSendList_Z, fq, Cout, tau, VelocityZ, sendCount_Z, N); } else{ - ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(neighborList, dvcSendList_Z, fq, Cout, sendCount_Z, N); + ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(neighborList, dvcSendList_Z, fq, Cout, tau, VelocityZ, sendCount_Z, N); } } } diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 4b34655d..3ef8c31c 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -214,13 +214,13 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, in extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double Cin, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double Cin, double tau, double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double Cout, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double Cout, double tau, double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double Cin, double tau, double *VelocityZ, int count, int Np); -extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, double tau, double *VelocityZ, int count, int Np); class ScaLBL_Communicator{ public: @@ -287,8 +287,8 @@ public: void Poisson_D3Q7_BC_Z(int *Map, double *Psi, double Vout); void 
D3Q7_Ion_Concentration_BC_z(int *neighborList, double *fq, double Cin, int time); void D3Q7_Ion_Concentration_BC_Z(int *neighborList, double *fq, double Cout, int time); - void D3Q7_Ion_Flux_BC_z(int *neighborList, double *fq, double Cin, int time); - void D3Q7_Ion_Flux_BC_Z(int *neighborList, double *fq, double Cout, int time); + void D3Q7_Ion_Flux_BC_z(int *neighborList, double *fq, double Cin, double tau, double *VelocityZ, int time); + void D3Q7_Ion_Flux_BC_Z(int *neighborList, double *fq, double Cout, double tau, double *VelocityZ, int time); // Debugging and unit testing functions void PrintD3Q19(); diff --git a/cpu/D3Q7BC.cpp b/cpu/D3Q7BC.cpp index 85012df0..39c95172 100644 --- a/cpu/D3Q7BC.cpp +++ b/cpu/D3Q7BC.cpp @@ -224,54 +224,124 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, in } } -extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, int count, int Np){ +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ //NOTE: FluxIn is the inward flux + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + int n; + double uz; for (int idx=0; idxD3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); break; case 2: - ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], tau[ic], &Velocity[2*Np], timestep); break; } } @@ -813,7 +813,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); break; case 2: - ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], tau[ic], &Velocity[2*Np], timestep); break; } } @@ -847,7 +847,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ 
ScaLBL_Comm->D3Q7_Ion_Concentration_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); break; case 2: - ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], timestep); + ScaLBL_Comm->D3Q7_Ion_Flux_BC_z(NeighborList, &fq[ic*Np*7], Cin[ic], tau[ic], &Velocity[2*Np], timestep); break; } } @@ -857,7 +857,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_Comm->D3Q7_Ion_Concentration_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); break; case 2: - ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], timestep); + ScaLBL_Comm->D3Q7_Ion_Flux_BC_Z(NeighborList, &fq[ic*Np*7], Cout[ic], tau[ic], &Velocity[2*Np], timestep); break; } } From 9693bb94400567d4f8e455c0202ff16e5dca6fec Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 3 Dec 2020 02:15:20 -0500 Subject: [PATCH 092/205] CPU only; revert the definition of pressure back to normal, without the factor of voxel porosity --- cpu/GreyscaleColor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpu/GreyscaleColor.cpp b/cpu/GreyscaleColor.cpp index a04fb22a..cf01cd84 100644 --- a/cpu/GreyscaleColor.cpp +++ b/cpu/GreyscaleColor.cpp @@ -494,7 +494,8 @@ extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *neighborList, int *Map, d Velocity[n] = ux; Velocity[Np+n] = uy; Velocity[2*Np+n] = uz; - Pressure[n] = rho/3.f/porosity; + //Pressure[n] = rho/3.f/porosity; + Pressure[n] = rho/3.f; //........................................................................ //..............carry out relaxation process.............................. @@ -1149,7 +1150,8 @@ extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, doubl Velocity[n] = ux; Velocity[Np+n] = uy; Velocity[2*Np+n] = uz; - Pressure[n] = rho/3.f/porosity; + //Pressure[n] = rho/3.f/porosity; + Pressure[n] = rho/3.f; //........................................................................ 
//..............carry out relaxation process.............................. From 1528aa7435115ac7a835fca293d62e9ef79d0ed9 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 3 Dec 2020 19:21:03 -0500 Subject: [PATCH 093/205] CPU only: flux BC validated and done --- cpu/D3Q7BC.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpu/D3Q7BC.cpp b/cpu/D3Q7BC.cpp index 39c95172..2917bab4 100644 --- a/cpu/D3Q7BC.cpp +++ b/cpu/D3Q7BC.cpp @@ -242,7 +242,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double uz = VelocityZ[n]; //................................................... - f5 =(FluxIn+(1.0-0.5/tau)*f6-uz*fsum_partial)/(1.0-0.5/tau+uz); + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); dist[6*Np+n] = f5; } } @@ -266,7 +266,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double uz = VelocityZ[n]; //................................................... - f6 =(FluxIn+(1.0-0.5/tau)*f5+uz*fsum_partial)/(1.0-0.5/tau-uz); + f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); dist[5*Np+n] = f6; } } @@ -300,7 +300,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, fsum_partial = f0+f1+f2+f3+f4+f6; uz = VelocityZ[n]; //................................................... - f5 =(FluxIn+(1.0-0.5/tau)*f6-uz*fsum_partial)/(1.0-0.5/tau+uz); + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); // Unknown distributions nr5 = d_neighborList[n+4*Np]; @@ -338,7 +338,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, fsum_partial = f0+f1+f2+f3+f4+f5; uz = VelocityZ[n]; //................................................... 
- f6 =(FluxIn+(1.0-0.5/tau)*f5+uz*fsum_partial)/(1.0-0.5/tau-uz); + f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); // unknown distributions nr6 = d_neighborList[n+5*Np]; From de60bae8fcfc3c9e6538c72095742eae98fb0971 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 3 Dec 2020 20:28:09 -0500 Subject: [PATCH 094/205] GPU fluxBC is ready too --- gpu/D3Q7BC.cu | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/gpu/D3Q7BC.cu b/gpu/D3Q7BC.cu index 8d27f7d5..63aee2bb 100644 --- a/gpu/D3Q7BC.cu +++ b/gpu/D3Q7BC.cu @@ -265,6 +265,131 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList } } +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + fsum_partial = f0+f1+f2+f3+f4+f6; + uz = VelocityZ[n]; + //................................................... + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + fsum_partial = f0+f1+f2+f3+f4+f5; + uz = VelocityZ[n]; + //................................................... 
+ f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + fsum_partial = f0+f1+f2+f3+f4+f6; + uz = VelocityZ[n]; + //................................................... + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + fsum_partial = f0+f1+f2+f3+f4+f5; + uz = VelocityZ[n]; + //................................................... 
+ f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + dist[nr6] = f6; + } +} //************************************************************************* extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ @@ -375,3 +500,38 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, in } } +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<>>(list, dist, FluxIn, tau, VelocityZ, count, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<>>(list, dist, FluxIn, tau, VelocityZ, count, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int 
Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",cudaGetErrorString(err)); + } +} From 7ef18bfea3e1390e506d431e898757728bd2b975 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 4 Dec 2020 00:37:02 -0500 Subject: [PATCH 095/205] make the WriteLog in PoissonSolver a bool type to facilitate input --- models/PoissonSolver.cpp | 12 +++--------- models/PoissonSolver.h | 2 +- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 0ef13c84..c50d8816 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -36,7 +36,7 @@ void ScaLBL_Poisson::ReadParams(string filename){ Vin = 1.0; //Boundary-z (inlet) electric potential Vout = 1.0; //Boundary-Z (outlet) electric potential chargeDen_dummy = 1.0e-3;//For debugging;unit=[C/m^3] - WriteLog = 0; + WriteLog = false; // LB-Poisson Model parameters if (electric_db->keyExists( "timestepMax" )){ @@ -55,13 +55,7 @@ void ScaLBL_Poisson::ReadParams(string filename){ chargeDen_dummy = electric_db->getScalar( "DummyChargeDen" ); } if (electric_db->keyExists( "WriteLog" )){ - auto writelog = electric_db->getScalar( "WriteLog" ); - if (writelog !="True" && writelog !="False"){ - ERROR("Error: LB-Poisson Solver: WriteLog cannot be identified! 
Uesage: WriteLog is either True or False.\n"); - } - else if (writelog =="True"){ - WriteLog = 1; - } + WriteLog = electric_db->getScalar( "WriteLog" ); } // Read solid boundary condition specific to Poisson equation @@ -466,7 +460,7 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ psi_avg_previous = psi_avg; } } - if(WriteLog==1){ + if(WriteLog==true){ getConvergenceLog(timestep,error); } diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index f4d2efd7..241e871a 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -46,7 +46,7 @@ public: double epsilon0,epsilon0_LB,epsilonR,epsilon_LB; double Vin, Vout; double chargeDen_dummy;//for debugging - short WriteLog; + bool WriteLog; int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; From cb0efd10e362dc29d077e4f843c458afe9d2ae7b Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 4 Dec 2020 17:54:57 -0500 Subject: [PATCH 096/205] CPU/GPU single/two-fluid, MRT only; revert the definition of pressure back to normal --- cpu/Greyscale.cpp | 6 ++++-- gpu/Greyscale.cu | 6 ++++-- gpu/GreyscaleColor.cu | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cpu/Greyscale.cpp b/cpu/Greyscale.cpp index 5f6d3633..4aab97e2 100644 --- a/cpu/Greyscale.cpp +++ b/cpu/Greyscale.cpp @@ -1959,7 +1959,8 @@ extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist } //Calculate pressure for MRT model - pressure=rho/3.f/porosity; + //pressure=rho/3.f/porosity; + pressure=rho/3.f; //-------------------- MRT collison where body force has NO higher-order terms -------------// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) - m1); @@ -2457,7 +2458,8 @@ extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int f } //Calculate pressure for Incompressible-MRT model - pressure=rho/3.f/porosity; + //pressure=rho/3.f/porosity; + pressure=rho/3.f; //-------------------- IMRT collison where body force has NO higher-order terms 
-------------// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) - m1); diff --git a/gpu/Greyscale.cu b/gpu/Greyscale.cu index 57452bbb..edf80a16 100644 --- a/gpu/Greyscale.cu +++ b/gpu/Greyscale.cu @@ -1990,7 +1990,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double * } //Calculate pressure for MRT model - pressure=rho/3.f/porosity; + //pressure=rho/3.f/porosity; + pressure=rho/3.f; //-------------------- MRT collison where body force has NO higher-order terms -------------// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) - m1); @@ -2496,7 +2497,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, i } //Calculate pressure for Incompressible-MRT model - pressure=rho/3.f/porosity; + //pressure=rho/3.f/porosity; + pressure=rho/3.f; //-------------------- IMRT collison where body force has NO higher-order terms -------------// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) - m1); diff --git a/gpu/GreyscaleColor.cu b/gpu/GreyscaleColor.cu index b3398d32..69c9a080 100644 --- a/gpu/GreyscaleColor.cu +++ b/gpu/GreyscaleColor.cu @@ -512,7 +512,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor(int *neighborList, int *Ma Velocity[n] = ux; Velocity[Np+n] = uy; Velocity[2*Np+n] = uz; - Pressure[n] = rho/3.f/porosity; + //Pressure[n] = rho/3.f/porosity; + Pressure[n] = rho/3.f; //........................................................................ //..............carry out relaxation process.............................. @@ -1218,7 +1219,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, Velocity[n] = ux; Velocity[Np+n] = uy; Velocity[2*Np+n] = uz; - Pressure[n] = rho/3.f/porosity; + //Pressure[n] = rho/3.f/porosity; + Pressure[n] = rho/3.f; //........................................................................ //..............carry out relaxation process.............................. 
From f3ba90337a33836366888edda4290bbc45f29744 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 13 Dec 2020 19:34:56 -0500 Subject: [PATCH 097/205] to be compiled: add basic restart and write visualization functionality --- models/GreyscaleColorModel.cpp | 169 +++++++++++++++++++++++++++++++++ models/GreyscaleColorModel.h | 1 + 2 files changed, 170 insertions(+) diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index 9cb011e2..9ebad61c 100644 --- a/models/GreyscaleColorModel.cpp +++ b/models/GreyscaleColorModel.cpp @@ -9,6 +9,12 @@ Two-fluid greyscale color lattice boltzmann model #include #include +template +void DeleteArray( const TYPE *p ) +{ + delete [] p; +} + ScaLBL_GreyscaleColorModel::ScaLBL_GreyscaleColorModel(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauA_eff(0),tauB_eff(0),rhoA(0),rhoB(0),alpha(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0),GreyPorosity(0), @@ -786,6 +792,15 @@ void ScaLBL_GreyscaleColorModel::Run(){ double initial_volume = 0.0; double delta_volume = 0.0; double delta_volume_target = 0.0; + + //TODO -------- For temporary use - should be included in the analysis framework later ------------- + if (analysis_db->keyExists( "visualization_interval" )){ + visualization_interval = analysis_db->getScalar( "visualization_interval" ); + } + if (analysis_db->keyExists( "restart_interval" )){ + restart_interval = analysis_db->getScalar( "restart_interval" ); + } + //------------------------------------------------------------------------------------------------- /* history for morphological algoirthm */ double KRA_MORPH_FACTOR=0.5; @@ -1015,6 +1030,51 @@ void ScaLBL_GreyscaleColorModel::Run(){ //************************************************************************ PROFILE_STOP("Update"); + //TODO For temporary use - writing Restart and Vis files should be included in the analysis framework in the 
future + if (timestep%restart_interval==0){ + //Use rank=0 write out Restart.db + if (rank==0) { + greyscaleColor_db->putScalar("timestep",timestep); + greyscaleColor_db->putScalar( "Restart", true ); + current_db->putDatabase("Color", greyscaleColor_db); + std::ofstream OutStream("Restart.db"); + current_db->print(OutStream, ""); + OutStream.close(); + + } + //Write out Restart data. + std::shared_ptr cDen; + std::shared_ptr cfq; + cDen = std::shared_ptr(new double[2*Np], DeleteArray); + cfq = std::shared_ptr(new double[19*Np],DeleteArray); + ScaLBL_CopyToHost(cDen.get(),Den,2*Np*sizeof(double));// Copy restart data to the CPU + ScaLBL_CopyToHost(cfq.get(), fq,19*Np*sizeof(double));// Copy restart data to the CPU + + ofstream RESTARTFILE(LocalRestartFile,ios::binary); + double value; + for (int n=0; n visData; + fillHalo fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1); + + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); + auto SignDistVar = std::make_shared(); + auto PressureVar = std::make_shared(); + auto PhaseVar = std::make_shared(); + + // Create the MeshDataStruct + IO::initialize("","silo","false"); + visData.resize(1); + visData[0].meshName = "domain"; + visData[0].mesh = std::make_shared( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz ); + + // create a temp data for copy from device + DoubleArray DataTemp(Nx,Ny,Nz); + + if (vis_db->getWithDefault( "save_phase_field", true )){ + + PhaseVar->name = "Phase"; + PhaseVar->type = IO::VariableType::VolumeVariable; + PhaseVar->dim = 1; + PhaseVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(PaseVar); + + ASSERT(visData[0].vars[0]->name=="Phase"); + Array& PhaseData = visData[0].vars[0]->data; + ScaLBL_Comm->RegularLayout(Map,Phase,DataTemp); + fillData.copy(DataTemp,PhaseData); + } + + if (vis_db->getWithDefault( "save_pressure", false )){ + + PressureVar->name = "Pressure"; + 
PressureVar->type = IO::VariableType::VolumeVariable; + PressureVar->dim = 1; + PressureVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(PressureVar); + + ASSERT(visData[0].vars[1]->name=="Pressure"); + Array& PressData = visData[0].vars[1]->data; + ScaLBL_Comm->RegularLayout(Map,Pressure,DataTemp); + fillData.copy(DataTemp,PressData); + } + + if (vis_db->getWithDefault( "save_velocity", false )){ + + VxVar->name = "Velocity_x"; + VxVar->type = IO::VariableType::VolumeVariable; + VxVar->dim = 1; + VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VxVar); + VyVar->name = "Velocity_y"; + VyVar->type = IO::VariableType::VolumeVariable; + VyVar->dim = 1; + VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VyVar); + VzVar->name = "Velocity_z"; + VzVar->type = IO::VariableType::VolumeVariable; + VzVar->dim = 1; + VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VzVar); + + ASSERT(visData[0].vars[2]->name=="Velocity_x"); + ASSERT(visData[0].vars[3]->name=="Velocity_y"); + ASSERT(visData[0].vars[4]->name=="Velocity_z"); + Array& VelxData = visData[0].vars[2]->data; + Array& VelyData = visData[0].vars[3]->data; + Array& VelzData = visData[0].vars[4]->data; + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],DataTemp); + fillData.copy(DataTemp,VelxData); + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],DataTemp); + fillData.copy(DataTemp,VelyData); + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],DataTemp); + fillData.copy(DataTemp,VelzData); + } + + if (vis_db->getWithDefault( "save_distance", false )){ + + SignDistVar->name = "SignDist"; + SignDistVar->type = IO::VariableType::VolumeVariable; + SignDistVar->dim = 1; + SignDistVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(SignDistVar); + + ASSERT(visData[0].vars[5]->name=="SignDist"); + Array& SignData = visData[0].vars[5]->data; + fillData.copy(Averages.SDs,SignData); + } + + if 
(vis_db->getWithDefault( "write_silo", true )){ + IO::writeData( timestep, visData, Dm->Comm ); + } + + if (vis_db->getWithDefault( "save_8bit_raw", true )){ + //TODO + //char CurrentIDFilename[40]; + //sprintf(CurrentIDFilename,"id_t%d.raw",timestep); + //Averages.AggregateLabels(CurrentIDFilename); + } + +} + void ScaLBL_GreyscaleColorModel::WriteDebug(){ // Copy back final phase indicator field and convert to regular layout DoubleArray PhaseField(Nx,Ny,Nz); diff --git a/models/GreyscaleColorModel.h b/models/GreyscaleColorModel.h index d7043257..1ba92c8a 100644 --- a/models/GreyscaleColorModel.h +++ b/models/GreyscaleColorModel.h @@ -90,5 +90,6 @@ private: double MorphInit(const double beta, const double morph_delta); double SeedPhaseField(const double seed_water_in_oil); double MorphOpenConnected(double target_volume_change); + double WriteVisFiles(); }; From d02eff3017bda53b4053897f10e935299e2aa0ed Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 14 Dec 2020 01:08:59 -0500 Subject: [PATCH 098/205] done: add restart and write visfiles for greyscale Color --- common/ScaLBL.h | 2 ++ cpu/GreyscaleColor.cpp | 26 +++++++++++++++++++++++ gpu/GreyscaleColor.cu | 38 ++++++++++++++++++++++++++++++++++ models/GreyscaleColorModel.cpp | 28 +++++++++++++++---------- models/GreyscaleColorModel.h | 2 +- 5 files changed, 84 insertions(+), 12 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index a5efe816..3b129d06 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -186,6 +186,8 @@ extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np); + // MRT MODEL extern "C" void ScaLBL_D3Q19_AAeven_MRT(double *dist, int start, int 
finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz); diff --git a/cpu/GreyscaleColor.cpp b/cpu/GreyscaleColor.cpp index cf01cd84..057c8a7d 100644 --- a/cpu/GreyscaleColor.cpp +++ b/cpu/GreyscaleColor.cpp @@ -1338,6 +1338,32 @@ extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, doubl } } +extern "C" void ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + int idx; + double nA,nB; + + for (idx=start; idx>>(Den, Aq, Bq, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_PhaseField_InitFromRestart: %s \n",cudaGetErrorString(err)); + } +} ////Model-2&3 //extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, // double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index 9ebad61c..f1b37dad 100644 --- a/models/GreyscaleColorModel.cpp +++ b/models/GreyscaleColorModel.cpp @@ -673,13 +673,17 @@ void ScaLBL_GreyscaleColorModel::Create(){ } void ScaLBL_GreyscaleColorModel::Initialize(){ - - if (rank==0) printf ("Initializing distributions \n"); - ScaLBL_D3Q19_Init(fq, Np); - //ScaLBL_D3Q19_GreyscaleColor_Init(fq, Porosity_dvc, Np); /* * This function initializes model */ + if (rank==0) printf ("Initializing distributions \n"); + ScaLBL_D3Q19_Init(fq, Np); + //ScaLBL_D3Q19_GreyscaleColor_Init(fq, Porosity_dvc, Np); + + if (rank==0) printf ("Initializing phase field \n"); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (Restart == true){ if (rank==0){ printf("Reading restart file! 
\n"); @@ -738,11 +742,11 @@ void ScaLBL_GreyscaleColorModel::Initialize(){ ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - } - if (rank==0) printf ("Initializing phase field \n"); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (rank==0) printf ("Initializing phase field from Restart\n"); + ScaLBL_PhaseField_InitFromRestart(Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_PhaseField_InitFromRestart(Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } // establish reservoirs for external bC if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ @@ -794,6 +798,8 @@ void ScaLBL_GreyscaleColorModel::Run(){ double delta_volume_target = 0.0; //TODO -------- For temporary use - should be included in the analysis framework later ------------- + int visualization_interval = 50000; + int restart_interval = 100000; if (analysis_db->keyExists( "visualization_interval" )){ visualization_interval = analysis_db->getScalar( "visualization_interval" ); } @@ -1482,11 +1488,11 @@ void ScaLBL_GreyscaleColorModel::WriteVisFiles(){ PhaseVar->type = IO::VariableType::VolumeVariable; PhaseVar->dim = 1; PhaseVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); - visData[0].vars.push_back(PaseVar); + visData[0].vars.push_back(PhaseVar); ASSERT(visData[0].vars[0]->name=="Phase"); Array& PhaseData = visData[0].vars[0]->data; - ScaLBL_Comm->RegularLayout(Map,Phase,DataTemp); + ScaLBL_CopyToHost(DataTemp.data(), Phi, sizeof(double)*Nx*Ny*Nz); fillData.copy(DataTemp,PhaseData); } @@ -1546,7 +1552,7 @@ void ScaLBL_GreyscaleColorModel::WriteVisFiles(){ ASSERT(visData[0].vars[5]->name=="SignDist"); Array& SignData = visData[0].vars[5]->data; - fillData.copy(Averages.SDs,SignData); + fillData.copy(Averages->SDs,SignData); } if (vis_db->getWithDefault( 
"write_silo", true )){ diff --git a/models/GreyscaleColorModel.h b/models/GreyscaleColorModel.h index 1ba92c8a..8c8b4eee 100644 --- a/models/GreyscaleColorModel.h +++ b/models/GreyscaleColorModel.h @@ -90,6 +90,6 @@ private: double MorphInit(const double beta, const double morph_delta); double SeedPhaseField(const double seed_water_in_oil); double MorphOpenConnected(double target_volume_change); - double WriteVisFiles(); + void WriteVisFiles(); }; From 6252822da25aadbaf9605c6d0cafbb0e99e26adf Mon Sep 17 00:00:00 2001 From: James McClure Date: Sat, 26 Dec 2020 14:00:17 -0500 Subject: [PATCH 099/205] adding analysis capabilities for electrochemistry --- analysis/ElectroChemistry.cpp | 29 +++++++++++++++++++ analysis/ElectroChemistry.h | 52 +++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 analysis/ElectroChemistry.cpp create mode 100644 analysis/ElectroChemistry.h diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp new file mode 100644 index 00000000..571efc95 --- /dev/null +++ b/analysis/ElectroChemistry.cpp @@ -0,0 +1,29 @@ +#include "analysis/ElectroChemistry.h" + +void ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr Dm): + Dm(dm){ + Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz; + Volume=(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz()*1.0; + + ChemicalPotential.resize(Nx,Ny,Nz); ChemicalPotential.fill(0); + ElectricalPotential.resize(Nx,Ny,Nz); ElectricalPotential.fill(0); + Pressure.resize(Nx,Ny,Nz); Pressure.fill(0); + Rho.resize(Nx,Ny,Nz); Rho.fill(0); + Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Gradient of the phase indicator field + Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0); + Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0); + SDs.resize(Nx,Ny,Nz); SDs.fill(0); + + DoubleArray Rho; // density field + DoubleArray ChemicalPotential; // density field + DoubleArray ElectricalPotential; // density field + DoubleArray Pressure; // pressure field + DoubleArray Vel_x; // velocity field + DoubleArray 
Vel_y; + DoubleArray Vel_z; + DoubleArray SDs; +} + +void ElectroChemistryAnalyzer::~ElectroChemistryAnalyzer(){ + +} diff --git a/analysis/ElectroChemistry.h b/analysis/ElectroChemistry.h new file mode 100644 index 00000000..d290cccc --- /dev/null +++ b/analysis/ElectroChemistry.h @@ -0,0 +1,52 @@ +/* + * Sub-phase averaging tools + */ + +#ifndef ElectroChem_INC +#define ElectroChem_INC + +#include +#include "common/Domain.h" +#include "common/Communication.h" +#include "analysis/analysis.h" +#include "analysis/distance.h" +#include "analysis/Minkowski.h" +#include "common/Utilities.h" +#include "common/MPI_Helpers.h" +#include "IO/MeshDatabase.h" +#include "IO/Reader.h" +#include "IO/Writer.h" + +class ElectroChemistryAnalyzer{ +public: + std::shared_ptr Dm; + double Volume; + // input variables + double rho_n, rho_w; + double nu_n, nu_w; + double gamma_wn, beta; + double Fx, Fy, Fz; + + //........................................................................... + int Nx,Ny,Nz; + DoubleArray Rho; // density field + DoubleArray ChemicalPotential; // density field + DoubleArray ElectricalPotential; // density field + DoubleArray Pressure; // pressure field + DoubleArray Vel_x; // velocity field + DoubleArray Vel_y; + DoubleArray Vel_z; + DoubleArray SDs; + + ElectroChemistryAnalyzer(std::shared_ptr Dm); + ~ElectroChemistryAnalyzer(); + + void SetParams(); + void Basic(); + void Write(int time); + +private: + FILE *TIMELOG; +} +#endif + From 1122b0480dffb2741d934c042bb7964c003fdd8a Mon Sep 17 00:00:00 2001 From: James McClure Date: Sat, 26 Dec 2020 20:26:27 -0500 Subject: [PATCH 100/205] electrochem analyzer compiles --- analysis/ElectroChemistry.cpp | 34 ++++++++++++++++++++++++++++++++-- analysis/ElectroChemistry.h | 2 +- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp index 571efc95..84d618b1 100644 --- a/analysis/ElectroChemistry.cpp +++ b/analysis/ElectroChemistry.cpp @@ -1,6 +1,6 @@ 
#include "analysis/ElectroChemistry.h" -void ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr Dm): +ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm): Dm(dm){ Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz; Volume=(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz()*1.0; @@ -22,8 +22,38 @@ void ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr DoubleArray Vel_y; DoubleArray Vel_z; DoubleArray SDs; + + if (Dm->rank()==0){ + bool WriteHeader=false; + TIMELOG = fopen("electrokinetic.csv","r"); + if (TIMELOG != NULL) + fclose(TIMELOG); + else + WriteHeader=true; + + TIMELOG = fopen("electrokinetic.csv","a+"); + if (WriteHeader) + { + // If timelog is empty, write a short header to list the averages + //fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); + fprintf(TIMELOG,"sw krw krn vw vn pw pn\n"); + } + } + } -void ElectroChemistryAnalyzer::~ElectroChemistryAnalyzer(){ +ElectroChemistryAnalyzer::~ElectroChemistryAnalyzer(){ + +} + +void ElectroChemistryAnalyzer::SetParams(){ + +} + +void ElectroChemistryAnalyzer::Basic(){ + +} + +void ElectroChemistryAnalyzer::Write(int time){ } diff --git a/analysis/ElectroChemistry.h b/analysis/ElectroChemistry.h index d290cccc..ad3a3578 100644 --- a/analysis/ElectroChemistry.h +++ b/analysis/ElectroChemistry.h @@ -47,6 +47,6 @@ public: private: FILE *TIMELOG; -} +}; #endif From 879f8637bfcacbdf5b647b48ab9060c94a24d546 Mon Sep 17 00:00:00 2001 From: James McClure Date: Sat, 26 Dec 2020 20:26:55 -0500 Subject: [PATCH 101/205] fix getVelocity --- models/StokesModel.cpp | 17 ++++++----------- models/StokesModel.h | 1 - 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 964baaae..a365c46e 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -375,25 +375,20 @@ void ScaLBL_StokesModel::getVelocity(int timestep){ ScaLBL_D3Q19_Momentum(fq, Velocity, Np); 
ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - DoubleArray PhaseField(Nx,Ny,Nz); - - ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); - Velocity_LB_to_Phys(PhaseField); + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); + Velocity_LB_to_Phys(Velocity_x); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); sprintf(OutputFilename,"Velocity_X_Time_%i.raw",timestep); - Mask->AggregateLabels(OutputFilename,PhaseField); - ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); - Velocity_LB_to_Phys(PhaseField); + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); + Velocity_LB_to_Phys(Velocity_y); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); sprintf(OutputFilename,"Velocity_Y_Time_%i.raw",timestep); - Mask->AggregateLabels(OutputFilename,PhaseField); - ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); - Velocity_LB_to_Phys(PhaseField); + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); + Velocity_LB_to_Phys(Velocity_z); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); sprintf(OutputFilename,"Velocity_Z_Time_%i.raw",timestep); - Mask->AggregateLabels(OutputFilename,PhaseField); } void ScaLBL_StokesModel::getVelocity_debug(int timestep){ diff --git a/models/StokesModel.h b/models/StokesModel.h index b7ad345e..6375d4ff 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -69,7 +69,6 @@ public: double *Pressure; //Minkowski Morphology; - DoubleArray Velocity_x; DoubleArray Velocity_y; DoubleArray Velocity_z; From 9826ef5624f65f05ae853399d129d654786506d7 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 29 Dec 2020 14:04:43 -0500 Subject: [PATCH 102/205] adding silo vis capabilities to electrochem --- analysis/ElectroChemistry.cpp | 119 ++++++++++++++++-- analysis/ElectroChemistry.h | 13 +- models/IonModel.cpp | 16 +-- models/IonModel.h | 7 +- models/PoissonSolver.cpp | 32 ++--- models/PoissonSolver.h | 8 +- models/StokesModel.cpp | 17 ++- models/StokesModel.h | 6 +- tests/TestIonModel.cpp | 2 +- tests/TestNernstPlanck.cpp | 6 +- 
tests/TestPNP_Stokes.cpp | 8 +- tests/TestPoissonSolver.cpp | 4 +- ...m_electrokinetic_SingleFluid_simulator.cpp | 17 +-- 13 files changed, 176 insertions(+), 79 deletions(-) diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp index 84d618b1..b052d459 100644 --- a/analysis/ElectroChemistry.cpp +++ b/analysis/ElectroChemistry.cpp @@ -1,7 +1,11 @@ #include "analysis/ElectroChemistry.h" ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm): - Dm(dm){ + Dm(dm), + fillData(dm->Comm,dm->rank_info,{dm->Nx-2,dm->Ny-2,dm->Nz-2},{1,1,1},0,1) +{ + + MPI_Comm_dup(dm->Comm,&comm); Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz; Volume=(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz()*1.0; @@ -14,15 +18,6 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm): Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0); SDs.resize(Nx,Ny,Nz); SDs.fill(0); - DoubleArray Rho; // density field - DoubleArray ChemicalPotential; // density field - DoubleArray ElectricalPotential; // density field - DoubleArray Pressure; // pressure field - DoubleArray Vel_x; // velocity field - DoubleArray Vel_y; - DoubleArray Vel_z; - DoubleArray SDs; - if (Dm->rank()==0){ bool WriteHeader=false; TIMELOG = fopen("electrokinetic.csv","r"); @@ -36,7 +31,7 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm): { // If timelog is empty, write a short header to list the averages //fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); - fprintf(TIMELOG,"sw krw krn vw vn pw pn\n"); + fprintf(TIMELOG,"TBD TBD\n"); } } @@ -50,10 +45,108 @@ void ElectroChemistryAnalyzer::SetParams(){ } -void ElectroChemistryAnalyzer::Basic(){ +void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes){ + + Poisson.getElectricPotential(ElectricalPotential); + for (int ion=0; ion input_db, int timestep){ + auto vis_db = input_db->getDatabase( "Visualization" ); + char 
VisName[40]; + + IO::initialize("","silo","false"); + // Create the MeshDataStruct + visData.resize(1); + + visData[0].meshName = "domain"; + visData[0].mesh = std::make_shared( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz ); + auto ElectricPotential = std::make_shared(); + auto IonConcentration = std::make_shared(); + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); + + if (vis_db->getWithDefault( "save_electric_potential", true )){ + ElectricPotential->name = "ElectricPotential"; + ElectricPotential->type = IO::VariableType::VolumeVariable; + ElectricPotential->dim = 1; + ElectricPotential->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(ElectricPotential); + } + + if (vis_db->getWithDefault( "save_concentration", true )){ + for (int ion=0; ionname = VisName; + IonConcentration->type = IO::VariableType::VolumeVariable; + IonConcentration->dim = 1; + IonConcentration->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(IonConcentration); + } + + } + if (vis_db->getWithDefault( "save_velocity", false )){ + VxVar->name = "Velocity_x"; + VxVar->type = IO::VariableType::VolumeVariable; + VxVar->dim = 1; + VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VxVar); + VyVar->name = "Velocity_y"; + VyVar->type = IO::VariableType::VolumeVariable; + VyVar->dim = 1; + VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VyVar); + VzVar->name = "Velocity_z"; + VzVar->type = IO::VariableType::VolumeVariable; + VzVar->dim = 1; + VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VzVar); + } + + if (vis_db->getWithDefault( "save_electric_potential", true )){ + ASSERT(visData[0].vars[0]->name=="ElectricPotential"); + Poisson.getElectricPotential(ElectricalPotential); + Array& ElectricPotentialData = visData[0].vars[0]->data; + fillData.copy(ElectricalPotential,ElectricPotentialData); + } + + if 
(vis_db->getWithDefault( "save_concentration", true )){ + for (int ion=0; ionname = VisName; + ASSERT(visData[0].vars[1]->name==VisName); + Array& IonConcentrationData = visData[0].vars[1]->data; + Ion.getIonConcentration(Rho,ion); + fillData.copy(Rho,IonConcentrationData); + } + } + + if (vis_db->getWithDefault( "save_velocity", false )){ + ASSERT(visData[0].vars[2]->name=="Velocity_x"); + ASSERT(visData[0].vars[3]->name=="Velocity_y"); + ASSERT(visData[0].vars[4]->name=="Velocity_z"); + Stokes.getVelocity(Vel_x,Vel_y,Vel_z); + Array& VelxData = visData[0].vars[2]->data; + Array& VelyData = visData[0].vars[3]->data; + Array& VelzData = visData[0].vars[4]->data; + fillData.copy(Vel_x,VelxData); + fillData.copy(Vel_y,VelyData); + fillData.copy(Vel_z,VelzData); + } + + if (vis_db->getWithDefault( "write_silo", true )) + IO::writeData( timestep, visData, comm ); + +/* if (vis_db->getWithDefault( "save_8bit_raw", true )){ + char CurrentIDFilename[40]; + sprintf(CurrentIDFilename,"id_t%d.raw",timestep); + Averages.AggregateLabels(CurrentIDFilename); + } +*/ } diff --git a/analysis/ElectroChemistry.h b/analysis/ElectroChemistry.h index ad3a3578..fa404fb0 100644 --- a/analysis/ElectroChemistry.h +++ b/analysis/ElectroChemistry.h @@ -1,5 +1,5 @@ /* - * Sub-phase averaging tools + * averaging tools for electrochemistry */ #ifndef ElectroChem_INC @@ -16,9 +16,14 @@ #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" +#include "models/IonModel.h" +#include "models/PoissonSolver.h" +#include "models/StokesModel.h" class ElectroChemistryAnalyzer{ public: + MPI_Comm comm; + int tag; std::shared_ptr Dm; double Volume; // input variables @@ -42,10 +47,12 @@ public: ~ElectroChemistryAnalyzer(); void SetParams(); - void Basic(); - void Write(int time); + void Basic( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes); + void WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, std::shared_ptr input_db, int 
timestep); private: + std::vector visData; + fillHalo fillData; FILE *TIMELOG; }; #endif diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 852bc194..6fbb627e 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -900,17 +900,13 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ //if (rank==0) printf("********************************************************\n"); } -void ScaLBL_IonModel::getIonConcentration(int timestep){ - //This function wirte out the data in a normal layout (by aggregating all decomposed domains) - DoubleArray PhaseField(Nx,Ny,Nz); - for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - IonConcentration_LB_to_Phys(PhaseField); +void ScaLBL_IonModel::getIonConcentration(DoubleArray &IonConcentration, const int ic){ + //This function wirte out the data in a normal layout (by aggregating all decomposed domains) + + ScaLBL_Comm->RegularLayout(Map,&Ci[ic*Np],IonConcentration); + ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + IonConcentration_LB_to_Phys(IonConcentration); - sprintf(OutputFilename,"Ion%02i_Time_%i.raw",ic+1,timestep); - Mask->AggregateLabels(OutputFilename,PhaseField); - } } void ScaLBL_IonModel::getIonConcentration_debug(int timestep){ diff --git a/models/IonModel.h b/models/IonModel.h index 59382002..4b370978 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -1,6 +1,10 @@ /* * Ion transporte LB Model */ + +#ifndef ScaLBL_IonModel_INC +#define ScaLBL_IonModel_INC + #include #include #include @@ -30,7 +34,7 @@ public: void Create(); void Initialize(); void Run(double *Velocity, double *ElectricField); - void getIonConcentration(int timestep); + void getIonConcentration(DoubleArray &IonConcentration, const int ic); void getIonConcentration_debug(int timestep); void DummyFluidVelocity(); void DummyElectricField(); @@ -95,3 +99,4 @@ private: void AssignIonConcentration_FromFile(double *Ci,const vector &File_ion); void 
IonConcentration_LB_to_Phys(DoubleArray &Den_reg); }; +#endif diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index c50d8816..b0dde2c7 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -587,38 +587,26 @@ void ScaLBL_Poisson::getElectricPotential_debug(int timestep){ fclose(OUTFILE); } -void ScaLBL_Poisson::getElectricPotential(int timestep){ +void ScaLBL_Poisson::getElectricPotential(DoubleArray &ReturnValues){ //This function wirte out the data in a normal layout (by aggregating all decomposed domains) - DoubleArray PhaseField(Nx,Ny,Nz); //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); - ScaLBL_CopyToHost(PhaseField.data(),Psi,sizeof(double)*Nx*Ny*Nz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - - sprintf(OutputFilename,"Electric_Potential_Time_%i.raw",timestep); - Mask->AggregateLabels(OutputFilename,PhaseField); + ScaLBL_CopyToHost(ReturnValues.data(),Psi,sizeof(double)*Nx*Ny*Nz); } -void ScaLBL_Poisson::getElectricField(int timestep){ +void ScaLBL_Poisson::getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z){ - DoubleArray PhaseField(Nx,Ny,Nz); - - ScaLBL_Comm->RegularLayout(Map,&ElectricField[0*Np],PhaseField); - ElectricField_LB_to_Phys(PhaseField); + ScaLBL_Comm->RegularLayout(Map,&ElectricField[0*Np],Values_x); + ElectricField_LB_to_Phys(Values_x); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - sprintf(OutputFilename,"ElectricField_X_Time_%i.raw",timestep); - Mask->AggregateLabels(OutputFilename,PhaseField); - ScaLBL_Comm->RegularLayout(Map,&ElectricField[1*Np],PhaseField); - ElectricField_LB_to_Phys(PhaseField); + ScaLBL_Comm->RegularLayout(Map,&ElectricField[1*Np],Values_y); + ElectricField_LB_to_Phys(Values_y); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - sprintf(OutputFilename,"ElectricField_Y_Time_%i.raw",timestep); - Mask->AggregateLabels(OutputFilename,PhaseField); - ScaLBL_Comm->RegularLayout(Map,&ElectricField[2*Np],PhaseField); - ElectricField_LB_to_Phys(PhaseField); + 
ScaLBL_Comm->RegularLayout(Map,&ElectricField[2*Np],Values_z); + ElectricField_LB_to_Phys(Values_z); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - sprintf(OutputFilename,"ElectricField_Z_Time_%i.raw",timestep); - Mask->AggregateLabels(OutputFilename,PhaseField); + } void ScaLBL_Poisson::getElectricField_debug(int timestep){ diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 241e871a..74abd775 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -16,6 +16,9 @@ #include "analysis/Minkowski.h" #include "ProfilerApp.h" +#ifndef ScaLBL_POISSON_INC +#define ScaLBL_POISSON_INC + class ScaLBL_Poisson{ public: ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM); @@ -29,9 +32,9 @@ public: void Create(); void Initialize(); void Run(double *ChargeDensity); - void getElectricPotential(int timestep); + void getElectricPotential(DoubleArray &ReturnValues); void getElectricPotential_debug(int timestep); - void getElectricField(int timestep); + void getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z); void getElectricField_debug(int timestep); void DummyChargeDensity();//for debugging @@ -96,3 +99,4 @@ private: void getConvergenceLog(int timestep,double error); }; +#endif diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index a365c46e..086e3633 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -370,25 +370,22 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ } } -void ScaLBL_StokesModel::getVelocity(int timestep){ +void ScaLBL_StokesModel::getVelocity(DoubleArray &Vel_x, DoubleArray &Vel_y, DoubleArray &Vel_z){ //get velocity in physical unit [m/sec] ScaLBL_D3Q19_Momentum(fq, Velocity, Np); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); - Velocity_LB_to_Phys(Velocity_x); + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Vel_x); + Velocity_LB_to_Phys(Vel_x); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - 
sprintf(OutputFilename,"Velocity_X_Time_%i.raw",timestep); - ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); - Velocity_LB_to_Phys(Velocity_y); + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Vel_y); + Velocity_LB_to_Phys(Vel_y); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - sprintf(OutputFilename,"Velocity_Y_Time_%i.raw",timestep); - ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); - Velocity_LB_to_Phys(Velocity_z); + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Vel_z); + Velocity_LB_to_Phys(Vel_z); ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - sprintf(OutputFilename,"Velocity_Z_Time_%i.raw",timestep); } void ScaLBL_StokesModel::getVelocity_debug(int timestep){ diff --git a/models/StokesModel.h b/models/StokesModel.h index 6375d4ff..8da373bd 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -1,6 +1,9 @@ /* * Multi-relaxation time LBM Model */ +#ifndef ScaLBL_StokesModel_INC +#define ScaLBL_StokesModel_INC + #include #include #include @@ -31,7 +34,7 @@ public: void Run(); void Run_Lite(double *ChargeDensity, double *ElectricField); void VelocityField(); - void getVelocity(int timestep); + void getVelocity(DoubleArray &Velx, DoubleArray &Vel_y, DoubleArray &Vel_z); void getVelocity_debug(int timestep); double CalVelocityConvergence(double& flow_rate_previous,double *ChargeDensity, double *ElectricField); @@ -86,3 +89,4 @@ private: void Velocity_LB_to_Phys(DoubleArray &Vel_reg); vector computeElectricForceAvg(double *ChargeDensity, double *ElectricField); }; +#endif \ No newline at end of file diff --git a/tests/TestIonModel.cpp b/tests/TestIonModel.cpp index 0b57ff1c..2a0a02a9 100644 --- a/tests/TestIonModel.cpp +++ b/tests/TestIonModel.cpp @@ -76,7 +76,7 @@ int main(int argc, char **argv) error = IonModel.CalIonDenConvergence(ci_avg_previous); } } - IonModel.getIonConcentration(timestep); + IonModel.getIonConcentration_debug(timestep); if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if 
(rank==0) printf("*************************************************************\n"); diff --git a/tests/TestNernstPlanck.cpp b/tests/TestNernstPlanck.cpp index 96d9c388..def67d5b 100644 --- a/tests/TestNernstPlanck.cpp +++ b/tests/TestNernstPlanck.cpp @@ -87,9 +87,9 @@ int main(int argc, char **argv) } } - PoissonSolver.getElectricPotential(timestep); - PoissonSolver.getElectricField(timestep); - IonModel.getIonConcentration(timestep); + PoissonSolver.getElectricPotential_debug(timestep); + PoissonSolver.getElectricField_debug(timestep); + IonModel.getIonConcentration_debug(timestep); if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); diff --git a/tests/TestPNP_Stokes.cpp b/tests/TestPNP_Stokes.cpp index 0e0c3e81..bf05f73c 100644 --- a/tests/TestPNP_Stokes.cpp +++ b/tests/TestPNP_Stokes.cpp @@ -107,10 +107,10 @@ int main(int argc, char **argv) } } - PoissonSolver.getElectricPotential(timestep); - PoissonSolver.getElectricField(timestep); - IonModel.getIonConcentration(timestep); - StokesModel.getVelocity(timestep); + PoissonSolver.getElectricPotential_debug(timestep); + PoissonSolver.getElectricField_debug(timestep); + IonModel.getIonConcentration_debug(timestep); + StokesModel.getVelocity_debug(timestep); if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); diff --git a/tests/TestPoissonSolver.cpp b/tests/TestPoissonSolver.cpp index 8753a7b0..32353f65 100644 --- a/tests/TestPoissonSolver.cpp +++ b/tests/TestPoissonSolver.cpp @@ -59,8 +59,8 @@ int main(int argc, char **argv) PoissonSolver.DummyChargeDensity(); PoissonSolver.Run(PoissonSolver.ChargeDensityDummy); - PoissonSolver.getElectricPotential(1); - PoissonSolver.getElectricField(1); + PoissonSolver.getElectricPotential_debug(1); + 
PoissonSolver.getElectricField_debug(1); if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); diff --git a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp index 0c1054a2..2b3726a4 100644 --- a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp +++ b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp @@ -12,6 +12,7 @@ #include "models/PoissonSolver.h" #include "models/MultiPhysController.h" #include "common/Utilities.h" +#include "analysis/ElectroChemistry.h" using namespace std; @@ -53,7 +54,7 @@ int main(int argc, char **argv) ScaLBL_IonModel IonModel(rank,nprocs,comm); ScaLBL_Poisson PoissonSolver(rank,nprocs,comm); ScaLBL_Multiphys_Controller Study(rank,nprocs,comm);//multiphysics controller coordinating multi-model coupling - + // Load controller information Study.ReadParams(filename); @@ -68,7 +69,10 @@ int main(int argc, char **argv) IonModel.SetDomain(); IonModel.ReadInput(); - IonModel.Create(); + IonModel.Create(); + + // Create analysis object + ElectroChemistryAnalyzer Analysis(IonModel.Dm); // Get internal iteration number StokesModel.timestepMax = Study.getStokesNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); @@ -96,18 +100,17 @@ int main(int argc, char **argv) timestep++;//AA operations if (timestep%Study.visualization_interval==0){ - PoissonSolver.getElectricPotential(timestep); + Analysis.WriteVis(IonModel,PoissonSolver,StokesModel,Study.db,timestep); + /* PoissonSolver.getElectricPotential(timestep); PoissonSolver.getElectricField(timestep); IonModel.getIonConcentration(timestep); StokesModel.getVelocity(timestep); + */ } } if (rank==0) printf("Save simulation raw data at maximum timestep\n"); - PoissonSolver.getElectricPotential(timestep); - PoissonSolver.getElectricField(timestep); - IonModel.getIonConcentration(timestep); - 
StokesModel.getVelocity(timestep); + Analysis.WriteVis(IonModel,PoissonSolver,StokesModel,Study.db,timestep); if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); From 6f0a10c48d0d54c1d90d6b9c74cbac8b592af409 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 31 Dec 2020 11:19:12 -0500 Subject: [PATCH 103/205] adding basic analysis capabilities --- analysis/ElectroChemistry.cpp | 78 +++++++++++++++++++++++++++++++++-- analysis/ElectroChemistry.h | 2 +- 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp index b052d459..4a7bcf29 100644 --- a/analysis/ElectroChemistry.cpp +++ b/analysis/ElectroChemistry.cpp @@ -38,21 +38,94 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm): } ElectroChemistryAnalyzer::~ElectroChemistryAnalyzer(){ - + if (Dm->rank()==0){ + fclose(TIMELOG); + } } void ElectroChemistryAnalyzer::SetParams(){ } -void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes){ +void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep){ + int i,j,k; Poisson.getElectricPotential(ElectricalPotential); + + /* local sub-domain averages */ + double rho_avg_local[Ion.number_ion_species]; + double rho_mu_avg_local[Ion.number_ion_species]; + double rho_mu_fluctuation_local[Ion.number_ion_species]; + double rho_psi_avg_local[Ion.number_ion_species]; + double rho_psi_fluctuation_local[Ion.number_ion_species]; + /* global averages */ + double rho_avg_global[Ion.number_ion_species]; + double rho_mu_avg_global[Ion.number_ion_species]; + double rho_mu_fluctuation_global[Ion.number_ion_species]; + double rho_psi_avg_global[Ion.number_ion_species]; + double rho_psi_fluctuation_global[Ion.number_ion_species]; + for (int ion=0; ionComm, 
rho_avg_local[ion]); + rho_mu_avg_global[ion]=sumReduce( Dm->Comm, rho_mu_avg_local[ion]); + rho_psi_avg_global[ion]=sumReduce( Dm->Comm, rho_psi_avg_local[ion]); + + rho_mu_avg_global[ion] /= rho_avg_global[ion]; + rho_psi_avg_global[ion] /= rho_avg_global[ion]; } + for (int ion=0; ionComm, rho_mu_fluctuation_local[ion]); + rho_psi_fluctuation_global[ion]=sumReduce( Dm->Comm, rho_psi_fluctuation_local[ion]); + } + if (Dm->rank()==0){ + fprintf(TIMELOG,"%i ",timestep); + for (int ion=0; ion input_db, int timestep){ @@ -89,7 +162,6 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P IonConcentration->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); visData[0].vars.push_back(IonConcentration); } - } if (vis_db->getWithDefault( "save_velocity", false )){ VxVar->name = "Velocity_x"; diff --git a/analysis/ElectroChemistry.h b/analysis/ElectroChemistry.h index fa404fb0..8d613ef7 100644 --- a/analysis/ElectroChemistry.h +++ b/analysis/ElectroChemistry.h @@ -47,7 +47,7 @@ public: ~ElectroChemistryAnalyzer(); void SetParams(); - void Basic( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes); + void Basic( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep); void WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, std::shared_ptr input_db, int timestep); private: From 0aa352cf24cc84e97c4ba36a1401cc9d0502647f Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sat, 2 Jan 2021 19:21:15 -0500 Subject: [PATCH 104/205] update writing IonConcentration for visualization --- analysis/ElectroChemistry.cpp | 38 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp index 4a7bcf29..30487f03 100644 --- a/analysis/ElectroChemistry.cpp +++ b/analysis/ElectroChemistry.cpp @@ -140,7 +140,10 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P 
visData[0].meshName = "domain"; visData[0].mesh = std::make_shared( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz ); auto ElectricPotential = std::make_shared(); - auto IonConcentration = std::make_shared(); + std::vector> IonConcentration; + for (int ion=0; ion()); + } auto VxVar = std::make_shared(); auto VyVar = std::make_shared(); auto VzVar = std::make_shared(); @@ -155,14 +158,15 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P if (vis_db->getWithDefault( "save_concentration", true )){ for (int ion=0; ionname = VisName; - IonConcentration->type = IO::VariableType::VolumeVariable; - IonConcentration->dim = 1; - IonConcentration->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); - visData[0].vars.push_back(IonConcentration); + sprintf(VisName,"IonConcentration_%i",ion+1); + IonConcentration[ion]->name = VisName; + IonConcentration[ion]->type = IO::VariableType::VolumeVariable; + IonConcentration[ion]->dim = 1; + IonConcentration[ion]->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(IonConcentration[ion]); } } + if (vis_db->getWithDefault( "save_velocity", false )){ VxVar->name = "Velocity_x"; VxVar->type = IO::VariableType::VolumeVariable; @@ -190,23 +194,23 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P if (vis_db->getWithDefault( "save_concentration", true )){ for (int ion=0; ionname = VisName; - ASSERT(visData[0].vars[1]->name==VisName); - Array& IonConcentrationData = visData[0].vars[1]->data; + sprintf(VisName,"IonConcentration_%i",ion+1); + IonConcentration[ion]->name = VisName; + ASSERT(visData[0].vars[1+ion]->name==VisName); + Array& IonConcentrationData = visData[0].vars[1+ion]->data; Ion.getIonConcentration(Rho,ion); fillData.copy(Rho,IonConcentrationData); } } if (vis_db->getWithDefault( "save_velocity", false )){ - ASSERT(visData[0].vars[2]->name=="Velocity_x"); - ASSERT(visData[0].vars[3]->name=="Velocity_y"); - 
ASSERT(visData[0].vars[4]->name=="Velocity_z"); + ASSERT(visData[0].vars[1+Ion.number_ion_species+0]->name=="Velocity_x"); + ASSERT(visData[0].vars[1+Ion.number_ion_species+1]->name=="Velocity_y"); + ASSERT(visData[0].vars[1+Ion.number_ion_species+2]->name=="Velocity_z"); Stokes.getVelocity(Vel_x,Vel_y,Vel_z); - Array& VelxData = visData[0].vars[2]->data; - Array& VelyData = visData[0].vars[3]->data; - Array& VelzData = visData[0].vars[4]->data; + Array& VelxData = visData[0].vars[1+Ion.number_ion_species+0]->data; + Array& VelyData = visData[0].vars[1+Ion.number_ion_species+1]->data; + Array& VelzData = visData[0].vars[1+Ion.number_ion_species+2]->data; fillData.copy(Vel_x,VelxData); fillData.copy(Vel_y,VelyData); fillData.copy(Vel_z,VelzData); From 5df60909c20b40110f946e9375134322cefbe893 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 Jan 2021 00:07:11 -0500 Subject: [PATCH 105/205] add analysis interval to simulator --- tests/lbpm_electrokinetic_SingleFluid_simulator.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp index 2b3726a4..689745b9 100644 --- a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp +++ b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp @@ -99,13 +99,16 @@ int main(int argc, char **argv) timestep++;//AA operations + if (timestep%Study.analysis_interval==0){ + Analysis.Basic(IonModel,PoissonSolver,StokesModel,timestep); + } if (timestep%Study.visualization_interval==0){ Analysis.WriteVis(IonModel,PoissonSolver,StokesModel,Study.db,timestep); - /* PoissonSolver.getElectricPotential(timestep); + /* PoissonSolver.getElectricPotential(timestep); PoissonSolver.getElectricField(timestep); IonModel.getIonConcentration(timestep); StokesModel.getVelocity(timestep); - */ + */ } } From 7a5e6b24fed4e54fa6492c90b78051de72689703 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 Jan 2021 19:55:09 -0500 
Subject: [PATCH 106/205] laptop build script --- sample_scripts/configure_arden | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100755 sample_scripts/configure_arden diff --git a/sample_scripts/configure_arden b/sample_scripts/configure_arden new file mode 100755 index 00000000..057732a9 --- /dev/null +++ b/sample_scripts/configure_arden @@ -0,0 +1,24 @@ +#!/bin/bash + +cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ + -D CMAKE_CXX_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicxx \ + -D CMAKE_C_FLAGS="-O3 -fPIC" \ + -D CMAKE_CXX_FLAGS="-O3 -fPIC " \ + -D CMAKE_CXX_STANDARD=14 \ + -D MPI_COMPILER:BOOL=TRUE \ + -D MPIEXEC=mpirun \ + -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ + -D CMAKE_BUILD_TYPE:STRING=Release \ + -D CUDA_FLAGS="-arch sm_35" \ + -D CUDA_HOST_COMPILER="/usr/bin/gcc" \ + -D HDF5_DIRECTORY="/opt/arden/hdf5/1.8.12" \ + -D HDF5_LIB="/opt/arden/hdf5/1.8.12/lib/libhdf5.a"\ + -D USE_SILO=1 \ + -D SILO_LIB="/opt/arden/silo/4.10.2/lib/libsiloh5.a" \ + -D SILO_DIRECTORY="/opt/arden/silo/4.10.2" \ + -D USE_NETCDF=0 \ + -D NETCDF_DIRECTORY="/opt/arden/netcdf/4.6.1" \ + -D USE_CUDA=0 \ + -D USE_TIMER=0 \ + ~/Programs/LBPM-WIA + From 2b9d776113ba6a5ec85955f9e837b70efc32e948 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 4 Jan 2021 20:13:48 -0500 Subject: [PATCH 107/205] save the work; to be compiled, tested and validated; add sine and cosine voltage input for Poisson solver --- models/MultiPhysController.cpp | 12 +- models/MultiPhysController.h | 2 + models/PoissonSolver.cpp | 196 +++++++++++++++--- models/PoissonSolver.h | 13 +- tests/TestPoissonSolver.cpp | 30 ++- ...m_electrokinetic_SingleFluid_simulator.cpp | 6 +- 6 files changed, 223 insertions(+), 36 deletions(-) diff --git a/models/MultiPhysController.cpp b/models/MultiPhysController.cpp index fcfb5403..9b361ad8 100644 --- a/models/MultiPhysController.cpp +++ b/models/MultiPhysController.cpp @@ -2,7 +2,7 @@ 
ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM): rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0), -analysis_interval(0),visualization_interval(0),tolerance(0),comm(COMM) +analysis_interval(0),visualization_interval(0),tolerance(0),time_conv_max(0),comm(COMM) { } @@ -25,6 +25,7 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){ analysis_interval = 500; visualization_interval = 10000; tolerance = 1.0e-6; + time_conv_max = 0.0; // load input parameters if (study_db->keyExists( "timestepMax" )){ @@ -135,3 +136,12 @@ vector ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(double Stoke } return num_iter_ion; } + +void ScaLBL_Multiphys_Controller::getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv){ + //Return maximum of the time converting factor from Stokes and ion solvers + vector TimeConv; + + TimeConv.assign(IonTimeConv.begin(),IonTimeConv.end()); + TimeConv.insert(TimeConv.begin(),StokesTimeConv); + time_conv_max = *max_element(TimeConv.begin(),TimeConv.end()); +} diff --git a/models/MultiPhysController.h b/models/MultiPhysController.h index f217248f..988f0225 100644 --- a/models/MultiPhysController.h +++ b/models/MultiPhysController.h @@ -27,6 +27,7 @@ public: int getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); vector getIonNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); //void getIonNumIter_PNP_coupling(double StokesTimeConv,vector &IonTimeConv,vector &IonTimeMax); + void getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); bool Restart; int timestepMax; @@ -35,6 +36,7 @@ public: int analysis_interval; int visualization_interval; double tolerance; + double time_conv_max; //double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity int rank,nprocs; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index b0dde2c7..96d737bb 100644 --- 
a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -8,8 +8,11 @@ ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM): rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),tolerance(0),h(0), epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), -chargeDen_dummy(0),WriteLog(0), -nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) +chargeDen_dummy(0),WriteLog(0),nprocx(0),nprocy(0),nprocz(0), +BoundaryConditionInlet(0),BoundaryConditionOutlet(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0), +Vin0(0),freqIn(0),t0_In(0),Vin_Type(0),Vout0(0),freqOut(0),t0_Out(0),Vout_Type(0), +TestPeriodic(0),TestPeriodicTime(0),TestPeriodicTimeConv(0),TestPeriodicSaveInterval(0), +comm(COMM) { } @@ -33,10 +36,12 @@ void ScaLBL_Poisson::ReadParams(string filename){ epsilonR = 78.4;//default dielectric constant of water epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity analysis_interval = 1000; - Vin = 1.0; //Boundary-z (inlet) electric potential - Vout = 1.0; //Boundary-Z (outlet) electric potential chargeDen_dummy = 1.0e-3;//For debugging;unit=[C/m^3] WriteLog = false; + TestPeriodic = false; + TestPeriodicTime = 1.0;//unit: [sec] + TestPeriodicTimeConv = 0.01; //unit [sec/lt] + TestPeriodicSaveInterval = 0.1; //unit [sec] // LB-Poisson Model parameters if (electric_db->keyExists( "timestepMax" )){ @@ -57,6 +62,18 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( "WriteLog" )){ WriteLog = electric_db->getScalar( "WriteLog" ); } + if (electric_db->keyExists( "TestPeriodic" )){ + TestPeriodic = electric_db->getScalar( "TestPeriodic" ); + } + if (electric_db->keyExists( "TestPeriodicTime" )){ + TestPeriodicTime = electric_db->getScalar( "TestPeriodicTime" ); + } + if (electric_db->keyExists( "TestPeriodicTimeConv" )){ + TestPeriodicTimeConv = electric_db->getScalar( "TestPeriodicTimeConv" ); + } + if 
(electric_db->keyExists( "TestPeriodicSaveInterval" )){ + TestPeriodicSaveInterval = electric_db->getScalar( "TestPeriodicSaveInterval" ); + } // Read solid boundary condition specific to Poisson equation BoundaryConditionSolid = 1; @@ -65,10 +82,15 @@ void ScaLBL_Poisson::ReadParams(string filename){ } // Read boundary condition for electric potential // BC = 0: normal periodic BC - // BC = 1: fixed inlet and outlet potential - BoundaryCondition = 0; - if (electric_db->keyExists( "BC" )){ - BoundaryCondition = electric_db->getScalar( "BC" ); + // BC = 1: fixed electric potential + // BC = 2: sine/cosine periodic electric potential (need extra input parameters) + BoundaryConditionInlet = 0; + BoundaryConditionOutlet = 0; + if (electric_db->keyExists( "BC_Inlet" )){ + BoundaryConditionInlet = electric_db->getScalar( "BC_Inlet" ); + } + if (electric_db->keyExists( "BC_Outlet" )){ + BoundaryConditionOutlet = electric_db->getScalar( "BC_Outlet" ); } // Read domain parameters @@ -342,15 +364,91 @@ void ScaLBL_Poisson::Create(){ void ScaLBL_Poisson::Potential_Init(double *psi_init){ - if (BoundaryCondition==1){ - if (electric_db->keyExists( "Vin" )){ - Vin = electric_db->getScalar( "Vin" ); - } - if (electric_db->keyExists( "Vout" )){ - Vout = electric_db->getScalar( "Vout" ); - } + //set up default boundary input parameters + Vin0 = Vout0 = 1.0; //unit: [V] + freqIn = freqOut = 50.0; //unit: [Hz] + t0_In = t0_Out = 0.0; //unit: [sec] + Vin_Type = Vout_Type = 1; //1->sin; 2->cos + Vin = 1.0; //Boundary-z (inlet) electric potential + Vout = 1.0; //Boundary-Z (outlet) electric potential + + if (BoundaryConditionInlet>0){ + switch (BoundaryConditionInlet){ + case 1: + if (electric_db->keyExists( "Vin" )){ + Vin = electric_db->getScalar( "Vin" ); + } + if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g \n",Vin); + break; + case 2: + if (electric_db->keyExists( "Vin0" )){//voltage amplitude; unit: Volt + Vin0 = electric_db->getScalar( 
"Vin0" ); + } + if (electric_db->keyExists( "freqIn" )){//unit: Hz + freqIn = electric_db->getScalar( "freqIn" ); + } + if (electric_db->keyExists( "t0_In" )){//timestep shift, unit: lt + t0_In = electric_db->getScalar( "t0_In" ); + } + if (electric_db->keyExists( "Vin_Type" )){ + //type=1 -> sine + //tyep=2 -> cosine + Vin_Type = electric_db->getScalar( "Vin_Type" ); + if (Vin_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vin_Type is currently not supported! \n"); + } + if (rank==0){ + if (Vin_Type==1){ + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] \n",Vin,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin,freqIn,t0_In); + } + else if (Vin_Type==2){ + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] \n",Vin,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin,freqIn,t0_In); + } + } + break; + } + } + if (BoundaryConditionOutlet>0){ + switch (BoundaryConditionOutlet){ + case 1: + if (electric_db->keyExists( "Vout" )){ + Vout = electric_db->getScalar( "Vout" ); + } + if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vin = %.3g \n",Vout); + break; + case 2: + if (electric_db->keyExists( "Vout0" )){//voltage amplitude; unit: Volt + Vout0 = electric_db->getScalar( "Vout0" ); + } + if (electric_db->keyExists( "freqOut" )){//unit: Hz + freqOut = electric_db->getScalar( "freqOut" ); + } + if (electric_db->keyExists( "t0_Out" )){//timestep shift, unit: lt + t0_Out = electric_db->getScalar( "t0_Out" ); + } + if (electric_db->keyExists( "Vout_Type" )){ + //type=1 -> sine + //tyep=2 -> cosine + Vout_Type = electric_db->getScalar( "Vout_Type" ); + if (Vout_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vout_Type is currently not supported! 
\n"); + } + if (rank==0){ + if (Vout_Type==1){ + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] \n",Vout,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout,freqOut,t0_Out); + } + else if (Vout_Type==2){ + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] \n",Vout,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout,freqOut,t0_Out); + } + } + break; + } } //By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis + if (BoundaryConditionInlet==2) Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,0); + if (BoundaryConditionOutlet==2) Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,0); double slope = (Vout-Vin)/(Nz-2); double psi_linearized; for (int k=0;kSendD3Q7AA(fq, 0); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + if (BoundaryConditionInlet > 0){ + switch (BoundaryConditionInlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + case 2: + Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + } + } + if (BoundaryConditionOutlet > 0){ + switch (BoundaryConditionOutlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + case 2: + Vout = 
getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + } } //-------------------------// ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); } -void ScaLBL_Poisson::SolveElectricPotentialAAeven(){ +void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){ ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + if (BoundaryConditionInlet > 0){ + switch (BoundaryConditionInlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + case 2: + Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + } + } + if (BoundaryConditionOutlet > 0){ + switch (BoundaryConditionOutlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + case 2: + Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + } } //-------------------------// ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 74abd775..ebcac179 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -9,6 +9,7 @@ #include #include #include +#include #include 
"common/ScaLBL.h" #include "common/Communication.h" @@ -16,6 +17,7 @@ #include "analysis/Minkowski.h" #include "ProfilerApp.h" +#define _USE_MATH_DEFINES #ifndef ScaLBL_POISSON_INC #define ScaLBL_POISSON_INC @@ -41,7 +43,8 @@ public: //bool Restart,pBC; int timestep,timestepMax; int analysis_interval; - int BoundaryCondition; + int BoundaryConditionInlet; + int BoundaryConditionOutlet; int BoundaryConditionSolid; double tau; double tolerance; @@ -50,11 +53,18 @@ public: double Vin, Vout; double chargeDen_dummy;//for debugging bool WriteLog; + double Vin0,freqIn,t0_In,Vin_Type; + double Vout0,freqOut,t0_Out,Vout_Type; + bool TestPeriodic; + double TestPeriodicTime;//unit: [sec] + double TestPeriodicTimeConv; //unit [sec/lt] + double TestPeriodicSaveInterval; //unit [sec] int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; double Lx,Ly,Lz; double h;//image resolution + double time_conv;//phys to LB time converting factor; unit=[sec/lt] std::shared_ptr Dm; // this domain is for analysis std::shared_ptr Mask; // this domain is for lbm @@ -97,6 +107,7 @@ private: void SolvePoissonAAodd(double *ChargeDensity); void SolvePoissonAAeven(double *ChargeDensity); void getConvergenceLog(int timestep,double error); + double getBoundaryVoltagefromPeriodicBC(double V0,double freq,double t0,int V_type,int time_step); }; #endif diff --git a/tests/TestPoissonSolver.cpp b/tests/TestPoissonSolver.cpp index 32353f65..5683ace1 100644 --- a/tests/TestPoissonSolver.cpp +++ b/tests/TestPoissonSolver.cpp @@ -53,14 +53,36 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + if (PoissonSolver.TestPeriodic==true){ + PoissonSolver.Initialize(PoissonSolver.TestPeriodicTimeConv); + } + else { + PoissonSolver.Initialize(0); + } //Initialize dummy charge density for test PoissonSolver.DummyChargeDensity(); - PoissonSolver.Run(PoissonSolver.ChargeDensityDummy); - 
PoissonSolver.getElectricPotential_debug(1); - PoissonSolver.getElectricField_debug(1); + if (PoissonSolver.TestPeriodic==true){ + if (rank==0) printf("Testing periodic voltage input is enabled. Total test time is %.3g[s], saving data every %.3g[s]; + user-specified time resolution is %.3g[s/lt]\n", + PoissonSolver.TestPeriodicTime,PoissonSolver.TestPeriodicSaveInterval,PoissonSolver.TestPeriodicTimeConv); + int timestep = 0; + while (timestep<(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv)){ + timestep++; + PoissonSolver.Run(PoissonSolver.ChargeDensityDummy,timestep); + if (timestep%(PoissonSolver.TestPeriodicSaveInterval/PoissonSolver.TestPeriodicTimeConv)==0){ + if (rank==0) printf(" Time = %.3g[s]; saving electric potential and field\n",timestep*PoissonSolver.TestPeriodicTimeConv); + PoissonSolver.getElectricPotential_debug(timestep*PoissonSolver.TestPeriodicTimeConv); + PoissonSolver.getElectricField_debug(timestep*PoissonSolver.TestPeriodicTimeConv); + } + } + } + else { + PoissonSolver.Run(PoissonSolver.ChargeDensityDummy,1); + PoissonSolver.getElectricPotential_debug(1); + PoissonSolver.getElectricField_debug(1); + } if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n"); if (rank==0) printf("*************************************************************\n"); diff --git a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp index 2b3726a4..93493331 100644 --- a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp +++ b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp @@ -80,20 +80,22 @@ int main(int argc, char **argv) IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); IonModel.Initialize(); + // Get maximal time converting factor based on Sotkes and Ion solvers + Study.getTimeConvMax_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); // Initialize LB-Poisson model PoissonSolver.ReadParams(filename); 
PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + PoissonSolver.Initialize(Study.time_conv_max); int timestep=0; while (timestep < Study.timestepMax){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,timestep);//solve Poisson equtaion to get steady-state electrical potental StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential From 074d61860f6a76f6090f5be12553cadf4adea693 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 Jan 2021 20:15:31 -0500 Subject: [PATCH 108/205] working to add analysis --- analysis/ElectroChemistry.cpp | 13 ++++++------- analysis/ElectroChemistry.h | 2 -- analysis/GreyPhase.cpp | 26 +++++++++++++------------- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp index 30487f03..e97e74ec 100644 --- a/analysis/ElectroChemistry.cpp +++ b/analysis/ElectroChemistry.cpp @@ -5,7 +5,6 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm): fillData(dm->Comm,dm->rank_info,{dm->Nx-2,dm->Ny-2,dm->Nz-2},{1,1,1},0,1) { - MPI_Comm_dup(dm->Comm,&comm); Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz; Volume=(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz()*1.0; @@ -80,9 +79,9 @@ void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poiss } } } - rho_avg_global[ion]=sumReduce( Dm->Comm, rho_avg_local[ion]); - rho_mu_avg_global[ion]=sumReduce( Dm->Comm, rho_mu_avg_local[ion]); - rho_psi_avg_global[ion]=sumReduce( Dm->Comm, rho_psi_avg_local[ion]); + rho_avg_global[ion]=Dm->Comm.sumReduce( rho_avg_local[ion]); + rho_mu_avg_global[ion]=Dm->Comm.sumReduce( rho_mu_avg_local[ion]); + 
rho_psi_avg_global[ion]=Dm->Comm.sumReduce( rho_psi_avg_local[ion]); rho_mu_avg_global[ion] /= rho_avg_global[ion]; rho_psi_avg_global[ion] /= rho_avg_global[ion]; @@ -100,8 +99,8 @@ void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poiss } } } - rho_mu_fluctuation_global[ion]=sumReduce( Dm->Comm, rho_mu_fluctuation_local[ion]); - rho_psi_fluctuation_global[ion]=sumReduce( Dm->Comm, rho_psi_fluctuation_local[ion]); + rho_mu_fluctuation_global[ion]=Dm->Comm.sumReduce( rho_mu_fluctuation_local[ion]); + rho_psi_fluctuation_global[ion]=Dm->Comm.sumReduce( rho_psi_fluctuation_local[ion]); } if (Dm->rank()==0){ @@ -217,7 +216,7 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P } if (vis_db->getWithDefault( "write_silo", true )) - IO::writeData( timestep, visData, comm ); + IO::writeData( timestep, visData, Dm->Comm.comm ); /* if (vis_db->getWithDefault( "save_8bit_raw", true )){ char CurrentIDFilename[40]; diff --git a/analysis/ElectroChemistry.h b/analysis/ElectroChemistry.h index 8d613ef7..26872da9 100644 --- a/analysis/ElectroChemistry.h +++ b/analysis/ElectroChemistry.h @@ -22,8 +22,6 @@ class ElectroChemistryAnalyzer{ public: - MPI_Comm comm; - int tag; std::shared_ptr Dm; double Volume; // input variables diff --git a/analysis/GreyPhase.cpp b/analysis/GreyPhase.cpp index dab2ad9b..74f4437a 100644 --- a/analysis/GreyPhase.cpp +++ b/analysis/GreyPhase.cpp @@ -112,27 +112,27 @@ void GreyPhaseAnalysis::Basic(){ } } } - Oil.M=sumReduce( Dm->Comm, Oil_local.M); - Oil.Px=sumReduce( Dm->Comm, Oil_local.Px); - Oil.Py=sumReduce( Dm->Comm, Oil_local.Py); - Oil.Pz=sumReduce( Dm->Comm, Oil_local.Pz); - - Water.M=sumReduce( Dm->Comm, Water_local.M); - Water.Px=sumReduce( Dm->Comm, Water_local.Px); - Water.Py=sumReduce( Dm->Comm, Water_local.Py); - Water.Pz=sumReduce( Dm->Comm, Water_local.Pz); + Oil.M=Dm->Comm.sumReduce( Oil_local.M); + Oil.Px=Dm->Comm.sumReduce( Oil_local.Px); + Oil.Py=Dm->Comm.sumReduce( Oil_local.Py); 
+ Oil.Pz=Dm->Comm.sumReduce( Oil_local.Pz); + + Water.M=Dm->Comm.sumReduce( Water_local.M); + Water.Px=Dm->Comm.sumReduce( Water_local.Px); + Water.Py=Dm->Comm.sumReduce( Water_local.Py); + Water.Pz=Dm->Comm.sumReduce( Water_local.Pz); //Oil.p /= Oil.M; //Water.p /= Water.M; - count_w=sumReduce( Dm->Comm, count_w); - count_n=sumReduce( Dm->Comm, count_n); + count_w=Dm->Comm.sumReduce( count_w); + count_n=Dm->Comm.sumReduce( count_n); if (count_w > 0.0) - Water.p=sumReduce( Dm->Comm, Water_local.p) / count_w; + Water.p=Dm->Comm.sumReduce( Water_local.p) / count_w; else Water.p = 0.0; if (count_n > 0.0) - Oil.p=sumReduce( Dm->Comm, Oil_local.p) / count_n; + Oil.p=Dm->Comm.sumReduce( Oil_local.p) / count_n; else Oil.p = 0.0; From fbb2dd67b2de1ed39675fd4f2f669f0863d39e1a Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 Jan 2021 21:40:41 -0500 Subject: [PATCH 109/205] fix sumreduce --- IO/Writer.cpp | 20 ++--- IO/Writer.h | 4 +- analysis/ElectroChemistry.cpp | 10 ++- analysis/ElectroChemistry.h | 6 +- analysis/SubPhase.cpp | 152 +++++++++++++++++----------------- 5 files changed, 96 insertions(+), 96 deletions(-) diff --git a/IO/Writer.cpp b/IO/Writer.cpp index 6581ad42..61c333af 100644 --- a/IO/Writer.cpp +++ b/IO/Writer.cpp @@ -2,7 +2,7 @@ #include "IO/MeshDatabase.h" #include "IO/IOHelpers.h" #include "IO/silo.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include @@ -36,7 +36,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap global_IO_format = Format::SILO; else ERROR("Unknown format"); - int rank = comm_rank(MPI_COMM_WORLD); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); if ( !append && rank==0 ) { mkdir(path.c_str(),S_IRWXU|S_IRGRP); std::string filename; @@ -55,7 +55,7 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap // Write the mesh data in the original format static std::vector writeMeshesOrigFormat( const std::vector& meshData, 
const std::string& path ) { - int rank = MPI_WORLD_RANK(); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; for (size_t i=0; i writeMeshesOrigFormat( const std::vector& meshes_written, cons static std::vector writeMeshesNewFormat( const std::vector& meshData, const std::string& path, int format ) { - int rank = MPI_WORLD_RANK(); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; char filename[100], fullpath[200]; sprintf(filename,"%05i",rank); @@ -419,7 +419,7 @@ static std::vector writeMeshesSilo( const std::vector& meshData, const std::string& path, int format ) { #ifdef USE_SILO - int rank = MPI_WORLD_RANK(); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; char filename[100], fullpath[200]; sprintf(filename,"%05i.silo",rank); @@ -441,12 +441,12 @@ static std::vector writeMeshesSilo( /**************************************************** * Write the mesh data * ****************************************************/ -void IO::writeData( const std::string& subdir, const std::vector& meshData, MPI_Comm comm ) +void IO::writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ) { if ( global_IO_path.empty() ) IO::initialize( ); PROFILE_START("writeData"); - int rank = comm_rank(comm); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); // Check the meshData before writing for ( const auto& data : meshData ) { if ( !data.check() ) @@ -457,7 +457,7 @@ void IO::writeData( const std::string& subdir, const std::vector meshes_written; if ( global_IO_format == Format::OLD ) { diff --git a/IO/Writer.h b/IO/Writer.h index 710fa0d8..dfc22db8 100644 --- a/IO/Writer.h +++ b/IO/Writer.h @@ -34,7 +34,7 @@ void initialize( const std::string& path="", const std::string& format="silo", b * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -void writeData( const 
std::string& subdir, const std::vector& meshData, MPI_Comm comm ); +void writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ); /*! @@ -44,7 +44,7 @@ void writeData( const std::string& subdir, const std::vector * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -inline void writeData( int timestep, const std::vector& meshData, MPI_Comm comm ) +inline void writeData( int timestep, const std::vector& meshData, const Utilities::MPI& comm ) { char subdir[100]; sprintf(subdir,"vis%03i",timestep); diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp index e97e74ec..f9e25c35 100644 --- a/analysis/ElectroChemistry.cpp +++ b/analysis/ElectroChemistry.cpp @@ -1,8 +1,7 @@ #include "analysis/ElectroChemistry.h" ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr dm): - Dm(dm), - fillData(dm->Comm,dm->rank_info,{dm->Nx-2,dm->Ny-2,dm->Nz-2},{1,1,1},0,1) + Dm(dm) { Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz; @@ -130,7 +129,10 @@ void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poiss void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, std::shared_ptr input_db, int timestep){ auto vis_db = input_db->getDatabase( "Visualization" ); - char VisName[40]; + char VisName[40]; + + std::vector visData; + fillHalo fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1); IO::initialize("","silo","false"); // Create the MeshDataStruct @@ -216,7 +218,7 @@ void ElectroChemistryAnalyzer::WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &P } if (vis_db->getWithDefault( "write_silo", true )) - IO::writeData( timestep, visData, Dm->Comm.comm ); + IO::writeData( timestep, visData, Dm->Comm ); /* if (vis_db->getWithDefault( "save_8bit_raw", true )){ char CurrentIDFilename[40]; diff --git a/analysis/ElectroChemistry.h b/analysis/ElectroChemistry.h 
index 26872da9..beaff833 100644 --- a/analysis/ElectroChemistry.h +++ b/analysis/ElectroChemistry.h @@ -7,12 +7,12 @@ #include #include "common/Domain.h" +#include "common/Utilities.h" +#include "common/MPI_Helpers.h" #include "common/Communication.h" #include "analysis/analysis.h" #include "analysis/distance.h" #include "analysis/Minkowski.h" -#include "common/Utilities.h" -#include "common/MPI_Helpers.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -49,8 +49,6 @@ public: void WriteVis( ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, std::shared_ptr input_db, int timestep); private: - std::vector visData; - fillHalo fillData; FILE *TIMELOG; }; #endif diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 8e937f7e..3ca6d188 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -229,25 +229,25 @@ void SubPhase::Basic(){ } } } - gwb.V=sumReduce( Dm->Comm, wb.V); - gnb.V=sumReduce( Dm->Comm, nb.V); - gwb.M=sumReduce( Dm->Comm, wb.M); - gnb.M=sumReduce( Dm->Comm, nb.M); - gwb.Px=sumReduce( Dm->Comm, wb.Px); - gwb.Py=sumReduce( Dm->Comm, wb.Py); - gwb.Pz=sumReduce( Dm->Comm, wb.Pz); - gnb.Px=sumReduce( Dm->Comm, nb.Px); - gnb.Py=sumReduce( Dm->Comm, nb.Py); - gnb.Pz=sumReduce( Dm->Comm, nb.Pz); + gwb.V=Dm->Comm.sumReduce( wb.V); + gnb.V=Dm->Comm.sumReduce( nb.V); + gwb.M=Dm->Comm.sumReduce( wb.M); + gnb.M=Dm->Comm.sumReduce( nb.M); + gwb.Px=Dm->Comm.sumReduce( wb.Px); + gwb.Py=Dm->Comm.sumReduce( wb.Py); + gwb.Pz=Dm->Comm.sumReduce( wb.Pz); + gnb.Px=Dm->Comm.sumReduce( nb.Px); + gnb.Py=Dm->Comm.sumReduce( nb.Py); + gnb.Pz=Dm->Comm.sumReduce( nb.Pz); - count_w=sumReduce( Dm->Comm, count_w); - count_n=sumReduce( Dm->Comm, count_n); + count_w=Dm->Comm.sumReduce( count_w); + count_n=Dm->Comm.sumReduce( count_n); if (count_w > 0.0) - gwb.p=sumReduce( Dm->Comm, wb.p) / count_w; + gwb.p=Dm->Comm.sumReduce( wb.p) / count_w; else gwb.p = 0.0; if (count_n > 0.0) - gnb.p=sumReduce( Dm->Comm, nb.p) / 
count_n; + gnb.p=Dm->Comm.sumReduce( nb.p) / count_n; else gnb.p = 0.0; @@ -445,14 +445,14 @@ void SubPhase::Full(){ nd.X -= nc.X; // compute global entities - gnc.V=sumReduce( Dm->Comm, nc.V); - gnc.A=sumReduce( Dm->Comm, nc.A); - gnc.H=sumReduce( Dm->Comm, nc.H); - gnc.X=sumReduce( Dm->Comm, nc.X); - gnd.V=sumReduce( Dm->Comm, nd.V); - gnd.A=sumReduce( Dm->Comm, nd.A); - gnd.H=sumReduce( Dm->Comm, nd.H); - gnd.X=sumReduce( Dm->Comm, nd.X); + gnc.V=Dm->Comm.sumReduce( nc.V); + gnc.A=Dm->Comm.sumReduce( nc.A); + gnc.H=Dm->Comm.sumReduce( nc.H); + gnc.X=Dm->Comm.sumReduce( nc.X); + gnd.V=Dm->Comm.sumReduce( nd.V); + gnd.A=Dm->Comm.sumReduce( nd.A); + gnd.H=Dm->Comm.sumReduce( nd.H); + gnd.X=Dm->Comm.sumReduce( nd.X); gnd.Nc = nd.Nc; // wetting for (k=0; kComm, wc.V); - gwc.A=sumReduce( Dm->Comm, wc.A); - gwc.H=sumReduce( Dm->Comm, wc.H); - gwc.X=sumReduce( Dm->Comm, wc.X); - gwd.V=sumReduce( Dm->Comm, wd.V); - gwd.A=sumReduce( Dm->Comm, wd.A); - gwd.H=sumReduce( Dm->Comm, wd.H); - gwd.X=sumReduce( Dm->Comm, wd.X); + gwc.V=Dm->Comm.sumReduce( wc.V); + gwc.A=Dm->Comm.sumReduce( wc.A); + gwc.H=Dm->Comm.sumReduce( wc.H); + gwc.X=Dm->Comm.sumReduce( wc.X); + gwd.V=Dm->Comm.sumReduce( wd.V); + gwd.A=Dm->Comm.sumReduce( wd.A); + gwd.H=Dm->Comm.sumReduce( wd.H); + gwd.X=Dm->Comm.sumReduce( wd.X); gwd.Nc = wd.Nc; /* Set up geometric analysis of interface region */ @@ -527,20 +527,20 @@ void SubPhase::Full(){ iwn.A = morph_i->A(); iwn.H = morph_i->H(); iwn.X = morph_i->X(); - giwn.V=sumReduce( Dm->Comm, iwn.V); - giwn.A=sumReduce( Dm->Comm, iwn.A); - giwn.H=sumReduce( Dm->Comm, iwn.H); - giwn.X=sumReduce( Dm->Comm, iwn.X); + giwn.V=Dm->Comm.sumReduce( iwn.V); + giwn.A=Dm->Comm.sumReduce( iwn.A); + giwn.H=Dm->Comm.sumReduce( iwn.H); + giwn.X=Dm->Comm.sumReduce( iwn.X); // measure only the connected part iwnc.Nc = morph_i->MeasureConnectedPathway(); iwnc.V = morph_i->V(); iwnc.A = morph_i->A(); iwnc.H = morph_i->H(); iwnc.X = morph_i->X(); - giwnc.V=sumReduce( Dm->Comm, 
iwnc.V); - giwnc.A=sumReduce( Dm->Comm, iwnc.A); - giwnc.H=sumReduce( Dm->Comm, iwnc.H); - giwnc.X=sumReduce( Dm->Comm, iwnc.X); + giwnc.V=Dm->Comm.sumReduce( iwnc.V); + giwnc.A=Dm->Comm.sumReduce( iwnc.A); + giwnc.H=Dm->Comm.sumReduce( iwnc.H); + giwnc.X=Dm->Comm.sumReduce( iwnc.X); giwnc.Nc = iwnc.Nc; double vol_nc_bulk = 0.0; @@ -631,46 +631,46 @@ void SubPhase::Full(){ } } - gnd.M=sumReduce( Dm->Comm, nd.M); - gnd.Px=sumReduce( Dm->Comm, nd.Px); - gnd.Py=sumReduce( Dm->Comm, nd.Py); - gnd.Pz=sumReduce( Dm->Comm, nd.Pz); - gnd.K=sumReduce( Dm->Comm, nd.K); + gnd.M=Dm->Comm.sumReduce( nd.M); + gnd.Px=Dm->Comm.sumReduce( nd.Px); + gnd.Py=Dm->Comm.sumReduce( nd.Py); + gnd.Pz=Dm->Comm.sumReduce( nd.Pz); + gnd.K=Dm->Comm.sumReduce( nd.K); - gwd.M=sumReduce( Dm->Comm, wd.M); - gwd.Px=sumReduce( Dm->Comm, wd.Px); - gwd.Py=sumReduce( Dm->Comm, wd.Py); - gwd.Pz=sumReduce( Dm->Comm, wd.Pz); - gwd.K=sumReduce( Dm->Comm, wd.K); + gwd.M=Dm->Comm.sumReduce( wd.M); + gwd.Px=Dm->Comm.sumReduce( wd.Px); + gwd.Py=Dm->Comm.sumReduce( wd.Py); + gwd.Pz=Dm->Comm.sumReduce( wd.Pz); + gwd.K=Dm->Comm.sumReduce( wd.K); - gnc.M=sumReduce( Dm->Comm, nc.M); - gnc.Px=sumReduce( Dm->Comm, nc.Px); - gnc.Py=sumReduce( Dm->Comm, nc.Py); - gnc.Pz=sumReduce( Dm->Comm, nc.Pz); - gnc.K=sumReduce( Dm->Comm, nc.K); + gnc.M=Dm->Comm.sumReduce( nc.M); + gnc.Px=Dm->Comm.sumReduce( nc.Px); + gnc.Py=Dm->Comm.sumReduce( nc.Py); + gnc.Pz=Dm->Comm.sumReduce( nc.Pz); + gnc.K=Dm->Comm.sumReduce( nc.K); - gwc.M=sumReduce( Dm->Comm, wc.M); - gwc.Px=sumReduce( Dm->Comm, wc.Px); - gwc.Py=sumReduce( Dm->Comm, wc.Py); - gwc.Pz=sumReduce( Dm->Comm, wc.Pz); - gwc.K=sumReduce( Dm->Comm, wc.K); + gwc.M=Dm->Comm.sumReduce( wc.M); + gwc.Px=Dm->Comm.sumReduce( wc.Px); + gwc.Py=Dm->Comm.sumReduce( wc.Py); + gwc.Pz=Dm->Comm.sumReduce( wc.Pz); + gwc.K=Dm->Comm.sumReduce( wc.K); - giwn.Mn=sumReduce( Dm->Comm, iwn.Mn); - giwn.Pnx=sumReduce( Dm->Comm, iwn.Pnx); - giwn.Pny=sumReduce( Dm->Comm, iwn.Pny); - giwn.Pnz=sumReduce( 
Dm->Comm, iwn.Pnz); - giwn.Kn=sumReduce( Dm->Comm, iwn.Kn); - giwn.Mw=sumReduce( Dm->Comm, iwn.Mw); - giwn.Pwx=sumReduce( Dm->Comm, iwn.Pwx); - giwn.Pwy=sumReduce( Dm->Comm, iwn.Pwy); - giwn.Pwz=sumReduce( Dm->Comm, iwn.Pwz); - giwn.Kw=sumReduce( Dm->Comm, iwn.Kw); + giwn.Mn=Dm->Comm.sumReduce( iwn.Mn); + giwn.Pnx=Dm->Comm.sumReduce( iwn.Pnx); + giwn.Pny=Dm->Comm.sumReduce( iwn.Pny); + giwn.Pnz=Dm->Comm.sumReduce( iwn.Pnz); + giwn.Kn=Dm->Comm.sumReduce( iwn.Kn); + giwn.Mw=Dm->Comm.sumReduce( iwn.Mw); + giwn.Pwx=Dm->Comm.sumReduce( iwn.Pwx); + giwn.Pwy=Dm->Comm.sumReduce( iwn.Pwy); + giwn.Pwz=Dm->Comm.sumReduce( iwn.Pwz); + giwn.Kw=Dm->Comm.sumReduce( iwn.Kw); // pressure averaging - gnc.p=sumReduce( Dm->Comm, nc.p); - gnd.p=sumReduce( Dm->Comm, nd.p); - gwc.p=sumReduce( Dm->Comm, wc.p); - gwd.p=sumReduce( Dm->Comm, wd.p); + gnc.p=Dm->Comm.sumReduce( nc.p); + gnd.p=Dm->Comm.sumReduce( nd.p); + gwc.p=Dm->Comm.sumReduce( wc.p); + gwd.p=Dm->Comm.sumReduce( wd.p); if (vol_wc_bulk > 0.0) wc.p = wc.p /vol_wc_bulk; @@ -681,10 +681,10 @@ void SubPhase::Full(){ if (vol_nd_bulk > 0.0) nd.p = nd.p /vol_nd_bulk; - vol_wc_bulk=sumReduce( Dm->Comm, vol_wc_bulk); - vol_wd_bulk=sumReduce( Dm->Comm, vol_wd_bulk); - vol_nc_bulk=sumReduce( Dm->Comm, vol_nc_bulk); - vol_nd_bulk=sumReduce( Dm->Comm, vol_nd_bulk); + vol_wc_bulk=Dm->Comm.sumReduce( vol_wc_bulk); + vol_wd_bulk=Dm->Comm.sumReduce( vol_wd_bulk); + vol_nc_bulk=Dm->Comm.sumReduce( vol_nc_bulk); + vol_nd_bulk=Dm->Comm.sumReduce( vol_nd_bulk); if (vol_wc_bulk > 0.0) gwc.p = gwc.p /vol_wc_bulk; From 81953c2fce346bd74ad000631b32bbd00bbb1ce9 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 Jan 2021 21:49:54 -0500 Subject: [PATCH 110/205] files match direct from FOM --- common/Communication.h | 226 ++++++++++++++++++--------------------- common/Communication.hpp | 53 +++------ common/ReadMicroCT.cpp | 4 +- common/ReadMicroCT.h | 3 +- 4 files changed, 123 insertions(+), 163 deletions(-) diff --git a/common/Communication.h 
b/common/Communication.h index f2cfc7ce..4cd9ad70 100644 --- a/common/Communication.h +++ b/common/Communication.h @@ -1,7 +1,7 @@ #ifndef COMMUNICATION_H_INC #define COMMUNICATION_H_INC -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include "common/Array.h" @@ -38,7 +38,7 @@ struct RankInfoStruct { //! Redistribute domain data (dst may be smaller than the src) template Array redistribute( const RankInfoStruct& src_rank, const Array& src_data, - const RankInfoStruct& dst_rank, std::array dst_size, MPI_Comm comm ); + const RankInfoStruct& dst_rank, std::array dst_size, const Utilities::MPI& comm ); /*! @@ -59,7 +59,7 @@ public: * @param[in] fill Fill {faces,edges,corners} * @param[in] periodic Periodic dimensions */ - fillHalo( MPI_Comm comm, const RankInfoStruct& info, + fillHalo( const Utilities::MPI& comm, const RankInfoStruct& info, std::array n, std::array ng, int tag, int depth, std::array fill = {true,true,true}, std::array periodic = {true,true,true} ); @@ -83,7 +83,7 @@ public: private: - MPI_Comm comm; + Utilities::MPI comm; RankInfoStruct info; std::array n, ng; int depth; @@ -93,8 +93,6 @@ private: TYPE *mem; TYPE *send[3][3][3], *recv[3][3][3]; MPI_Request send_req[3][3][3], recv_req[3][3][3]; - size_t N_type; - MPI_Datatype datatype; fillHalo(); // Private empty constructor fillHalo(const fillHalo&); // Private copy constructor fillHalo& operator=(const fillHalo&); // Private assignment operator @@ -136,7 +134,7 @@ void InitializeRanks( const int rank, const int nprocx, const int nprocy, const //*************************************************************************************** -inline void CommunicateSendRecvCounts( MPI_Comm Communicator, int sendtag, int recvtag, +inline void CommunicateSendRecvCounts( const Utilities::MPI& comm, int sendtag, int recvtag, int rank_x, int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY, int rank_Xy, @@ -154,54 +152,53 @@ inline 
void CommunicateSendRecvCounts( MPI_Comm Communicator, int sendtag, int r int& recvCount_yz, int& recvCount_YZ, int& recvCount_yZ, int& recvCount_Yz ) { MPI_Request req1[18], req2[18]; - MPI_Status stat1[18],stat2[18]; - MPI_Isend(&sendCount_x, 1,MPI_INT,rank_x,sendtag+0,Communicator,&req1[0]); - MPI_Irecv(&recvCount_X, 1,MPI_INT,rank_X,recvtag+0,Communicator,&req2[0]); - MPI_Isend(&sendCount_X, 1,MPI_INT,rank_X,sendtag+1,Communicator,&req1[1]); - MPI_Irecv(&recvCount_x, 1,MPI_INT,rank_x,recvtag+1,Communicator,&req2[1]); - MPI_Isend(&sendCount_y, 1,MPI_INT,rank_y,sendtag+2,Communicator,&req1[2]); - MPI_Irecv(&recvCount_Y, 1,MPI_INT,rank_Y,recvtag+2,Communicator,&req2[2]); - MPI_Isend(&sendCount_Y, 1,MPI_INT,rank_Y,sendtag+3,Communicator,&req1[3]); - MPI_Irecv(&recvCount_y, 1,MPI_INT,rank_y,recvtag+3,Communicator,&req2[3]); - MPI_Isend(&sendCount_z, 1,MPI_INT,rank_z,sendtag+4,Communicator,&req1[4]); - MPI_Irecv(&recvCount_Z, 1,MPI_INT,rank_Z,recvtag+4,Communicator,&req2[4]); - MPI_Isend(&sendCount_Z, 1,MPI_INT,rank_Z,sendtag+5,Communicator,&req1[5]); - MPI_Irecv(&recvCount_z, 1,MPI_INT,rank_z,recvtag+5,Communicator,&req2[5]); + req1[0] = comm.Isend(&sendCount_x,1,rank_x,sendtag+0); + req2[0] = comm.Irecv(&recvCount_X,1,rank_X,recvtag+0); + req1[1] = comm.Isend(&sendCount_X,1,rank_X,sendtag+1); + req2[1] = comm.Irecv(&recvCount_x,1,rank_x,recvtag+1); + req1[2] = comm.Isend(&sendCount_y,1,rank_y,sendtag+2); + req2[2] = comm.Irecv(&recvCount_Y,1,rank_Y,recvtag+2); + req1[3] = comm.Isend(&sendCount_Y,1,rank_Y,sendtag+3); + req2[3] = comm.Irecv(&recvCount_y,1,rank_y,recvtag+3); + req1[4] = comm.Isend(&sendCount_z,1,rank_z,sendtag+4); + req2[4] = comm.Irecv(&recvCount_Z,1,rank_Z,recvtag+4); + req1[5] = comm.Isend(&sendCount_Z,1,rank_Z,sendtag+5); + req2[5] = comm.Irecv(&recvCount_z,1,rank_z,recvtag+5); - MPI_Isend(&sendCount_xy, 1,MPI_INT,rank_xy,sendtag+6,Communicator,&req1[6]); - MPI_Irecv(&recvCount_XY, 1,MPI_INT,rank_XY,recvtag+6,Communicator,&req2[6]); - 
MPI_Isend(&sendCount_XY, 1,MPI_INT,rank_XY,sendtag+7,Communicator,&req1[7]); - MPI_Irecv(&recvCount_xy, 1,MPI_INT,rank_xy,recvtag+7,Communicator,&req2[7]); - MPI_Isend(&sendCount_Xy, 1,MPI_INT,rank_Xy,sendtag+8,Communicator,&req1[8]); - MPI_Irecv(&recvCount_xY, 1,MPI_INT,rank_xY,recvtag+8,Communicator,&req2[8]); - MPI_Isend(&sendCount_xY, 1,MPI_INT,rank_xY,sendtag+9,Communicator,&req1[9]); - MPI_Irecv(&recvCount_Xy, 1,MPI_INT,rank_Xy,recvtag+9,Communicator,&req2[9]); + req1[6] = comm.Isend(&sendCount_xy,1,rank_xy,sendtag+6); + req2[6] = comm.Irecv(&recvCount_XY,1,rank_XY,recvtag+6); + req1[7] = comm.Isend(&sendCount_XY,1,rank_XY,sendtag+7); + req2[7] = comm.Irecv(&recvCount_xy,1,rank_xy,recvtag+7); + req1[8] = comm.Isend(&sendCount_Xy,1,rank_Xy,sendtag+8); + req2[8] = comm.Irecv(&recvCount_xY,1,rank_xY,recvtag+8); + req1[9] = comm.Isend(&sendCount_xY,1,rank_xY,sendtag+9); + req2[9] = comm.Irecv(&recvCount_Xy,1,rank_Xy,recvtag+9); - MPI_Isend(&sendCount_xz, 1,MPI_INT,rank_xz,sendtag+10,Communicator,&req1[10]); - MPI_Irecv(&recvCount_XZ, 1,MPI_INT,rank_XZ,recvtag+10,Communicator,&req2[10]); - MPI_Isend(&sendCount_XZ, 1,MPI_INT,rank_XZ,sendtag+11,Communicator,&req1[11]); - MPI_Irecv(&recvCount_xz, 1,MPI_INT,rank_xz,recvtag+11,Communicator,&req2[11]); - MPI_Isend(&sendCount_Xz, 1,MPI_INT,rank_Xz,sendtag+12,Communicator,&req1[12]); - MPI_Irecv(&recvCount_xZ, 1,MPI_INT,rank_xZ,recvtag+12,Communicator,&req2[12]); - MPI_Isend(&sendCount_xZ, 1,MPI_INT,rank_xZ,sendtag+13,Communicator,&req1[13]); - MPI_Irecv(&recvCount_Xz, 1,MPI_INT,rank_Xz,recvtag+13,Communicator,&req2[13]); + req1[10] = comm.Isend(&sendCount_xz,1,rank_xz,sendtag+10); + req2[10] = comm.Irecv(&recvCount_XZ,1,rank_XZ,recvtag+10); + req1[11] = comm.Isend(&sendCount_XZ,1,rank_XZ,sendtag+11); + req2[11] = comm.Irecv(&recvCount_xz,1,rank_xz,recvtag+11); + req1[12] = comm.Isend(&sendCount_Xz,1,rank_Xz,sendtag+12); + req2[12] = comm.Irecv(&recvCount_xZ,1,rank_xZ,recvtag+12); + req1[13] = 
comm.Isend(&sendCount_xZ,1,rank_xZ,sendtag+13); + req2[13] = comm.Irecv(&recvCount_Xz,1,rank_Xz,recvtag+13); - MPI_Isend(&sendCount_yz, 1,MPI_INT,rank_yz,sendtag+14,Communicator,&req1[14]); - MPI_Irecv(&recvCount_YZ, 1,MPI_INT,rank_YZ,recvtag+14,Communicator,&req2[14]); - MPI_Isend(&sendCount_YZ, 1,MPI_INT,rank_YZ,sendtag+15,Communicator,&req1[15]); - MPI_Irecv(&recvCount_yz, 1,MPI_INT,rank_yz,recvtag+15,Communicator,&req2[15]); - MPI_Isend(&sendCount_Yz, 1,MPI_INT,rank_Yz,sendtag+16,Communicator,&req1[16]); - MPI_Irecv(&recvCount_yZ, 1,MPI_INT,rank_yZ,recvtag+16,Communicator,&req2[16]); - MPI_Isend(&sendCount_yZ, 1,MPI_INT,rank_yZ,sendtag+17,Communicator,&req1[17]); - MPI_Irecv(&recvCount_Yz, 1,MPI_INT,rank_Yz,recvtag+17,Communicator,&req2[17]); - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); - MPI_Barrier(Communicator); + req1[14] = comm.Isend(&sendCount_yz,1,rank_yz,sendtag+14); + req2[14] = comm.Irecv(&recvCount_YZ,1,rank_YZ,recvtag+14); + req1[15] = comm.Isend(&sendCount_YZ,1,rank_YZ,sendtag+15); + req2[15] = comm.Irecv(&recvCount_yz,1,rank_yz,recvtag+15); + req1[16] = comm.Isend(&sendCount_Yz,1,rank_Yz,sendtag+16); + req2[16] = comm.Irecv(&recvCount_yZ,1,rank_yZ,recvtag+16); + req1[17] = comm.Isend(&sendCount_yZ,1,rank_yZ,sendtag+17); + req2[17] = comm.Irecv(&recvCount_Yz,1,rank_Yz,recvtag+17); + comm.waitAll( 18, req1 ); + comm.waitAll( 18, req2 ); + comm.barrier(); } //*************************************************************************************** -inline void CommunicateRecvLists( MPI_Comm Communicator, int sendtag, int recvtag, +inline void CommunicateRecvLists( const Utilities::MPI& comm, int sendtag, int recvtag, int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z, int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy, int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz, @@ -222,53 +219,52 @@ inline void CommunicateRecvLists( MPI_Comm Communicator, 
int sendtag, int recvta int rank_Xy, int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz) { MPI_Request req1[18], req2[18]; - MPI_Status stat1[18],stat2[18]; - MPI_Isend(sendList_x, sendCount_x,MPI_INT,rank_x,sendtag,Communicator,&req1[0]); - MPI_Irecv(recvList_X, recvCount_X,MPI_INT,rank_X,recvtag,Communicator,&req2[0]); - MPI_Isend(sendList_X, sendCount_X,MPI_INT,rank_X,sendtag,Communicator,&req1[1]); - MPI_Irecv(recvList_x, recvCount_x,MPI_INT,rank_x,recvtag,Communicator,&req2[1]); - MPI_Isend(sendList_y, sendCount_y,MPI_INT,rank_y,sendtag,Communicator,&req1[2]); - MPI_Irecv(recvList_Y, recvCount_Y,MPI_INT,rank_Y,recvtag,Communicator,&req2[2]); - MPI_Isend(sendList_Y, sendCount_Y,MPI_INT,rank_Y,sendtag,Communicator,&req1[3]); - MPI_Irecv(recvList_y, recvCount_y,MPI_INT,rank_y,recvtag,Communicator,&req2[3]); - MPI_Isend(sendList_z, sendCount_z,MPI_INT,rank_z,sendtag,Communicator,&req1[4]); - MPI_Irecv(recvList_Z, recvCount_Z,MPI_INT,rank_Z,recvtag,Communicator,&req2[4]); - MPI_Isend(sendList_Z, sendCount_Z,MPI_INT,rank_Z,sendtag,Communicator,&req1[5]); - MPI_Irecv(recvList_z, recvCount_z,MPI_INT,rank_z,recvtag,Communicator,&req2[5]); + req1[0] = comm.Isend(sendList_x,sendCount_x,rank_x,sendtag); + req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_X,recvtag); + req1[1] = comm.Isend(sendList_X,sendCount_X,rank_X,sendtag); + req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_x,recvtag); + req1[2] = comm.Isend(sendList_y,sendCount_y,rank_y,sendtag); + req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_Y,recvtag); + req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_Y,sendtag); + req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_y,recvtag); + req1[4] = comm.Isend(sendList_z,sendCount_z,rank_z,sendtag); + req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_Z,recvtag); + req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_Z,sendtag); + req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_z,recvtag); - MPI_Isend(sendList_xy, 
sendCount_xy,MPI_INT,rank_xy,sendtag,Communicator,&req1[6]); - MPI_Irecv(recvList_XY, recvCount_XY,MPI_INT,rank_XY,recvtag,Communicator,&req2[6]); - MPI_Isend(sendList_XY, sendCount_XY,MPI_INT,rank_XY,sendtag,Communicator,&req1[7]); - MPI_Irecv(recvList_xy, recvCount_xy,MPI_INT,rank_xy,recvtag,Communicator,&req2[7]); - MPI_Isend(sendList_Xy, sendCount_Xy,MPI_INT,rank_Xy,sendtag,Communicator,&req1[8]); - MPI_Irecv(recvList_xY, recvCount_xY,MPI_INT,rank_xY,recvtag,Communicator,&req2[8]); - MPI_Isend(sendList_xY, sendCount_xY,MPI_INT,rank_xY,sendtag,Communicator,&req1[9]); - MPI_Irecv(recvList_Xy, recvCount_Xy,MPI_INT,rank_Xy,recvtag,Communicator,&req2[9]); + req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_xy,sendtag); + req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_XY,recvtag); + req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_XY,sendtag); + req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_xy,recvtag); + req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy,sendtag); + req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_xY,recvtag); + req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_xY,sendtag); + req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy,recvtag); - MPI_Isend(sendList_xz, sendCount_xz,MPI_INT,rank_xz,sendtag,Communicator,&req1[10]); - MPI_Irecv(recvList_XZ, recvCount_XZ,MPI_INT,rank_XZ,recvtag,Communicator,&req2[10]); - MPI_Isend(sendList_XZ, sendCount_XZ,MPI_INT,rank_XZ,sendtag,Communicator,&req1[11]); - MPI_Irecv(recvList_xz, recvCount_xz,MPI_INT,rank_xz,recvtag,Communicator,&req2[11]); - MPI_Isend(sendList_Xz, sendCount_Xz,MPI_INT,rank_Xz,sendtag,Communicator,&req1[12]); - MPI_Irecv(recvList_xZ, recvCount_xZ,MPI_INT,rank_xZ,recvtag,Communicator,&req2[12]); - MPI_Isend(sendList_xZ, sendCount_xZ,MPI_INT,rank_xZ,sendtag,Communicator,&req1[13]); - MPI_Irecv(recvList_Xz, recvCount_Xz,MPI_INT,rank_Xz,recvtag,Communicator,&req2[13]); + req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_xz,sendtag); + req2[10] = 
comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ,recvtag); + req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ,sendtag); + req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_xz,recvtag); + req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_Xz,sendtag); + req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ,recvtag); + req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ,sendtag); + req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz,recvtag); - MPI_Isend(sendList_yz, sendCount_yz,MPI_INT,rank_yz,sendtag,Communicator,&req1[14]); - MPI_Irecv(recvList_YZ, recvCount_YZ,MPI_INT,rank_YZ,recvtag,Communicator,&req2[14]); - MPI_Isend(sendList_YZ, sendCount_YZ,MPI_INT,rank_YZ,sendtag,Communicator,&req1[15]); - MPI_Irecv(recvList_yz, recvCount_yz,MPI_INT,rank_yz,recvtag,Communicator,&req2[15]); - MPI_Isend(sendList_Yz, sendCount_Yz,MPI_INT,rank_Yz,sendtag,Communicator,&req1[16]); - MPI_Irecv(recvList_yZ, recvCount_yZ,MPI_INT,rank_yZ,recvtag,Communicator,&req2[16]); - MPI_Isend(sendList_yZ, sendCount_yZ,MPI_INT,rank_yZ,sendtag,Communicator,&req1[17]); - MPI_Irecv(recvList_Yz, recvCount_Yz,MPI_INT,rank_Yz,recvtag,Communicator,&req2[17]); - MPI_Waitall(18,req1,stat1); - MPI_Waitall(18,req2,stat2); + req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_yz,sendtag); + req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ,recvtag); + req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ,sendtag); + req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_yz,recvtag); + req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz,sendtag); + req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ,recvtag); + req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ,sendtag); + req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz,recvtag); + comm.waitAll( 18, req1 ); + comm.waitAll( 18, req2 ); } //*************************************************************************************** -inline void CommunicateMeshHalo(DoubleArray &Mesh, MPI_Comm Communicator, +inline void 
CommunicateMeshHalo(DoubleArray &Mesh, const Utilities::MPI& comm, double *sendbuf_x,double *sendbuf_y,double *sendbuf_z,double *sendbuf_X,double *sendbuf_Y,double *sendbuf_Z, double *sendbuf_xy,double *sendbuf_XY,double *sendbuf_xY,double *sendbuf_Xy, double *sendbuf_xz,double *sendbuf_XZ,double *sendbuf_xZ,double *sendbuf_Xz, @@ -318,42 +314,24 @@ inline void CommunicateMeshHalo(DoubleArray &Mesh, MPI_Comm Communicator, PackMeshData(sendList_yZ, sendCount_yZ ,sendbuf_yZ, MeshData); PackMeshData(sendList_YZ, sendCount_YZ ,sendbuf_YZ, MeshData); //...................................................................................... - MPI_Sendrecv(sendbuf_x,sendCount_x,MPI_DOUBLE,rank_x,sendtag, - recvbuf_X,recvCount_X,MPI_DOUBLE,rank_X,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_X,sendCount_X,MPI_DOUBLE,rank_X,sendtag, - recvbuf_x,recvCount_x,MPI_DOUBLE,rank_x,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_y,sendCount_y,MPI_DOUBLE,rank_y,sendtag, - recvbuf_Y,recvCount_Y,MPI_DOUBLE,rank_Y,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_Y,sendCount_Y,MPI_DOUBLE,rank_Y,sendtag, - recvbuf_y,recvCount_y,MPI_DOUBLE,rank_y,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_z,sendCount_z,MPI_DOUBLE,rank_z,sendtag, - recvbuf_Z,recvCount_Z,MPI_DOUBLE,rank_Z,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_Z,sendCount_Z,MPI_DOUBLE,rank_Z,sendtag, - recvbuf_z,recvCount_z,MPI_DOUBLE,rank_z,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_xy,sendCount_xy,MPI_DOUBLE,rank_xy,sendtag, - recvbuf_XY,recvCount_XY,MPI_DOUBLE,rank_XY,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_XY,sendCount_XY,MPI_DOUBLE,rank_XY,sendtag, - recvbuf_xy,recvCount_xy,MPI_DOUBLE,rank_xy,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_Xy,sendCount_Xy,MPI_DOUBLE,rank_Xy,sendtag, - recvbuf_xY,recvCount_xY,MPI_DOUBLE,rank_xY,recvtag,Communicator,MPI_STATUS_IGNORE); - 
MPI_Sendrecv(sendbuf_xY,sendCount_xY,MPI_DOUBLE,rank_xY,sendtag, - recvbuf_Xy,recvCount_Xy,MPI_DOUBLE,rank_Xy,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_xz,sendCount_xz,MPI_DOUBLE,rank_xz,sendtag, - recvbuf_XZ,recvCount_XZ,MPI_DOUBLE,rank_XZ,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_XZ,sendCount_XZ,MPI_DOUBLE,rank_XZ,sendtag, - recvbuf_xz,recvCount_xz,MPI_DOUBLE,rank_xz,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_Xz,sendCount_Xz,MPI_DOUBLE,rank_Xz,sendtag, - recvbuf_xZ,recvCount_xZ,MPI_DOUBLE,rank_xZ,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_xZ,sendCount_xZ,MPI_DOUBLE,rank_xZ,sendtag, - recvbuf_Xz,recvCount_Xz,MPI_DOUBLE,rank_Xz,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_yz,sendCount_yz,MPI_DOUBLE,rank_yz,sendtag, - recvbuf_YZ,recvCount_YZ,MPI_DOUBLE,rank_YZ,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_YZ,sendCount_YZ,MPI_DOUBLE,rank_YZ,sendtag, - recvbuf_yz,recvCount_yz,MPI_DOUBLE,rank_yz,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_Yz,sendCount_Yz,MPI_DOUBLE,rank_Yz,sendtag, - recvbuf_yZ,recvCount_yZ,MPI_DOUBLE,rank_yZ,recvtag,Communicator,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendbuf_yZ,sendCount_yZ,MPI_DOUBLE,rank_yZ,sendtag, - recvbuf_Yz,recvCount_Yz,MPI_DOUBLE,rank_Yz,recvtag,Communicator,MPI_STATUS_IGNORE); + comm.sendrecv(sendbuf_x,sendCount_x,rank_x,sendtag,recvbuf_X,recvCount_X,rank_X,recvtag); + comm.sendrecv(sendbuf_X,sendCount_X,rank_X,sendtag,recvbuf_x,recvCount_x,rank_x,recvtag); + comm.sendrecv(sendbuf_y,sendCount_y,rank_y,sendtag,recvbuf_Y,recvCount_Y,rank_Y,recvtag); + comm.sendrecv(sendbuf_Y,sendCount_Y,rank_Y,sendtag,recvbuf_y,recvCount_y,rank_y,recvtag); + comm.sendrecv(sendbuf_z,sendCount_z,rank_z,sendtag,recvbuf_Z,recvCount_Z,rank_Z,recvtag); + comm.sendrecv(sendbuf_Z,sendCount_Z,rank_Z,sendtag,recvbuf_z,recvCount_z,rank_z,recvtag); + 
comm.sendrecv(sendbuf_xy,sendCount_xy,rank_xy,sendtag,recvbuf_XY,recvCount_XY,rank_XY,recvtag); + comm.sendrecv(sendbuf_XY,sendCount_XY,rank_XY,sendtag,recvbuf_xy,recvCount_xy,rank_xy,recvtag); + comm.sendrecv(sendbuf_Xy,sendCount_Xy,rank_Xy,sendtag,recvbuf_xY,recvCount_xY,rank_xY,recvtag); + comm.sendrecv(sendbuf_xY,sendCount_xY,rank_xY,sendtag,recvbuf_Xy,recvCount_Xy,rank_Xy,recvtag); + comm.sendrecv(sendbuf_xz,sendCount_xz,rank_xz,sendtag,recvbuf_XZ,recvCount_XZ,rank_XZ,recvtag); + comm.sendrecv(sendbuf_XZ,sendCount_XZ,rank_XZ,sendtag,recvbuf_xz,recvCount_xz,rank_xz,recvtag); + comm.sendrecv(sendbuf_Xz,sendCount_Xz,rank_Xz,sendtag,recvbuf_xZ,recvCount_xZ,rank_xZ,recvtag); + comm.sendrecv(sendbuf_xZ,sendCount_xZ,rank_xZ,sendtag,recvbuf_Xz,recvCount_Xz,rank_Xz,recvtag); + comm.sendrecv(sendbuf_yz,sendCount_yz,rank_yz,sendtag,recvbuf_YZ,recvCount_YZ,rank_YZ,recvtag); + comm.sendrecv(sendbuf_YZ,sendCount_YZ,rank_YZ,sendtag,recvbuf_yz,recvCount_yz,rank_yz,recvtag); + comm.sendrecv(sendbuf_Yz,sendCount_Yz,rank_Yz,sendtag,recvbuf_yZ,recvCount_yZ,rank_yZ,recvtag); + comm.sendrecv(sendbuf_yZ,sendCount_yZ,rank_yZ,sendtag,recvbuf_Yz,recvCount_Yz,rank_Yz,recvtag); //........................................................................................ 
UnpackMeshData(recvList_x, recvCount_x ,recvbuf_x, MeshData); UnpackMeshData(recvList_X, recvCount_X ,recvbuf_X, MeshData); diff --git a/common/Communication.hpp b/common/Communication.hpp index 33fed3a7..ca310ea5 100644 --- a/common/Communication.hpp +++ b/common/Communication.hpp @@ -2,9 +2,8 @@ #define COMMUNICATION_HPP_INC #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" -//#include "ProfilerApp.h" /******************************************************** @@ -12,17 +11,19 @@ ********************************************************/ template Array redistribute( const RankInfoStruct& src_rank, const Array& src_data, - const RankInfoStruct& dst_rank, std::array dst_size, MPI_Comm comm ) + const RankInfoStruct& dst_rank, std::array dst_size, const Utilities::MPI& comm ) { -#ifdef USE_MPI + if ( comm.getSize() == 1 ) { + return src_data.subset( { 0, (size_t) dst_size[0]-1, 0, (size_t) dst_size[1]-1, 0, (size_t) dst_size[2]-1 } ); + } // Get the src size std::array src_size; int size0[3] = { (int) src_data.size(0), (int) src_data.size(1), (int) src_data.size(2) }; - MPI_Allreduce( size0, src_size.data(), 3, MPI_INT, MPI_MAX, comm ); + comm.maxReduce( size0, src_size.data(), 3 ); if ( !src_data.empty() ) ASSERT( src_size[0] == size0[0] && src_size[1] == size0[1] && src_size[2] == size0[2] ); // Check that dst_size matches on all ranks - MPI_Allreduce( dst_size.data(), size0, 3, MPI_INT, MPI_MAX, comm ); + comm.maxReduce( dst_size.data(), size0, 3 ); ASSERT( dst_size[0] == size0[0] && dst_size[1] == size0[1] && dst_size[2] == size0[2] ); // Function to get overlap range auto calcOverlap = []( int i1[3], int i2[3], int j1[3], int j2[3] ) { @@ -60,7 +61,7 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src } std::vector send_request( send_rank.size() ); for (size_t i=0; i dst_data( dst_size[0], dst_size[1], dst_size[2] ); int i1[3] = { dst_size[0] * dst_rank.ix, dst_size[1] * 
dst_rank.jy, dst_size[2] * dst_rank.kz }; @@ -75,17 +76,14 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src continue; int rank = src_rank.getRankForBlock(i,j,k); Array data( index[1] - index[0] + 1, index[3] - index[2] + 1, index[5] - index[4] + 1 ); - MPI_Recv( data.data(), sizeof(TYPE)*data.length(), MPI_BYTE, rank, 5462, comm, MPI_STATUS_IGNORE ); + comm.recv( data.data(), data.length(), rank, 5462 ); dst_data.copySubset( index, data ); } } } // Free data - MPI_Waitall( send_request.size(), send_request.data(), MPI_STATUSES_IGNORE ); + comm.waitAll( send_request.size(), send_request.data() ); return dst_data; -#else - return src_data.subset( { 0, dst_size[0]-1, 0, dst_size[1]-1, 0, dst_size[2]-1 ); -#endif } @@ -94,27 +92,11 @@ Array redistribute( const RankInfoStruct& src_rank, const Array& src * Structure to fill halo cells * ********************************************************/ template -fillHalo::fillHalo( MPI_Comm comm_, const RankInfoStruct& info_, +fillHalo::fillHalo( const Utilities::MPI& comm_, const RankInfoStruct& info_, std::array n_, std::array ng_, int tag0, int depth_, std::array fill, std::array periodic ): comm(comm_), info(info_), n(n_), ng(ng_), depth(depth_) { - if ( std::is_same() ) { - N_type = 1; - datatype = MPI_DOUBLE; - } else if ( std::is_same() ) { - N_type = 1; - datatype = MPI_FLOAT; - } else if ( sizeof(TYPE)%sizeof(double)==0 ) { - N_type = sizeof(TYPE) / sizeof(double); - datatype = MPI_DOUBLE; - } else if ( sizeof(TYPE)%sizeof(float)==0 ) { - N_type = sizeof(TYPE) / sizeof(float); - datatype = MPI_FLOAT; - } else { - N_type = sizeof(TYPE); - datatype = MPI_BYTE; - } // Set the fill pattern memset(fill_pattern,0,sizeof(fill_pattern)); if ( fill[0] ) { @@ -251,8 +233,8 @@ void fillHalo::fill( Array& data ) for (int k=0; k<3; k++) { if ( !fill_pattern[i][j][k] ) continue; - MPI_Irecv( recv[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype, - info.rank[i][j][k], tag[2-i][2-j][2-k], comm, 
&recv_req[i][j][k] ); + recv_req[i][j][k] = comm.Irecv( recv[i][j][k], depth2*N_send_recv[i][j][k], + info.rank[i][j][k], tag[2-i][2-j][2-k] ); } } } @@ -263,19 +245,18 @@ void fillHalo::fill( Array& data ) if ( !fill_pattern[i][j][k] ) continue; pack( data, i-1, j-1, k-1, send[i][j][k] ); - MPI_Isend( send[i][j][k], N_type*depth2*N_send_recv[i][j][k], datatype, - info.rank[i][j][k], tag[i][j][k], comm, &send_req[i][j][k] ); + send_req[i][j][k] = comm.Isend( send[i][j][k], depth2*N_send_recv[i][j][k], + info.rank[i][j][k], tag[i][j][k] ); } } } // Recv the dst data and unpack (we recive in reverse order to match the sends) - MPI_Status status; for (int i=2; i>=0; i--) { for (int j=2; j>=0; j--) { for (int k=2; k>=0; k--) { if ( !fill_pattern[i][j][k] ) continue; - MPI_Wait(&recv_req[i][j][k],&status); + comm.wait( recv_req[i][j][k] ); unpack( data, i-1, j-1, k-1, recv[i][j][k] ); } } @@ -286,7 +267,7 @@ void fillHalo::fill( Array& data ) for (int k=0; k<3; k++) { if ( !fill_pattern[i][j][k] ) continue; - MPI_Wait(&send_req[i][j][k],&status); + comm.wait( send_req[i][j][k] ); } } } diff --git a/common/ReadMicroCT.cpp b/common/ReadMicroCT.cpp index 79ef241e..2209e712 100644 --- a/common/ReadMicroCT.cpp +++ b/common/ReadMicroCT.cpp @@ -64,11 +64,11 @@ Array readMicroCT( const std::string& filename ) // Read the compressed micro CT data and distribute -Array readMicroCT( const Database& domain, MPI_Comm comm ) +Array readMicroCT( const Database& domain, const Utilities::MPI& comm ) { // Get the local problem info auto n = domain.getVector( "n" ); - int rank = comm_rank(MPI_COMM_WORLD); + int rank = comm.getRank(); auto nproc = domain.getVector( "nproc" ); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); diff --git a/common/ReadMicroCT.h b/common/ReadMicroCT.h index f232740e..c8acc379 100644 --- a/common/ReadMicroCT.h +++ b/common/ReadMicroCT.h @@ -5,11 +5,12 @@ #include "common/Array.h" #include "common/Communication.h" #include "common/Database.h" 
+#include "common/MPI.h" Array readMicroCT( const std::string& filename ); -Array readMicroCT( const Database& domain, MPI_Comm comm ); +Array readMicroCT( const Database& domain, const Utilities::MPI& comm ); #endif From 2f5fc9ead101d2320d4ba8edf56deb9789bdef9f Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 Jan 2021 22:16:58 -0500 Subject: [PATCH 111/205] merging with FOM --- analysis/Minkowski.cpp | 21 ++-- analysis/Minkowski.h | 2 +- analysis/SubPhase.cpp | 2 +- analysis/TwoPhase.cpp | 96 ++++++++------- analysis/TwoPhase.h | 2 +- analysis/analysis.cpp | 83 ++++++------- analysis/analysis.h | 8 +- analysis/distance.cpp | 144 +--------------------- analysis/distance.h | 22 ---- common/MPI_Helpers.cpp | 266 ----------------------------------------- common/MPI_Helpers.h | 239 ------------------------------------ common/MPI_Helpers.hpp | 154 ------------------------ 12 files changed, 104 insertions(+), 935 deletions(-) delete mode 100644 common/MPI_Helpers.cpp delete mode 100644 common/MPI_Helpers.h delete mode 100644 common/MPI_Helpers.hpp diff --git a/analysis/Minkowski.cpp b/analysis/Minkowski.cpp index 668875b9..9dfff477 100644 --- a/analysis/Minkowski.cpp +++ b/analysis/Minkowski.cpp @@ -4,7 +4,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -123,13 +123,13 @@ void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue) // convert X for 2D manifold to 3D object Xi *= 0.5; - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); // Phase averages - MPI_Allreduce(&Vi,&Vi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Xi,&Xi_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Ai,&Ai_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Ji,&Ji_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Barrier(Dm->Comm); + Vi_global = Dm->Comm.sumReduce( Vi ); + 
Xi_global = Dm->Comm.sumReduce( Xi ); + Ai_global = Dm->Comm.sumReduce( Ai ); + Ji_global = Dm->Comm.sumReduce( Ji ); + Dm->Comm.barrier(); PROFILE_STOP("ComputeScalar"); } @@ -220,7 +220,7 @@ int Minkowski::MeasureConnectedPathway(){ double vF=0.0; n_connected_components = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,Dm->rank_info,distance,distance,vF,vF,label,Dm->Comm); // int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm ) - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); for (int k=0; krank_info,distance,distance,vF,vF,label,Dm->Comm); // int n_connected_components = ComputeGlobalPhaseComponent(Nx-2,Ny-2,Nz-2,Dm->rank_info,const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, Dm->Comm ) - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); + for (int k=0; kComm); + Dm->Comm.barrier(); Dm->AggregateLabels( filename ); diff --git a/analysis/TwoPhase.cpp b/analysis/TwoPhase.cpp index 9b2e5fd8..d878a663 100644 --- a/analysis/TwoPhase.cpp +++ b/analysis/TwoPhase.cpp @@ -5,7 +5,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" @@ -882,7 +882,7 @@ void TwoPhase::ComponentAverages() } } - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); if (Dm->rank()==0){ printf("Component averages computed locally -- reducing result... 
\n"); } @@ -890,14 +890,14 @@ void TwoPhase::ComponentAverages() RecvBuffer.resize(BLOB_AVG_COUNT,NumberComponents_NWP); /* for (int b=0; bComm); - MPI_Allreduce(&ComponentAverages_NWP(0,b),&RecvBuffer(0),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm); + Dm->Comm.barrier(); + Dm->Comm.sumReduce(&ComponentAverages_NWP(0,b),&RecvBuffer(0),BLOB_AVG_COUNT); for (int idx=0; idxComm); - MPI_Allreduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP, MPI_DOUBLE,MPI_SUM,Dm->Comm); - // MPI_Reduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm); + Dm->Comm.barrier(); + Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT*NumberComponents_NWP); + // Dm->Comm.sumReduce(ComponentAverages_NWP.data(),RecvBuffer.data(),BLOB_AVG_COUNT); if (Dm->rank()==0){ printf("rescaling... \n"); @@ -993,9 +993,8 @@ void TwoPhase::ComponentAverages() // reduce the wetting phase averages for (int b=0; bComm); -// MPI_Allreduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Reduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT,MPI_DOUBLE,MPI_SUM,0,Dm->Comm); + Dm->Comm.barrier(); + Dm->Comm.sumReduce(&ComponentAverages_WP(0,b),RecvBuffer.data(),BLOB_AVG_COUNT); for (int idx=0; idxComm); - MPI_Allreduce(&nwp_volume,&nwp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&wp_volume,&wp_volume_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&awn,&awn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&ans,&ans_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&aws,&aws_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&lwns,&lwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&As,&As_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Jwn,&Jwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Kwn,&Kwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - 
MPI_Allreduce(&KGwns,&KGwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&KNwns,&KNwns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&efawns,&efawns_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&wwndnw,&wwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&wwnsdnwn,&wwnsdnwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Jwnwwndnw,&Jwnwwndnw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + Dm->Comm.barrier(); + nwp_volume_global = Dm->Comm.sumReduce( nwp_volume ); + wp_volume_global = Dm->Comm.sumReduce( wp_volume ); + awn_global = Dm->Comm.sumReduce( awn ); + ans_global = Dm->Comm.sumReduce( ans ); + aws_global = Dm->Comm.sumReduce( aws ); + lwns_global = Dm->Comm.sumReduce( lwns ); + As_global = Dm->Comm.sumReduce( As ); + Jwn_global = Dm->Comm.sumReduce( Jwn ); + Kwn_global = Dm->Comm.sumReduce( Kwn ); + KGwns_global = Dm->Comm.sumReduce( KGwns ); + KNwns_global = Dm->Comm.sumReduce( KNwns ); + efawns_global = Dm->Comm.sumReduce( efawns ); + wwndnw_global = Dm->Comm.sumReduce( wwndnw ); + wwnsdnwn_global = Dm->Comm.sumReduce( wwnsdnwn ); + Jwnwwndnw_global = Dm->Comm.sumReduce( Jwnwwndnw ); // Phase averages - MPI_Allreduce(&vol_w,&vol_w_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&vol_n,&vol_n_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&paw,&paw_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&pan,&pan_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&vaw(0),&vaw_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&van(0),&van_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&vawn(0),&vawn_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&vawns(0),&vawns_global(0),3,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Gwn(0),&Gwn_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Gns(0),&Gns_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Gws(0),&Gws_global(0),6,MPI_DOUBLE,MPI_SUM,Dm->Comm); - 
MPI_Allreduce(&trawn,&trawn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&trJwn,&trJwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&trRwn,&trRwn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&euler,&euler_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&An,&An_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Jn,&Jn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&Kn,&Kn_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - - MPI_Barrier(Dm->Comm); + vol_w_global = Dm->Comm.sumReduce( vol_w ); + vol_n_global = Dm->Comm.sumReduce( vol_n ); + paw_global = Dm->Comm.sumReduce( paw ); + pan_global = Dm->Comm.sumReduce( pan ); + for (int idx=0; idx<3; idx++) + vaw_global(idx) = Dm->Comm.sumReduce( vaw(idx) ); + for (int idx=0; idx<3; idx++) + van_global(idx) = Dm->Comm.sumReduce( van(idx)); + for (int idx=0; idx<3; idx++) + vawn_global(idx) = Dm->Comm.sumReduce( vawn(idx) ); + for (int idx=0; idx<3; idx++) + vawns_global(idx) = Dm->Comm.sumReduce( vawns(idx) ); + for (int idx=0; idx<6; idx++){ + Gwn_global(idx) = Dm->Comm.sumReduce( Gwn(idx) ); + Gns_global(idx) = Dm->Comm.sumReduce( Gns(idx) ); + Gws_global(idx) = Dm->Comm.sumReduce( Gws(idx) ); + } + trawn_global = Dm->Comm.sumReduce( trawn ); + trJwn_global = Dm->Comm.sumReduce( trJwn ); + trRwn_global = Dm->Comm.sumReduce( trRwn ); + euler_global = Dm->Comm.sumReduce( euler ); + An_global = Dm->Comm.sumReduce( An ); + Jn_global = Dm->Comm.sumReduce( Jn ); + Kn_global = Dm->Comm.sumReduce( Kn ); + Dm->Comm.barrier(); // Normalize the phase averages // (density of both components = 1.0) diff --git a/analysis/TwoPhase.h b/analysis/TwoPhase.h index fddd04e8..4d500a89 100644 --- a/analysis/TwoPhase.h +++ b/analysis/TwoPhase.h @@ -12,7 +12,7 @@ #include "common/Domain.h" #include "common/Communication.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" diff --git 
a/analysis/analysis.cpp b/analysis/analysis.cpp index 7587f3c5..4298750e 100644 --- a/analysis/analysis.cpp +++ b/analysis/analysis.cpp @@ -188,7 +188,7 @@ int ComputeLocalPhaseComponent(const IntArray &PhaseID, int &VALUE, BlobIDArray /****************************************************************** * Reorder the global blob ids * ******************************************************************/ -static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, MPI_Comm comm ) +static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int ngz, const Utilities::MPI& comm ) { if ( N_blobs==0 ) return 0; @@ -212,7 +212,7 @@ static int ReorderBlobIDs2( BlobIDArray& ID, int N_blobs, int ngx, int ngy, int } } ASSERT(max_id > map1(N_blobs); int N_blobs2 = 0; for (int i=0; i& N_recv, int64_t *send_buf, std::vector& recv_buf, std::map& remote_map, - MPI_Comm comm ) + const Utilities::MPI& comm ) { std::vector send_req(neighbors.size()); std::vector recv_req(neighbors.size()); - std::vector status(neighbors.size()); - std::map::const_iterator it = map.begin(); + auto it = map.begin(); ASSERT(N_send==(int)map.size()); for (size_t i=0; ifirst; send_buf[2*i+1] = it->second.new_id; } for (size_t i=0; ifirst] = it->second.new_id; } for (size_t i=0; i& remote_map, @@ -304,18 +303,18 @@ static bool updateLocalIds( const std::map& remote_map, return changed; } static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, - int nblobs, BlobIDArray& IDs, MPI_Comm comm ) + int nblobs, BlobIDArray& IDs, const Utilities::MPI& comm ) { PROFILE_START("LocalToGlobalIDs",1); const int rank = rank_info.rank[1][1][1]; - int nprocs = comm_size(comm); + int nprocs = comm.getSize(); const int ngx = (IDs.size(0)-nx)/2; const int ngy = (IDs.size(1)-ny)/2; const int ngz = (IDs.size(2)-nz)/2; // Get the number of blobs for each rank std::vector N_blobs(nprocs,0); PROFILE_START("LocalToGlobalIDs-Allgather",1); - 
MPI_Allgather(&nblobs,1,MPI_INT,getPtr(N_blobs),1,MPI_INT,comm); + comm.allGather(nblobs,getPtr(N_blobs)); PROFILE_STOP("LocalToGlobalIDs-Allgather",1); int64_t N_blobs_tot = 0; int offset = 0; @@ -363,13 +362,12 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ std::vector N_recv(neighbors.size(),0); std::vector send_req(neighbors.size()); std::vector recv_req(neighbors.size()); - std::vector status(neighbors.size()); for (size_t i=0; i recv_buf(neighbors.size()); @@ -398,8 +396,7 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ bool changed = updateLocalIds( remote_map, map ); // Check if we are finished int test = changed ? 1:0; - int result = 0; - MPI_Allreduce(&test,&result,1,MPI_INT,MPI_SUM,comm); + int result = comm.sumReduce( test ); if ( result==0 ) break; } @@ -435,7 +432,7 @@ static int LocalToGlobalIDs( int nx, int ny, int nz, const RankInfoStruct& rank_ } int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS, - BlobIDArray& GlobalBlobID, MPI_Comm comm ) + BlobIDArray& GlobalBlobID, const Utilities::MPI& comm ) { PROFILE_START("ComputeGlobalBlobIDs"); // First compute the local ids @@ -446,7 +443,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf return nglobal; } int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info, - const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm ) + const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm ) { PROFILE_START("ComputeGlobalPhaseComponent"); // First compute the local ids @@ -462,37 +459,27 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r * Compute the mapping of blob ids between timesteps * ******************************************************************/ typedef std::map > 
map_type; -template inline MPI_Datatype getMPIType(); -template<> inline MPI_Datatype getMPIType() { return MPI_INT; } -template<> inline MPI_Datatype getMPIType() { - if ( sizeof(int64_t)==sizeof(long int) ) - return MPI_LONG; - else if ( sizeof(int64_t)==sizeof(double) ) - return MPI_DOUBLE; -} template -void gatherSet( std::set& set, MPI_Comm comm ) +void gatherSet( std::set& set, const Utilities::MPI& comm ) { - int nprocs = comm_size(comm); - MPI_Datatype type = getMPIType(); + int nprocs = comm.getSize(); std::vector send_data(set.begin(),set.end()); int send_count = send_data.size(); std::vector recv_count(nprocs,0), recv_disp(nprocs,0); - MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm); + comm.allGather( send_count, getPtr(recv_count) ); for (int i=1; i recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]); - MPI_Allgatherv(getPtr(send_data),send_count,type, - getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm); + comm.allGather( getPtr(send_data), send_count, getPtr(recv_data), + getPtr(recv_count), getPtr(recv_disp), true ); for (size_t i=0; i(); + int nprocs = comm.getSize(); std::vector send_data; - for (map_type::const_iterator it=src_map.begin(); it!=src_map.end(); ++it) { + for (auto it=src_map.begin(); it!=src_map.end(); ++it) { int id = it->first; const std::map& src_ids = it->second; send_data.push_back(id); @@ -505,21 +492,21 @@ void gatherSrcIDMap( map_type& src_map, MPI_Comm comm ) } int send_count = send_data.size(); std::vector recv_count(nprocs,0), recv_disp(nprocs,0); - MPI_Allgather(&send_count,1,MPI_INT,getPtr(recv_count),1,MPI_INT,comm); + comm.allGather(send_count,getPtr(recv_count)); for (int i=1; i recv_data(recv_disp[nprocs-1]+recv_count[nprocs-1]); - MPI_Allgatherv(getPtr(send_data),send_count,type, - getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),type,comm); + comm.allGather(getPtr(send_data),send_count, + getPtr(recv_data),getPtr(recv_count),getPtr(recv_disp),true); size_t i=0; 
src_map.clear(); while ( i < recv_data.size() ) { BlobIDType id = recv_data[i]; size_t count = recv_data[i+1]; i += 2; - std::map& src_ids = src_map[id]; + auto& src_ids = src_map[id]; for (size_t j=0; j::iterator it = src_ids.find(recv_data[i]); + auto it = src_ids.find(recv_data[i]); if ( it == src_ids.end() ) src_ids.insert(std::pair(recv_data[i],recv_data[i+1])); else @@ -538,7 +525,7 @@ void addSrcDstIDs( BlobIDType src_id, map_type& src_map, map_type& dst_map, } } ID_map_struct computeIDMap( int nx, int ny, int nz, - const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm ) + const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm ) { ASSERT(ID1.size()==ID2.size()); PROFILE_START("computeIDMap"); @@ -780,7 +767,7 @@ void renumberIDs( const std::vector& new_ids, BlobIDArray& IDs ) ******************************************************************/ void writeIDMap( const ID_map_struct& map, long long int timestep, const std::string& filename ) { - int rank = MPI_WORLD_RANK(); + int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); if ( rank!=0 ) return; bool empty = map.created.empty() && map.destroyed.empty() && diff --git a/analysis/analysis.h b/analysis/analysis.h index 2ce531b1..ec377995 100644 --- a/analysis/analysis.h +++ b/analysis/analysis.h @@ -58,7 +58,7 @@ int ComputeLocalPhaseComponent( const IntArray &PhaseID, int &VALUE, IntArray &C */ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_info, const DoubleArray& Phase, const DoubleArray& SignDist, double vF, double vS, - BlobIDArray& GlobalBlobID, MPI_Comm comm ); + BlobIDArray& GlobalBlobID, const Utilities::MPI& comm ); /*! 
@@ -75,7 +75,7 @@ int ComputeGlobalBlobIDs( int nx, int ny, int nz, const RankInfoStruct& rank_inf * @return Return the number of components in the specified phase */ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& rank_info, - const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, MPI_Comm comm ); + const IntArray &PhaseID, int &VALUE, BlobIDArray &GlobalBlobID, const Utilities::MPI& comm ); /*! @@ -87,7 +87,7 @@ int ComputeGlobalPhaseComponent( int nx, int ny, int nz, const RankInfoStruct& r * @param[in] nz Number of elements in the z-direction * @param[in/out] ID The ids of the blobs */ -void ReorderBlobIDs( BlobIDArray& ID, MPI_Comm comm ); +void ReorderBlobIDs( BlobIDArray& ID, const Utilities::MPI& comm ); typedef std::pair > BlobIDSplitStruct; @@ -120,7 +120,7 @@ struct ID_map_struct { * @param[in] ID1 The blob ids at the first timestep * @param[in] ID2 The blob ids at the second timestep */ -ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, MPI_Comm comm ); +ID_map_struct computeIDMap( int nx, int ny, int nz, const BlobIDArray& ID1, const BlobIDArray& ID2, const Utilities::MPI& comm ); /*! 
diff --git a/analysis/distance.cpp b/analysis/distance.cpp index fd48f7c7..9c605e1e 100644 --- a/analysis/distance.cpp +++ b/analysis/distance.cpp @@ -176,154 +176,12 @@ void CalcVecDist( Array &d, const Array &ID0, const Domain &Dm, // Update distance double err = calcVecUpdateInterior( d, dx[0], dx[1], dx[2] ); // Check if we are finished - err = maxReduce( Dm.Comm, err ); + err = Dm.Comm.maxReduce( err ); if ( err < tol ) break; } } -double Eikonal(DoubleArray &Distance, const Array &ID, Domain &Dm, int timesteps, const std::array& periodic){ - - /* - * This routine converts the data in the Distance array to a signed distance - * by solving the equation df/dt = sign(1-|grad f|), where Distance provides - * the values of f on the mesh associated with domain Dm - * It has been tested with segmented data initialized to values [-1,1] - * and will converge toward the signed distance to the surface bounding the associated phases - * - * Reference: - * Min C (2010) On reinitializing level set functions, Journal of Computational Physics229 - */ - - int i,j,k; - double dt=0.1; - double Dx,Dy,Dz; - double Dxp,Dxm,Dyp,Dym,Dzp,Dzm; - double Dxxp,Dxxm,Dyyp,Dyym,Dzzp,Dzzm; - double sign,norm; - double LocalVar,GlobalVar,LocalMax,GlobalMax; - - int xdim,ydim,zdim; - xdim=Dm.Nx-2; - ydim=Dm.Ny-2; - zdim=Dm.Nz-2; - //fillHalo fillData(Dm.Comm, Dm.rank_info,xdim,ydim,zdim,1,1,1,0,1); - fillHalo fillData( Dm.Comm, Dm.rank_info, {xdim, ydim, zdim}, {1,1,1}, 50, 1, {true,true,true}, periodic ); - - // Arrays to store the second derivatives - DoubleArray Dxx(Dm.Nx,Dm.Ny,Dm.Nz); - DoubleArray Dyy(Dm.Nx,Dm.Ny,Dm.Nz); - DoubleArray Dzz(Dm.Nx,Dm.Ny,Dm.Nz); - - int count = 0; - while (count < timesteps){ - - // Communicate the halo of values - fillData.fill(Distance); - - // Compute second order derivatives - for (k=1;k 0.f) Dx = Dxp*Dxp; - else Dx = Dxm*Dxm; - - if (Dyp + Dym > 0.f) Dy = Dyp*Dyp; - else Dy = Dym*Dym; - - if (Dzp + Dzm > 0.f) Dz = Dzp*Dzp; - else Dz = Dzm*Dzm; - } - else{ 
- - if (Dxp + Dxm < 0.f) Dx = Dxp*Dxp; - else Dx = Dxm*Dxm; - - if (Dyp + Dym < 0.f) Dy = Dyp*Dyp; - else Dy = Dym*Dym; - - if (Dzp + Dzm < 0.f) Dz = Dzp*Dzp; - else Dz = Dzm*Dzm; - } - - //Dx = max(Dxp*Dxp,Dxm*Dxm); - //Dy = max(Dyp*Dyp,Dym*Dym); - //Dz = max(Dzp*Dzp,Dzm*Dzm); - - norm=sqrt(Dx + Dy + Dz); - if (norm > 1.0) norm=1.0; - - Distance(i,j,k) += dt*sign*(1.0 - norm); - LocalVar += dt*sign*(1.0 - norm); - - if (fabs(dt*sign*(1.0 - norm)) > LocalMax) - LocalMax = fabs(dt*sign*(1.0 - norm)); - } - } - } - - MPI_Allreduce(&LocalVar,&GlobalVar,1,MPI_DOUBLE,MPI_SUM,Dm.Comm); - MPI_Allreduce(&LocalMax,&GlobalMax,1,MPI_DOUBLE,MPI_MAX,Dm.Comm); - GlobalVar /= Dm.Volume; - count++; - - if (count%50 == 0 && Dm.rank()==0 ) - printf("Time=%i, Max variation=%f, Global variation=%f \n",count,GlobalMax,GlobalVar); - - if (fabs(GlobalMax) < 1e-5){ - if (Dm.rank()==0) printf("Exiting with max tolerance of 1e-5 \n"); - count=timesteps; - } - } - return GlobalVar; -} // Explicit instantiations template void CalcDist( Array&, const Array&, const Domain&, const std::array&, const std::array& ); diff --git a/analysis/distance.h b/analysis/distance.h index d6c2740c..b3fc870e 100644 --- a/analysis/distance.h +++ b/analysis/distance.h @@ -16,16 +16,6 @@ struct Vec { }; inline bool operator<(const Vec& l, const Vec& r){ return l.x*l.x+l.y*l.y+l.z*l.z < r.x*r.x+r.y*r.y+r.z*r.z; } -inline double minmod(double &a, double &b){ - - double value; - - value = a; - if ( a*b < 0.0) value=0.0; - else if (fabs(a) > fabs(b)) value = b; - - return value; -} /*! * @brief Calculate the distance using a simple method @@ -50,16 +40,4 @@ void CalcDist( Array &Distance, const Array &ID, const Domain &Dm, void CalcVecDist( Array &Distance, const Array &ID, const Domain &Dm, const std::array& periodic = {true,true,true}, const std::array& dx = {1,1,1} ); - -/*! 
- * @brief Calculate the distance based on solution of Eikonal equation - * @details This routine calculates the signed distance to the nearest domain surface. - * @param[out] Distance Distance function - * @param[in] ID Domain id - * @param[in] Dm Domain information - * @param[in] timesteps number of timesteps to run for Eikonal solver - * @param[in] periodic Directions that are periodic - */ -double Eikonal(DoubleArray &Distance, const Array &ID, Domain &Dm, int timesteps, const std::array& periodic); - #endif diff --git a/common/MPI_Helpers.cpp b/common/MPI_Helpers.cpp deleted file mode 100644 index 736a2f02..00000000 --- a/common/MPI_Helpers.cpp +++ /dev/null @@ -1,266 +0,0 @@ -#include "common/MPI_Helpers.h" -#include "common/Utilities.h" - - -/******************************************************** -* Return the MPI data type * -********************************************************/ -template<> MPI_Datatype getMPItype() { - return MPI_CHAR; -} -template<> MPI_Datatype getMPItype() { - return MPI_UNSIGNED_CHAR; -} -template<> MPI_Datatype getMPItype() { - return MPI_INT; -} -template<> MPI_Datatype getMPItype() { - return MPI_LONG; -} -template<> MPI_Datatype getMPItype() { - return MPI_UNSIGNED_LONG; -} -template<> MPI_Datatype getMPItype() { - return MPI_LONG_LONG; -} -template<> MPI_Datatype getMPItype() { - return MPI_FLOAT; -} -template<> MPI_Datatype getMPItype() { - return MPI_DOUBLE; -} - - -/******************************************************** -* Concrete implimentations for packing/unpacking * -********************************************************/ -// unsigned char -template<> -size_t packsize( const unsigned char& ) -{ - return sizeof(unsigned char); -} -template<> -void pack( const unsigned char& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(unsigned char)); -} -template<> -void unpack( unsigned char& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(unsigned char)); -} -// char -template<> -size_t packsize( const char& ) 
-{ - return sizeof(char); -} -template<> -void pack( const char& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(char)); -} -template<> -void unpack( char& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(char)); -} -// int -template<> -size_t packsize( const int& ) -{ - return sizeof(int); -} -template<> -void pack( const int& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(int)); -} -template<> -void unpack( int& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(int)); -} -// unsigned int -template<> -size_t packsize( const unsigned int& ) -{ - return sizeof(unsigned int); -} -template<> -void pack( const unsigned int& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(int)); -} -template<> -void unpack( unsigned int& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(int)); -} -// size_t -template<> -size_t packsize( const size_t& ) -{ - return sizeof(size_t); -} -template<> -void pack( const size_t& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(size_t)); -} -template<> -void unpack( size_t& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(size_t)); -} -// std::string -template<> -size_t packsize( const std::string& rhs ) -{ - return rhs.size()+1; -} -template<> -void pack( const std::string& rhs, char *buffer ) -{ - memcpy(buffer,rhs.c_str(),rhs.size()+1); -} -template<> -void unpack( std::string& data, const char *buffer ) -{ - data = std::string(buffer); -} - - -/******************************************************** -* Fake MPI routines * -********************************************************/ -#ifndef USE_MPI -int MPI_Init(int*,char***) -{ - return 0; -} -int MPI_Init_thread(int*,char***, int required, int *provided ) -{ - *provided = required; - return 0; -} -int MPI_Finalize() -{ - return 0; -} -int MPI_Comm_size( MPI_Comm, int *size ) -{ - *size = 1; - return 0; -} -int MPI_Comm_rank( MPI_Comm, int *rank ) -{ - *rank = 0; - return 0; -} -int MPI_Barrier( MPI_Comm ) -{ - return 0; -} -int MPI_Waitall( 
int, MPI_Request[], MPI_Status[] ) -{ - return 0; -} -int MPI_Wait( MPI_Request*, MPI_Status* ) -{ - return 0; -} -int MPI_Bcast( void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm ) -{ - return 0; -} -int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, - MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, - MPI_Comm comm, MPI_Status *status) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, - MPI_Comm comm, MPI_Request *request) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, - int tag, MPI_Comm comm, MPI_Request *request) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int *recvcounts, const int *displs, - MPI_Datatype recvtype, MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - int dest, int sendtag, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int source, int recvtag, - MPI_Comm comm, MPI_Status *status) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int root, MPI_Comm comm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Comm_group(MPI_Comm comm, MPI_Group *group) -{ - ERROR("Not 
implimented yet"); - return 0; -} -int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm) -{ - ERROR("Not implimented yet"); - return 0; -} -int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm) -{ - *newcomm = comm; - return 0; -} -double MPI_Wtime( void ) -{ - return 0.0; -} -int MPI_Comm_free(MPI_Comm *group) -{ - return 0; -} -int MPI_Group_free(MPI_Group *group) -{ - return 0; -} -#endif - - diff --git a/common/MPI_Helpers.h b/common/MPI_Helpers.h deleted file mode 100644 index 1d20318e..00000000 --- a/common/MPI_Helpers.h +++ /dev/null @@ -1,239 +0,0 @@ -// This file contains wrappers for MPI routines and functions to pack/unpack data structures -#ifndef MPI_WRAPPERS_INC -#define MPI_WRAPPERS_INC - -#include -#include -#include -#include - -#ifdef USE_MPI - // Inlcude MPI - #include "mpi.h" -#else - // Create fake MPI types - typedef int MPI_Comm; - typedef int MPI_Request; - typedef int MPI_Status; - #define MPI_COMM_WORLD 0 - #define MPI_COMM_SELF 0 - #define MPI_COMM_NULL -1 - #define MPI_GROUP_NULL -2 - #define MPI_STATUS_IGNORE NULL - enum MPI_Datatype { MPI_LOGICAL, MPI_CHAR, MPI_UNSIGNED_CHAR, MPI_INT, - MPI_UNSIGNED, MPI_LONG, MPI_UNSIGNED_LONG, MPI_LONG_LONG, MPI_FLOAT, MPI_DOUBLE }; - enum MPI_Op { MPI_MIN, MPI_MAX, MPI_SUM }; - typedef int MPI_Group; - #define MPI_THREAD_SINGLE 0 - #define MPI_THREAD_FUNNELED 1 - #define MPI_THREAD_SERIALIZED 2 - #define MPI_THREAD_MULTIPLE 3 - // Fake MPI functions - int MPI_Init(int*,char***); - int MPI_Init_thread( int *argc, char ***argv, int required, int *provided ); - int MPI_Finalize(); - int MPI_Comm_size( MPI_Comm, int *size ); - int MPI_Comm_rank( MPI_Comm, int *rank ); - int MPI_Barrier(MPI_Comm); - int MPI_Wait(MPI_Request*,MPI_Status*); - int MPI_Waitall(int,MPI_Request[],MPI_Status[]); - int MPI_Bcast(void*,int,MPI_Datatype,int,MPI_Comm); - int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, - MPI_Comm comm); - int MPI_Recv(void *buf, int count, 
MPI_Datatype datatype, int source, int tag, - MPI_Comm comm, MPI_Status *status); - int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, - MPI_Comm comm, MPI_Request *request); - int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, - int tag, MPI_Comm comm, MPI_Request *request); - int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); - int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm); - int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, const int *recvcounts, const int *displs, - MPI_Datatype recvtype, MPI_Comm comm); - int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - int dest, int sendtag, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - int source, int recvtag, - MPI_Comm comm, MPI_Status *status); - int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int root, MPI_Comm comm); - double MPI_Wtime( void ); - int MPI_Comm_group(MPI_Comm comm, MPI_Group *group); - int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm); - int MPI_Comm_free(MPI_Comm *group); - int MPI_Group_free(MPI_Group *group); - int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm); -#endif - - -//! Get the size of the MPI_Comm -// Note: this is a thread and interrupt safe function -inline int comm_size( MPI_Comm comm ) { - int size = 1; - MPI_Comm_size( comm, &size ); - return size; -} - - -//! Get the rank of the MPI_Comm -// Note: this is a thread and interrupt safe function -inline int comm_rank( MPI_Comm comm ) { - int rank = 1; - MPI_Comm_rank( comm, &rank ); - return rank; -} - - -//! Get the size of MPI_COMM_WORLD -inline int MPI_WORLD_SIZE( ) { - return comm_size( MPI_COMM_WORLD ); -} - -//! 
Get the size of MPI_COMM_WORLD -inline int MPI_WORLD_RANK( ) { - return comm_rank( MPI_COMM_WORLD ); -} - -//! Return the appropriate MPI datatype for a class -template -MPI_Datatype getMPItype(); - - -//! Template function to return the buffer size required to pack a class -template -size_t packsize( const TYPE& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const TYPE& rhs, char *buffer ); - -//! Template function to unpack a class from a buffer -template -void unpack( TYPE& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::vector -template -size_t packsize( const std::vector& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::vector& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::vector& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::pair -template -size_t packsize( const std::pair& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::pair& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::pair& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::map -template -size_t packsize( const std::map& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::map& rhs, char *buffer ); - -//! Template function to pack a class to a buffer -template -void unpack( std::map& data, const char *buffer ); - - -//! Template function to return the buffer size required to pack a std::set -template -size_t packsize( const std::set& rhs ); - -//! Template function to pack a class to a buffer -template -void pack( const std::set& rhs, char *buffer ); - -//! 
Template function to pack a class to a buffer -template -void unpack( std::set& data, const char *buffer ); - - - -// Helper functions -inline double sumReduce( MPI_Comm comm, double x ) -{ - double y = 0; - MPI_Allreduce(&x,&y,1,MPI_DOUBLE,MPI_SUM,comm); - return y; -} -inline float sumReduce( MPI_Comm comm, float x ) -{ - float y = 0; - MPI_Allreduce(&x,&y,1,MPI_FLOAT,MPI_SUM,comm); - return y; -} -inline int sumReduce( MPI_Comm comm, int x ) -{ - int y = 0; - MPI_Allreduce(&x,&y,1,MPI_INT,MPI_SUM,comm); - return y; -} -inline long long sumReduce( MPI_Comm comm, long long x ) -{ - long long y = 0; - MPI_Allreduce(&x,&y,1,MPI_LONG_LONG,MPI_SUM,comm); - return y; -} -inline bool sumReduce( MPI_Comm comm, bool x ) -{ - int y = sumReduce( comm, x?1:0 ); - return y>0; -} -inline std::vector sumReduce( MPI_Comm comm, const std::vector& x ) -{ - auto y = x; - MPI_Allreduce(x.data(),y.data(),x.size(),MPI_FLOAT,MPI_SUM,comm); - return y; -} -inline std::vector sumReduce( MPI_Comm comm, const std::vector& x ) -{ - auto y = x; - MPI_Allreduce(x.data(),y.data(),x.size(),MPI_INT,MPI_SUM,comm); - return y; -} -inline double maxReduce( MPI_Comm comm, double x ) -{ - double y = 0; - MPI_Allreduce(&x,&y,1,MPI_DOUBLE,MPI_MAX,comm); - return y; -} -inline float maxReduce( MPI_Comm comm, float x ) -{ - float y = 0; - MPI_Allreduce(&x,&y,1,MPI_FLOAT,MPI_MAX,comm); - return y; -} -inline int maxReduce( MPI_Comm comm, int x ) -{ - int y = 0; - MPI_Allreduce(&x,&y,1,MPI_INT,MPI_MAX,comm); - return y; -} - - -#endif - - -#include "common/MPI_Helpers.hpp" - - diff --git a/common/MPI_Helpers.hpp b/common/MPI_Helpers.hpp deleted file mode 100644 index 85261cf1..00000000 --- a/common/MPI_Helpers.hpp +++ /dev/null @@ -1,154 +0,0 @@ -// This file contains wrappers for MPI routines and functions to pack/unpack data structures -#ifndef MPI_WRAPPERS_HPP -#define MPI_WRAPPERS_HPP - -#include "common/MPI_Helpers.h" -#include -#include -#include -#include - - - 
-/******************************************************** -* Default instantiations for std::vector * -********************************************************/ -template -size_t packsize( const std::vector& rhs ) -{ - size_t bytes = sizeof(size_t); - for (size_t i=0; i -void pack( const std::vector& rhs, char *buffer ) -{ - size_t size = rhs.size(); - memcpy(buffer,&size,sizeof(size_t)); - size_t pos = sizeof(size_t); - for (size_t i=0; i -void unpack( std::vector& data, const char *buffer ) -{ - size_t size; - memcpy(&size,buffer,sizeof(size_t)); - data.clear(); - data.resize(size); - size_t pos = sizeof(size_t); - for (size_t i=0; i -size_t packsize( const std::pair& rhs ) -{ - return packsize(rhs.first)+packsize(rhs.second); -} -template -void pack( const std::pair& rhs, char *buffer ) -{ - pack(rhs.first,buffer); - pack(rhs.second,&buffer[packsize(rhs.first)]); -} -template -void unpack( std::pair& data, const char *buffer ) -{ - unpack(data.first,buffer); - unpack(data.second,&buffer[packsize(data.first)]); -} - - -/******************************************************** -* Default instantiations for std::map * -********************************************************/ -template -size_t packsize( const std::map& rhs ) -{ - size_t bytes = sizeof(size_t); - typename std::map::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - bytes += packsize(it->first); - bytes += packsize(it->second); - } - return bytes; -} -template -void pack( const std::map& rhs, char *buffer ) -{ - size_t N = rhs.size(); - pack(N,buffer); - size_t pos = sizeof(size_t); - typename std::map::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - pack(it->first,&buffer[pos]); pos+=packsize(it->first); - pack(it->second,&buffer[pos]); pos+=packsize(it->second); - } -} -template -void unpack( std::map& data, const char *buffer ) -{ - size_t N = 0; - unpack(N,buffer); - size_t pos = sizeof(size_t); - data.clear(); - for (size_t i=0; i tmp; - 
unpack(tmp.first,&buffer[pos]); pos+=packsize(tmp.first); - unpack(tmp.second,&buffer[pos]); pos+=packsize(tmp.second); - data.insert(tmp); - } -} - - -/******************************************************** -* Default instantiations for std::set * -********************************************************/ -template -size_t packsize( const std::set& rhs ) -{ - size_t bytes = sizeof(size_t); - typename std::set::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - bytes += packsize(*it); - } - return bytes; -} -template -void pack( const std::set& rhs, char *buffer ) -{ - size_t N = rhs.size(); - pack(N,buffer); - size_t pos = sizeof(size_t); - typename std::set::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - pack(*it); pos+=packsize(*it); - } -} -template -void unpack( std::set& data, const char *buffer ) -{ - size_t N = 0; - unpack(N,buffer); - size_t pos = sizeof(size_t); - data.clear(); - for (size_t i=0; i Date: Mon, 4 Jan 2021 22:30:05 -0500 Subject: [PATCH 112/205] use FOM morphology --- analysis/morphology.cpp | 53 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index afb00b6f..0980a4f0 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -58,11 +58,11 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr } } } - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); // total Global is the number of nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); + totalGlobal = Dm->Comm.sumReduce( count ); + maxdistGlobal = Dm->Comm.sumReduce( maxdist ); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -131,9 +131,8 @@ double 
MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr // Increase the critical radius until the target saturation is met double deltaR=0.05; // amount to change the radius in voxel units - double Rcrit_old=0.0; + double Rcrit_old; - double GlobalNumber = 1.f; int imin,jmin,kmin,imax,jmax,kmax; if (ErodeLabel == 1){ @@ -220,7 +219,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount("YZ"),Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount("yz"),Dm->rank_yz(),recvtag); Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount("Yz"),Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount("yZ"),Dm->rank_yZ(),recvtag); Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount("yZ"),Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount("Yz"),Dm->rank_Yz(),recvtag); - //...................................................................................... + //...................................................................................... UnpackID(Dm->recvList("x"), Dm->recvCount("x") ,recvID_x, id); UnpackID(Dm->recvList("X"), Dm->recvCount("X") ,recvID_X, id); UnpackID(Dm->recvList("y"), Dm->recvCount("y") ,recvID_y, id); @@ -241,7 +240,7 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id); //...................................................................................... 
- MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + //double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); count = 0.f; for (int k=1; k } } } - MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + countGlobal = Dm->Comm.sumReduce( count ); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); /* if (rank==0){ @@ -286,7 +285,7 @@ double morph_open() fillHalo fillChar(Dm->Comm,Dm->rank_info,{Nx-2,Ny-2,Nz-2},{1,1,1},0,1); - MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); count = 0.f; for (int k=1; kComm); + countGlobal = Dm->Comm.sumReduce( count ); return countGlobal; } */ @@ -342,11 +341,11 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm); + Dm->Comm.barrier(); // total Global is the number of nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); + totalGlobal = Dm->Comm.sumReduce( count ); + maxdistGlobal = Dm->Comm.sumReduce( maxdist ); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -416,7 +415,6 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm); + Dm->Comm.barrier(); FILE *DRAIN = fopen("morphdrain.csv","w"); @@ -528,7 +526,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrrecvList("yZ"), Dm->recvCount("yZ") ,recvID_yZ, id); UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id); //...................................................................................... 
- MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + // double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); for (int k=0; krank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); for (int k=0; krank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); for (int k=1; kComm); + countGlobal = Dm->Comm.sumReduce( count ); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); if (rank==0){ @@ -649,13 +647,13 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr &id, std::shared_ptr Dm, double TargetGrowth, double WallFactor) +double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, std::shared_ptr Dm, double TargetGrowth) { int Nx = Dm->Nx; int Ny = Dm->Ny; int Nz = Dm->Nz; int rank = Dm->rank(); - + double count=0.0; for (int k=1; k &id, } } } - double count_original=sumReduce( Dm->Comm, count); + double count_original = Dm->Comm.sumReduce( count); // Estimate morph_delta double morph_delta = 0.0; @@ -686,7 +684,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, for (int j=1; j MAX_DISPLACEMENT) MAX_DISPLACEMENT= fabs(wallweight*morph_delta); if (Dist(i,j,k) - wallweight*morph_delta < 0.0){ @@ -695,8 +694,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - count=sumReduce( Dm->Comm, count); - MAX_DISPLACEMENT = maxReduce( Dm->Comm, MAX_DISPLACEMENT); + count = Dm->Comm.sumReduce( count ); + MAX_DISPLACEMENT = Dm->Comm.maxReduce( MAX_DISPLACEMENT ); GrowthEstimate = count - count_original; ERROR = fabs((GrowthEstimate-TargetGrowth) /TargetGrowth); @@ -732,14 +731,14 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, for (int j=1; jComm, count); + count = Dm->Comm.sumReduce( count ); return count; } From c998cc24b0cb4d7263d355217b9f6c17bc3fb708 Mon Sep 17 00:00:00 2001 From: 
James McClure Date: Mon, 4 Jan 2021 23:10:21 -0500 Subject: [PATCH 113/205] update to ScaLBL --- analysis/runAnalysis.cpp | 2 +- common/ScaLBL.cpp | 581 +++++++++++++++++++++++++++++++++++---- common/ScaLBL.h | 279 +++++++++++++++++-- 3 files changed, 775 insertions(+), 87 deletions(-) diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 29cbe268..9cb85b6c 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -3,7 +3,7 @@ #include "analysis/analysis.h" #include "common/Array.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/ScaLBL.h" #include "models/ColorModel.h" diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 51821176..8e953f97 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -78,43 +78,43 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ BoundaryCondition = Dm->BoundaryCondition; //...................................................................................... 
- ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, 5*sendCount_x*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, 5*sendCount_X*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, 5*sendCount_y*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, 5*sendCount_Y*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, 5*sendCount_z*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, 5*sendCount_Z*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_xy, sendCount_xy*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_xY, sendCount_xY*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xy, sendCount_Xy*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_XY, sendCount_XY*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_xz, sendCount_xz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_xZ, sendCount_xZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xz, sendCount_Xz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_XZ, sendCount_XZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_yz, sendCount_yz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_yZ, sendCount_yZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_Yz, sendCount_Yz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &sendbuf_YZ, sendCount_YZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, 2*5*sendCount_x*sizeof(double)); // Allocate device memory + 
ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, 2*5*sendCount_X*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, 2*5*sendCount_y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, 2*5*sendCount_Y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, 2*5*sendCount_z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, 2*5*sendCount_Z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xy, 2*sendCount_xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xY, 2*sendCount_xY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xy, 2*sendCount_Xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_XY, 2*sendCount_XY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xz, 2*sendCount_xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_xZ, 2*sendCount_xZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Xz, 2*sendCount_Xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_XZ, 2*sendCount_XZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_yz, 2*sendCount_yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_yZ, 2*sendCount_yZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_Yz, 2*sendCount_Yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &sendbuf_YZ, 2*sendCount_YZ*sizeof(double)); // Allocate device memory //...................................................................................... 
- ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, 5*recvCount_x*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, 5*recvCount_X*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, 5*recvCount_y*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, 5*recvCount_Y*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, 5*recvCount_z*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, 5*recvCount_Z*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_xy, recvCount_xy*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_xY, recvCount_xY*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xy, recvCount_Xy*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_XY, recvCount_XY*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_xz, recvCount_xz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_xZ, recvCount_xZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xz, recvCount_Xz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_XZ, recvCount_XZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_yz, recvCount_yz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_yZ, recvCount_yZ*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_Yz, recvCount_Yz*sizeof(double)); // Allocate device memory - ScaLBL_AllocateZeroCopy((void **) &recvbuf_YZ, recvCount_YZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, 2*5*recvCount_x*sizeof(double)); // Allocate device memory + 
ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, 2*5*recvCount_X*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, 2*5*recvCount_y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, 2*5*recvCount_Y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, 2*5*recvCount_z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, 2*5*recvCount_Z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xy, 2*recvCount_xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xY, 2*recvCount_xY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xy, 2*recvCount_Xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_XY, 2*recvCount_XY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xz, 2*recvCount_xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_xZ, 2*recvCount_xZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Xz, 2*recvCount_Xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_XZ, 2*recvCount_XZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_yz, 2*recvCount_yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_yZ, 2*recvCount_yZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_Yz, 2*recvCount_Yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateZeroCopy((void **) &recvbuf_YZ, 2*recvCount_YZ*sizeof(double)); // Allocate device memory //...................................................................................... 
ScaLBL_AllocateZeroCopy((void **) &dvcSendList_x, sendCount_x*sizeof(int)); // Allocate device memory ScaLBL_AllocateZeroCopy((void **) &dvcSendList_X, sendCount_X*sizeof(int)); // Allocate device memory @@ -174,6 +174,44 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory //...................................................................................... + ScaLBL_CopyToZeroCopy(dvcSendList_x,Dm->sendList_x,sendCount_x*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_X,Dm->sendList_X,sendCount_X*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_y,Dm->sendList_y,sendCount_y*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Y,Dm->sendList_Y,sendCount_Y*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_z,Dm->sendList_z,sendCount_z*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Z,Dm->sendList_Z,sendCount_Z*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xy,Dm->sendList_xy,sendCount_xy*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_XY,Dm->sendList_XY,sendCount_XY*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xY,Dm->sendList_xY,sendCount_xY*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Xy,Dm->sendList_Xy,sendCount_Xy*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xz,Dm->sendList_xz,sendCount_xz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_XZ,Dm->sendList_XZ,sendCount_XZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_xZ,Dm->sendList_xZ,sendCount_xZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Xz,Dm->sendList_Xz,sendCount_Xz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_yz,Dm->sendList_yz,sendCount_yz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_YZ,Dm->sendList_YZ,sendCount_YZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_yZ,Dm->sendList_yZ,sendCount_yZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcSendList_Yz,Dm->sendList_Yz,sendCount_Yz*sizeof(int)); + 
//...................................................................................... + ScaLBL_CopyToZeroCopy(dvcRecvList_x,Dm->recvList_x,recvCount_x*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_X,Dm->recvList_X,recvCount_X*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_y,Dm->recvList_y,recvCount_y*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Y,Dm->recvList_Y,recvCount_Y*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_z,Dm->recvList_z,recvCount_z*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Z,Dm->recvList_Z,recvCount_Z*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xy,Dm->recvList_xy,recvCount_xy*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_XY,Dm->recvList_XY,recvCount_XY*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xY,Dm->recvList_xY,recvCount_xY*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Xy,Dm->recvList_Xy,recvCount_Xy*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xz,Dm->recvList_xz,recvCount_xz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_XZ,Dm->recvList_XZ,recvCount_XZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_xZ,Dm->recvList_xZ,recvCount_xZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Xz,Dm->recvList_Xz,recvCount_Xz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_yz,Dm->recvList_yz,recvCount_yz*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_YZ,Dm->recvList_YZ,recvCount_YZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_yZ,Dm->recvList_yZ,recvCount_yZ*sizeof(int)); + ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList_Yz,recvCount_Yz*sizeof(int)); + //...................................................................................... 
ScaLBL_CopyToZeroCopy(dvcSendList_x,Dm->sendList("x"),sendCount_x*sizeof(int)); ScaLBL_CopyToZeroCopy(dvcSendList_X,Dm->sendList("X"),sendCount_X*sizeof(int)); ScaLBL_CopyToZeroCopy(dvcSendList_y,Dm->sendList("y"),sendCount_y*sizeof(int)); @@ -319,7 +357,7 @@ int ScaLBL_Communicator::LastInterior(){ return last_interior; } -void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count, +void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist){ int i,j,k,n,nn,idx; int * ReturnDist; @@ -363,14 +401,18 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis int idx,i,j,k,n; // Check that Map has size matching sub-domain - if ( (int) Map.size(0) != Nx) + if (Map.size(0) != Nx) ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Map array dimensions do not match! \n"); // Initialize Map for (k=0;k 0) + Map(i,j,k) = -2; // this label is for parallel communication sites + else + Map(i,j,k) = -1; // this label is for solid bounce-back sites } } } @@ -520,7 +562,7 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis } } } - + //for (idx=0; idx 0){ + if (kproc != 0){ + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*7*N],N); + } + if (kproc != nprocz-1){ + //...Packing for Z face(5,11,14,15,18)................................ + ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*7*N],N); + } + } + else { + //...Packing for z face(6,12,13,16,17)................................ + ScaLBL_D3Q7_Unpack(6,dvcRecvDist_z,0,recvCount_z,recvbuf_z,&Aq[Component*7*N],N); + //...Packing for Z face(5,11,14,15,18)................................ 
+ ScaLBL_D3Q7_Unpack(5,dvcRecvDist_Z,0,recvCount_Z,recvbuf_Z,&Aq[Component*7*N],N); + } + + //................................................................................... + Lock=false; // unlock the communicator after communications complete + //................................................................................... + +} + void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){ // NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2 @@ -1478,6 +1850,7 @@ void ScaLBL_Communicator::RecvHalo(double *data){ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, DoubleArray ®data){ // Gets data from the device and stores in regular layout + int i,j,k,idx; int Nx = map.size(0); int Ny = map.size(1); int Nz = map.size(2); @@ -1489,10 +1862,11 @@ void ScaLBL_Communicator::RegularLayout(IntArray map, const double *data, Double double value; TmpDat = new double [N]; ScaLBL_CopyToHost(&TmpDat[0],&data[0], N*sizeof(double)); - for (int k=0; k Date: Mon, 4 Jan 2021 23:29:02 -0500 Subject: [PATCH 114/205] merging FOM with new functions --- common/ScaLBL.cpp | 40 ++-------------------------------------- common/ScaLBL.h | 4 +--- 2 files changed, 3 insertions(+), 41 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 8e953f97..74bc7b9f 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -174,43 +174,6 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory //...................................................................................... 
- ScaLBL_CopyToZeroCopy(dvcSendList_x,Dm->sendList_x,sendCount_x*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_X,Dm->sendList_X,sendCount_X*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_y,Dm->sendList_y,sendCount_y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Y,Dm->sendList_Y,sendCount_Y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_z,Dm->sendList_z,sendCount_z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Z,Dm->sendList_Z,sendCount_Z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xy,Dm->sendList_xy,sendCount_xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_XY,Dm->sendList_XY,sendCount_XY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xY,Dm->sendList_xY,sendCount_xY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Xy,Dm->sendList_Xy,sendCount_Xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xz,Dm->sendList_xz,sendCount_xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_XZ,Dm->sendList_XZ,sendCount_XZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_xZ,Dm->sendList_xZ,sendCount_xZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Xz,Dm->sendList_Xz,sendCount_Xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_yz,Dm->sendList_yz,sendCount_yz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_YZ,Dm->sendList_YZ,sendCount_YZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_yZ,Dm->sendList_yZ,sendCount_yZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcSendList_Yz,Dm->sendList_Yz,sendCount_Yz*sizeof(int)); - //...................................................................................... 
- ScaLBL_CopyToZeroCopy(dvcRecvList_x,Dm->recvList_x,recvCount_x*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_X,Dm->recvList_X,recvCount_X*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_y,Dm->recvList_y,recvCount_y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Y,Dm->recvList_Y,recvCount_Y*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_z,Dm->recvList_z,recvCount_z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Z,Dm->recvList_Z,recvCount_Z*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xy,Dm->recvList_xy,recvCount_xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_XY,Dm->recvList_XY,recvCount_XY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xY,Dm->recvList_xY,recvCount_xY*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Xy,Dm->recvList_Xy,recvCount_Xy*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xz,Dm->recvList_xz,recvCount_xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_XZ,Dm->recvList_XZ,recvCount_XZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_xZ,Dm->recvList_xZ,recvCount_xZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Xz,Dm->recvList_Xz,recvCount_Xz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_yz,Dm->recvList_yz,recvCount_yz*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_YZ,Dm->recvList_YZ,recvCount_YZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_yZ,Dm->recvList_yZ,recvCount_yZ*sizeof(int)); - ScaLBL_CopyToZeroCopy(dvcRecvList_Yz,Dm->recvList_Yz,recvCount_Yz*sizeof(int)); //...................................................................................... 
ScaLBL_CopyToZeroCopy(dvcSendList_x,Dm->sendList("x"),sendCount_x*sizeof(int)); ScaLBL_CopyToZeroCopy(dvcSendList_X,Dm->sendList("X"),sendCount_X*sizeof(int)); @@ -357,7 +320,7 @@ int ScaLBL_Communicator::LastInterior(){ return last_interior; } -void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, int start, int count, +void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count, int *d3q19_recvlist){ int i,j,k,n,nn,idx; int * ReturnDist; @@ -388,6 +351,7 @@ void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, i delete [] ReturnDist; } + int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np){ /* * Generate a memory optimized layout diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 59ab53b9..8cd42af4 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -345,7 +345,6 @@ public: //ScaLBL_Communicator(Domain &Dm, IntArray &Map); ~ScaLBL_Communicator(); //...................................................................................... 
- MPI_Comm MPI_COMM_SCALBL; // MPI Communicator unsigned long int CommunicationCount,SendCount,RecvCount; int Nx,Ny,Nz,N; int n_bb_d3q7, n_bb_d3q19; @@ -412,8 +411,7 @@ public: void PrintD3Q19(); private: - //void D3Q19_MapRecv_OLD(int q, int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist); - void D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, int start, int count, int *d3q19_recvlist); + void D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count, int *d3q19_recvlist); bool Lock; // use Lock to make sure only one call at a time to protect data in transit // only one set of Send requests can be active at any time (per instance) From cf52427539635f928def8fe7170a8f327d24ebeb Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 Jan 2021 23:35:10 -0500 Subject: [PATCH 115/205] gradually putting it all back together --- IO/MeshDatabase.cpp | 145 +++++++++++++++++++-------------------- IO/MeshDatabase.h | 4 +- analysis/runAnalysis.cpp | 49 ++++++------- analysis/runAnalysis.h | 8 +-- analysis/uCT.cpp | 11 ++- 5 files changed, 101 insertions(+), 116 deletions(-) diff --git a/IO/MeshDatabase.cpp b/IO/MeshDatabase.cpp index 1fad9231..2c03ddde 100644 --- a/IO/MeshDatabase.cpp +++ b/IO/MeshDatabase.cpp @@ -1,7 +1,8 @@ #include "IO/MeshDatabase.h" #include "IO/Mesh.h" +#include "IO/PackData.h" #include "IO/IOHelpers.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include @@ -13,8 +14,6 @@ -/**************************************************** -****************************************************/ // MeshType template<> size_t packsize( const IO::MeshType& rhs ) @@ -247,80 +246,76 @@ void DatabaseEntry::read( const std::string& line ) // Gather the mesh databases from all processors inline int tod( int N ) { return (N+7)/sizeof(double); } -std::vector gatherAll( const std::vector& meshes, MPI_Comm comm ) +std::vector gatherAll( const std::vector& meshes, const 
Utilities::MPI& comm ) { - #ifdef USE_MPI - PROFILE_START("gatherAll"); - PROFILE_START("gatherAll-pack",2); - int size = MPI_WORLD_SIZE(); - // First pack the mesh data to local buffers - int localsize = 0; - for (size_t i=0; i data; - pos = 0; - while ( pos < globalsize ) { - MeshDatabase tmp; - unpack(tmp,(char*)&globalbuf[pos]); - pos += tod(packsize(tmp)); - std::map::iterator it = data.find(tmp.name); - if ( it==data.end() ) { - data[tmp.name] = tmp; - } else { - for (size_t i=0; isecond.domains.push_back(tmp.domains[i]); - for (size_t i=0; isecond.variables.push_back(tmp.variables[i]); - it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end()); - } - } - for (std::map::iterator it=data.begin(); it!=data.end(); ++it) { - // Get the unique variables - std::set data2(it->second.variables.begin(),it->second.variables.end()); - it->second.variables = std::vector(data2.begin(),data2.end()); - } - // Free temporary memory - delete [] localbuf; - delete [] recvsize; - delete [] disp; - delete [] globalbuf; - // Return the results - std::vector data2(data.size()); - size_t i=0; - for (std::map::iterator it=data.begin(); it!=data.end(); ++it, ++i) - data2[i] = it->second; - PROFILE_STOP("gatherAll-unpack",2); - PROFILE_STOP("gatherAll"); - return data2; - #else + if ( comm.getSize() == 1 ) return meshes; - #endif + PROFILE_START("gatherAll"); + PROFILE_START("gatherAll-pack",2); + int size = comm.getSize(); + // First pack the mesh data to local buffers + int localsize = 0; + for (size_t i=0; i data; + pos = 0; + while ( pos < globalsize ) { + MeshDatabase tmp; + unpack(tmp,(char*)&globalbuf[pos]); + pos += tod(packsize(tmp)); + std::map::iterator it = data.find(tmp.name); + if ( it==data.end() ) { + data[tmp.name] = tmp; + } else { + for (size_t i=0; isecond.domains.push_back(tmp.domains[i]); + for (size_t i=0; isecond.variables.push_back(tmp.variables[i]); + it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end()); 
+ } + } + for (auto it=data.begin(); it!=data.end(); ++it) { + // Get the unique variables + std::set data2(it->second.variables.begin(),it->second.variables.end()); + it->second.variables = std::vector(data2.begin(),data2.end()); + } + // Free temporary memory + delete [] localbuf; + delete [] disp; + delete [] globalbuf; + // Return the results + std::vector data2(data.size()); + size_t i=0; + for (std::map::iterator it=data.begin(); it!=data.end(); ++it, ++i) + data2[i] = it->second; + PROFILE_STOP("gatherAll-unpack",2); + PROFILE_STOP("gatherAll"); + return data2; } diff --git a/IO/MeshDatabase.h b/IO/MeshDatabase.h index 9f544925..8e501624 100644 --- a/IO/MeshDatabase.h +++ b/IO/MeshDatabase.h @@ -2,7 +2,7 @@ #define MeshDatabase_INC #include "IO/Mesh.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include #include @@ -70,7 +70,7 @@ public: //! Gather the mesh databases from all processors -std::vector gatherAll( const std::vector& meshes, MPI_Comm comm ); +std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ); //! 
Write the mesh databases to a file diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 9cb85b6c..c09b71c2 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -462,7 +462,7 @@ private: /****************************************************************** * MPI comm wrapper for use with analysis * ******************************************************************/ -runAnalysis::commWrapper::commWrapper( int tag_, MPI_Comm comm_, runAnalysis* analysis_ ): +runAnalysis::commWrapper::commWrapper( int tag_, const Utilities::MPI& comm_, runAnalysis* analysis_ ): comm(comm_), tag(tag_), analysis(analysis_) @@ -479,7 +479,7 @@ runAnalysis::commWrapper::~commWrapper() { if ( tag == -1 ) return; - MPI_Barrier( comm ); + comm.barrier(); analysis->d_comm_used[tag] = false; } runAnalysis::commWrapper runAnalysis::getComm( ) @@ -496,10 +496,10 @@ runAnalysis::commWrapper runAnalysis::getComm( ) if ( tag == -1 ) ERROR("Unable to get comm"); } - MPI_Bcast( &tag, 1, MPI_INT, 0, d_comm ); + tag = d_comm.bcast( tag, 0 ); d_comm_used[tag] = true; - if ( d_comms[tag] == MPI_COMM_NULL ) - MPI_Comm_dup( MPI_COMM_WORLD, &d_comms[tag] ); + if ( d_comms[tag].isNull() ) + d_comms[tag] = d_comm.dup(); return commWrapper(tag,d_comms[tag],this); } @@ -560,7 +560,7 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, d_restartFile = restart_file + "." 
+ rankString; - d_rank = MPI_WORLD_RANK(); + d_rank = d_comm.getRank(); writeIDMap(ID_map_struct(),0,id_map_filename); // Initialize IO for silo IO::initialize("","silo","false"); @@ -629,11 +629,8 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, // Initialize the comms - MPI_Comm_dup(MPI_COMM_WORLD,&d_comm); - for (int i=0; i<1024; i++) { - d_comms[i] = MPI_COMM_NULL; + for (int i=0; i<1024; i++) d_comm_used[i] = false; - } // Initialize the threads int N_threads = db->getWithDefault( "N_threads", 4 ); auto method = db->getWithDefault( "load_balance", "default" ); @@ -643,12 +640,6 @@ runAnalysis::~runAnalysis( ) { // Finish processing analysis finish(); - // Clear internal data - MPI_Comm_free( &d_comm ); - for (int i=0; i<1024; i++) { - if ( d_comms[i] != MPI_COMM_NULL ) - MPI_Comm_free(&d_comms[i]); - } } void runAnalysis::finish( ) { @@ -662,7 +653,7 @@ void runAnalysis::finish( ) d_wait_subphase.reset(); d_wait_restart.reset(); // Syncronize - MPI_Barrier( d_comm ); + d_comm.barrier(); PROFILE_STOP("finish"); } @@ -915,12 +906,12 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase // Spawn a thread to write the restart file // if ( matches(type,AnalysisType::CreateRestart) ) { if (timestep%d_restart_interval==0){ - auto Restart_db = input_db->cloneDatabase(); - // Restart_db->putScalar( "Restart", true ); + if (d_rank==0) { - // std::ofstream OutStream("Restart.db"); - // Restart_db->print(OutStream, ""); - // OutStream.close(); + input_db->putScalar( "Restart", true ); + std::ofstream OutStream("Restart.db"); + input_db->print(OutStream, ""); + OutStream.close(); } // Write the restart file (using a seperate thread) auto work = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); @@ -1019,21 +1010,21 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha cfq = std::shared_ptr(new double[19*d_Np],DeleteArray); ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); 
ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); - // clone the input database to avoid modifying shared data - auto Restart_db = input_db->cloneDatabase(); - auto tmp_color_db = Restart_db->getDatabase( "Color" ); - tmp_color_db->putScalar("timestep",timestep); - tmp_color_db->putScalar( "Restart", true ); - Restart_db->putDatabase("Color", tmp_color_db); + if (d_rank==0) { + color_db->putScalar("timestep",timestep); + color_db->putScalar( "Restart", true ); + input_db->putDatabase("Color", color_db); std::ofstream OutStream("Restart.db"); - Restart_db->print(OutStream, ""); + input_db->print(OutStream, ""); OutStream.close(); + } // Write the restart file (using a seperate thread) auto work1 = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); work1->add_dependency(d_wait_restart); d_wait_restart = d_tpool.add_work(work1); + } if (timestep%d_visualization_interval==0){ diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h index 55032d65..33adbcb0 100644 --- a/analysis/runAnalysis.h +++ b/analysis/runAnalysis.h @@ -68,10 +68,10 @@ public: class commWrapper { public: - MPI_Comm comm; + Utilities::MPI comm; int tag; runAnalysis *analysis; - commWrapper( int tag, MPI_Comm comm, runAnalysis *analysis ); + commWrapper( int tag, const Utilities::MPI& comm, runAnalysis *analysis ); commWrapper( ) = delete; commWrapper( const commWrapper &rhs ) = delete; commWrapper& operator=( const commWrapper &rhs ) = delete; @@ -100,8 +100,8 @@ private: BlobIDList d_last_id_map; std::vector d_meshData; std::string d_restartFile; - MPI_Comm d_comm; - MPI_Comm d_comms[1024]; + Utilities::MPI d_comm; + Utilities::MPI d_comms[1024]; volatile bool d_comm_used[1024]; std::shared_ptr d_ScaLBL_Comm; diff --git a/analysis/uCT.cpp b/analysis/uCT.cpp index 912f8e85..28d677c1 100644 --- a/analysis/uCT.cpp +++ b/analysis/uCT.cpp @@ -228,8 +228,7 @@ void filter_final( Array& ID, Array& Dist, Array& Mean, Array& Dist1, Array& Dist2 ) { PROFILE_SCOPED(timer,"filter_final"); 
- int rank; - MPI_Comm_rank(Dm.Comm,&rank); + int rank = Dm.Comm.getRank(); int Nx = Dm.Nx-2; int Ny = Dm.Ny-2; int Nz = Dm.Nz-2; @@ -242,7 +241,7 @@ void filter_final( Array& ID, Array& Dist, float tmp = 0; for (size_t i=0; i(Dist0.length()) ); const float dx1 = 0.3*tmp; const float dx2 = 1.05*dx1; if (rank==0) @@ -285,7 +284,7 @@ void filter_final( Array& ID, Array& Dist, Phase.fill(1); ComputeGlobalBlobIDs( Nx, Ny, Nz, Dm.rank_info, Phase, SignDist, 0, 0, GlobalBlobID, Dm.Comm ); fillInt.fill(GlobalBlobID); - int N_blobs = maxReduce(Dm.Comm,GlobalBlobID.max()+1); + int N_blobs = Dm.Comm.maxReduce(GlobalBlobID.max()+1); std::vector mean(N_blobs,0); std::vector count(N_blobs,0); for (int k=1; k<=Nz; k++) { @@ -321,8 +320,8 @@ void filter_final( Array& ID, Array& Dist, } } } - mean = sumReduce(Dm.Comm,mean); - count = sumReduce(Dm.Comm,count); + mean = Dm.Comm.sumReduce(mean); + count = Dm.Comm.sumReduce(count); for (size_t i=0; i Date: Mon, 4 Jan 2021 23:37:44 -0500 Subject: [PATCH 116/205] IO direct from FOM --- IO/PackData.cpp | 105 ++++++++++++++++++++++++++++++++ IO/PackData.h | 78 ++++++++++++++++++++++++ IO/PackData.hpp | 155 ++++++++++++++++++++++++++++++++++++++++++++++++ IO/netcdf.cpp | 14 ++--- IO/netcdf.h | 4 +- 5 files changed, 347 insertions(+), 9 deletions(-) create mode 100644 IO/PackData.cpp create mode 100644 IO/PackData.h create mode 100644 IO/PackData.hpp diff --git a/IO/PackData.cpp b/IO/PackData.cpp new file mode 100644 index 00000000..f10d9ca7 --- /dev/null +++ b/IO/PackData.cpp @@ -0,0 +1,105 @@ +#include "IO/PackData.h" + +#include + + +/******************************************************** +* Concrete implimentations for packing/unpacking * +********************************************************/ +// unsigned char +template<> +size_t packsize( const unsigned char& rhs ) +{ + return sizeof(unsigned char); +} +template<> +void pack( const unsigned char& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(unsigned char)); +} +template<> 
+void unpack( unsigned char& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(unsigned char)); +} +// char +template<> +size_t packsize( const char& rhs ) +{ + return sizeof(char); +} +template<> +void pack( const char& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(char)); +} +template<> +void unpack( char& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(char)); +} +// int +template<> +size_t packsize( const int& rhs ) +{ + return sizeof(int); +} +template<> +void pack( const int& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(int)); +} +template<> +void unpack( int& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(int)); +} +// unsigned int +template<> +size_t packsize( const unsigned int& rhs ) +{ + return sizeof(unsigned int); +} +template<> +void pack( const unsigned int& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(int)); +} +template<> +void unpack( unsigned int& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(int)); +} +// size_t +template<> +size_t packsize( const size_t& rhs ) +{ + return sizeof(size_t); +} +template<> +void pack( const size_t& rhs, char *buffer ) +{ + memcpy(buffer,&rhs,sizeof(size_t)); +} +template<> +void unpack( size_t& data, const char *buffer ) +{ + memcpy(&data,buffer,sizeof(size_t)); +} +// std::string +template<> +size_t packsize( const std::string& rhs ) +{ + return rhs.size()+1; +} +template<> +void pack( const std::string& rhs, char *buffer ) +{ + memcpy(buffer,rhs.c_str(),rhs.size()+1); +} +template<> +void unpack( std::string& data, const char *buffer ) +{ + data = std::string(buffer); +} + diff --git a/IO/PackData.h b/IO/PackData.h new file mode 100644 index 00000000..85326c0b --- /dev/null +++ b/IO/PackData.h @@ -0,0 +1,78 @@ +// This file contains unctions to pack/unpack data structures +#ifndef included_PackData +#define included_PackData + +#include +#include +#include + + +//! 
Template function to return the buffer size required to pack a class +template +size_t packsize( const TYPE& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const TYPE& rhs, char *buffer ); + +//! Template function to unpack a class from a buffer +template +void unpack( TYPE& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::vector +template +size_t packsize( const std::vector& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::vector& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::vector& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::pair +template +size_t packsize( const std::pair& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::pair& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::pair& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::map +template +size_t packsize( const std::map& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::map& rhs, char *buffer ); + +//! Template function to pack a class to a buffer +template +void unpack( std::map& data, const char *buffer ); + + +//! Template function to return the buffer size required to pack a std::set +template +size_t packsize( const std::set& rhs ); + +//! Template function to pack a class to a buffer +template +void pack( const std::set& rhs, char *buffer ); + +//! 
Template function to pack a class to a buffer +template +void unpack( std::set& data, const char *buffer ); + + +#include "IO/PackData.hpp" + +#endif + diff --git a/IO/PackData.hpp b/IO/PackData.hpp new file mode 100644 index 00000000..006cdf73 --- /dev/null +++ b/IO/PackData.hpp @@ -0,0 +1,155 @@ +// This file functions to pack/unpack data structures +#ifndef included_PackData_hpp +#define included_PackData_hpp + +#include "IO/PackData.h" + +#include +#include +#include +#include + + + +/******************************************************** +* Default instantiations for std::vector * +********************************************************/ +template +size_t packsize( const std::vector& rhs ) +{ + size_t bytes = sizeof(size_t); + for (size_t i=0; i +void pack( const std::vector& rhs, char *buffer ) +{ + size_t size = rhs.size(); + memcpy(buffer,&size,sizeof(size_t)); + size_t pos = sizeof(size_t); + for (size_t i=0; i +void unpack( std::vector& data, const char *buffer ) +{ + size_t size; + memcpy(&size,buffer,sizeof(size_t)); + data.clear(); + data.resize(size); + size_t pos = sizeof(size_t); + for (size_t i=0; i +size_t packsize( const std::pair& rhs ) +{ + return packsize(rhs.first)+packsize(rhs.second); +} +template +void pack( const std::pair& rhs, char *buffer ) +{ + pack(rhs.first,buffer); + pack(rhs.second,&buffer[packsize(rhs.first)]); +} +template +void unpack( std::pair& data, const char *buffer ) +{ + unpack(data.first,buffer); + unpack(data.second,&buffer[packsize(data.first)]); +} + + +/******************************************************** +* Default instantiations for std::map * +********************************************************/ +template +size_t packsize( const std::map& rhs ) +{ + size_t bytes = sizeof(size_t); + typename std::map::const_iterator it; + for (it=rhs.begin(); it!=rhs.end(); ++it) { + bytes += packsize(it->first); + bytes += packsize(it->second); + } + return bytes; +} +template +void pack( const std::map& rhs, char 
*buffer ) +{ + size_t N = rhs.size(); + pack(N,buffer); + size_t pos = sizeof(size_t); + typename std::map::const_iterator it; + for (it=rhs.begin(); it!=rhs.end(); ++it) { + pack(it->first,&buffer[pos]); pos+=packsize(it->first); + pack(it->second,&buffer[pos]); pos+=packsize(it->second); + } +} +template +void unpack( std::map& data, const char *buffer ) +{ + size_t N = 0; + unpack(N,buffer); + size_t pos = sizeof(size_t); + data.clear(); + for (size_t i=0; i tmp; + unpack(tmp.first,&buffer[pos]); pos+=packsize(tmp.first); + unpack(tmp.second,&buffer[pos]); pos+=packsize(tmp.second); + data.insert(tmp); + } +} + + +/******************************************************** +* Default instantiations for std::set * +********************************************************/ +template +size_t packsize( const std::set& rhs ) +{ + size_t bytes = sizeof(size_t); + typename std::set::const_iterator it; + for (it=rhs.begin(); it!=rhs.end(); ++it) { + bytes += packsize(*it); + } + return bytes; +} +template +void pack( const std::set& rhs, char *buffer ) +{ + size_t N = rhs.size(); + pack(N,buffer); + size_t pos = sizeof(size_t); + typename std::set::const_iterator it; + for (it=rhs.begin(); it!=rhs.end(); ++it) { + pack(*it); pos+=packsize(*it); + } +} +template +void unpack( std::set& data, const char *buffer ) +{ + size_t N = 0; + unpack(N,buffer); + size_t pos = sizeof(size_t); + data.clear(); + for (size_t i=0; i getVar( int fid, const std::string& var, const std::vector& sta std::vector var_size = getVarDim( fid, var ); for (int d=0; d<(int)var_size.size(); d++) { if ( start[d]<0 || start[d]+stride[d]*(count[d]-1)>(int)var_size[d] ) { - int rank = comm_rank(MPI_COMM_WORLD); + int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); char tmp[1000]; sprintf(tmp,"%i: Range exceeded array dimension:\n" " start[%i]=%i, count[%i]=%i, stride[%i]=%i, var_size[%i]=%i", diff --git a/IO/netcdf.h b/IO/netcdf.h index c71560a9..e1f65e61 100644 --- a/IO/netcdf.h +++ b/IO/netcdf.h @@ 
-5,7 +5,7 @@ #include #include "common/Array.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" @@ -32,7 +32,7 @@ std::string VariableTypeName( VariableType type ); * @param mode Open the file for reading or writing * @param comm MPI communicator to use (MPI_COMM_WORLD: don't use parallel netcdf) */ -int open( const std::string& filename, FileMode mode, MPI_Comm comm=MPI_COMM_NULL ); +int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm=MPI_COMM_NULL ); /*! From 86de0442d1490b0233faea23a51f8df05d9ac791 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 4 Jan 2021 23:48:21 -0500 Subject: [PATCH 117/205] workin --- models/ColorModel.cpp | 26 +++++++++++++------------- models/ColorModel.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 49b5b313..482c78bf 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -293,7 +293,7 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) for (int i=0; iid[i] = Mask->id[i]; for (size_t idx=0; idxComm, label_count[idx]); + label_count_global[idx]=Dm->Comm.sumReduce( label_count[idx]); if (rank==0){ printf("Component labels: %lu \n",NLABELS); @@ -1044,8 +1044,8 @@ double ScaLBL_ColorModel::ImageInit(std::string Filename){ } } - Count=sumReduce( Dm->Comm, Count); - PoreCount=sumReduce( Dm->Comm, PoreCount); + Count=Dm->Comm.sumReduce( Count); + PoreCount=Dm->Comm.sumReduce( PoreCount); if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount); ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double)); @@ -1108,9 +1108,9 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ } } } - count_connected=sumReduce( Dm->Comm, count_connected); - count_porespace=sumReduce( Dm->Comm, count_porespace); - count_water=sumReduce( Dm->Comm, count_water); + count_connected=Dm->Comm.sumReduce( count_connected); + 
count_porespace=Dm->Comm.sumReduce( count_porespace); + count_water=Dm->Comm.sumReduce( count_water); for (int k=0; kComm, count_morphopen); + count_morphopen=Dm->Comm.sumReduce( count_morphopen); volume_change = double(count_morphopen - count_connected); if (rank==0) printf(" opening of connected oil %f \n",volume_change/count_connected); @@ -1268,8 +1268,8 @@ double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ mass_loss += random_value*seed_water_in_oil; } - count= sumReduce( Dm->Comm, count); - mass_loss= sumReduce( Dm->Comm, mass_loss); + count= Dm->Comm.sumReduce( count); + mass_loss= Dm->Comm.sumReduce( mass_loss); if (rank == 0) printf("Remove mass %f from %f voxels \n",mass_loss,count); // Need to initialize Aq, Bq, Den, Phi directly @@ -1308,7 +1308,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - double volume_initial = sumReduce( Dm->Comm, count); + double volume_initial = Dm->Comm.sumReduce( count); /* sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank); FILE *INPUT = fopen(LocalRankFilename,"wb"); @@ -1343,8 +1343,8 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - volume_connected = sumReduce( Dm->Comm, count); - second_biggest = sumReduce( Dm->Comm, second_biggest); + volume_connected = Dm->Comm.sumReduce( count); + second_biggest = Dm->Comm.sumReduce( second_biggest); } else { // use the whole NWP @@ -1455,7 +1455,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } } - double volume_final= sumReduce( Dm->Comm, count); + double volume_final= Dm->Comm.sumReduce( count); delta_volume = (volume_final-volume_initial); if (rank == 0) printf("MorphInit: change fluid volume fraction by %f \n", delta_volume/volume_initial); diff --git a/models/ColorModel.h b/models/ColorModel.h index a3b3a124..85f35c06 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -12,7 +12,7 @@ Implementation of color lattice 
boltzmann model #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" From 74e8c5e75fc0b7b17be6aab7f5b13fe7242742f4 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 5 Jan 2021 00:04:59 -0500 Subject: [PATCH 118/205] update barrier in ScaLBL / color model --- common/ScaLBL.h | 4 ++++ models/ColorModel.cpp | 41 +++++++++++++++++++---------------------- models/ColorModel.h | 4 ++-- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 8cd42af4..0fb995f3 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -371,6 +371,10 @@ public: int LastInterior(); int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np); + void Barrier(){ + ScaLBL_DeviceBarrier(); + MPI_COMM_SCALBL.barrier(); + }; void SendD3Q19AA(double *dist); void RecvD3Q19AA(double *dist); void SendD3Q7AA(double *fq, int Component); diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 482c78bf..2c8c044d 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -168,9 +168,9 @@ void ScaLBL_ColorModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object Averages = std::shared_ptr ( new SubPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); // Read domain parameters rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -386,7 +386,7 @@ void ScaLBL_ColorModel::Create(){ } } ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] TmpMap; // copy the neighbor list @@ -464,9 +464,9 @@ void ScaLBL_ColorModel::Initialize(){ ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double)); ScaLBL_CopyToDevice(fq,cDist,19*Np*sizeof(double)); 
ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); - MPI_Barrier(comm); + comm.barrier(); } if (rank==0) printf ("Initializing phase field \n"); @@ -653,8 +653,8 @@ void ScaLBL_ColorModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); + comm.barrier(); starttime = MPI_Wtime(); //......................................... @@ -675,7 +675,7 @@ void ScaLBL_ColorModel::Run(){ ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation @@ -691,7 +691,7 @@ void ScaLBL_ColorModel::Run(){ alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_Regular->RecvHalo(Phi); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set BCs if (BoundaryCondition == 3){ ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); @@ -707,8 +707,7 @@ void ScaLBL_ColorModel::Run(){ } ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -716,7 +715,7 @@ void ScaLBL_ColorModel::Run(){ ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); 
ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation @@ -731,7 +730,7 @@ void ScaLBL_ColorModel::Run(){ alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_Regular->RecvHalo(Phi); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set boundary conditions if (BoundaryCondition == 3){ ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); @@ -747,8 +746,7 @@ void ScaLBL_ColorModel::Run(){ } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); //************************************************************************ PROFILE_STOP("Update"); @@ -992,14 +990,13 @@ void ScaLBL_ColorModel::Run(){ } morph_timesteps += analysis_interval; } - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + comm.barrier(); } analysis.finish(); PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -1049,12 +1046,12 @@ double ScaLBL_ColorModel::ImageInit(std::string Filename){ if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount); ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz*sizeof(double)); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + comm.barrier(); ScaLBL_D3Q19_Init(fq, Np); ScaLBL_PhaseField_Init(dvcMap, Phi, 
Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + comm.barrier(); ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double)); @@ -1086,7 +1083,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + comm.barrier(); long long count_connected=0; long long count_porespace=0; @@ -1322,7 +1319,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta if (USE_CONNECTED_NWP){ BlobIDstruct new_index; ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); - MPI_Barrier(Dm->Comm); + comm.barrier(); // only operate on component "0" count = 0.0; diff --git a/models/ColorModel.h b/models/ColorModel.h index 85f35c06..e1ba0355 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -68,8 +68,8 @@ public: double *Pressure; private: - MPI_Comm comm; - + Utilities::MPI comm; + int dist_mem_size; int neighborSize; // filenames From 7e51ba962b2fadd32bd1da004f4d33bd937df63a Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 5 Jan 2021 00:08:39 -0500 Subject: [PATCH 119/205] barrier in DFH --- models/DFHModel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 98fa6d32..26a57656 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -213,7 +213,7 @@ void ScaLBL_DFHModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); - comm.barrier(); + ScaLBL_Comm->Barrier(); //........................................................................... 
// MAIN VARIABLES ALLOCATED HERE //........................................................................... From 77c2949b072cd5e6a22d379910f2d0c1ef206155 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 5 Jan 2021 00:15:36 -0500 Subject: [PATCH 120/205] werkin --- models/GreyscaleColorModel.cpp | 58 ++++++++++++++++------------------ models/GreyscaleModel.cpp | 12 +++---- models/IonModel.cpp | 24 +++++++------- models/PoissonSolver.cpp | 38 +++++++++++----------- models/StokesModel.cpp | 34 ++++++++++---------- 5 files changed, 81 insertions(+), 85 deletions(-) diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index f1b37dad..009b02f5 100644 --- a/models/GreyscaleColorModel.cpp +++ b/models/GreyscaleColorModel.cpp @@ -273,7 +273,7 @@ void ScaLBL_GreyscaleColorModel::AssignComponentLabels() for (int i=0; iid[i] = Mask->id[i]; for (size_t idx=0; idxComm, label_count[idx]); + label_count_global[idx] = Dm->Comm.sumReduce( label_count[idx]); if (rank==0){ printf("Number of component labels: %lu \n",NLABELS); @@ -286,8 +286,7 @@ void ScaLBL_GreyscaleColorModel::AssignComponentLabels() } ScaLBL_CopyToDevice(Phi, phase, N*sizeof(double)); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); delete [] phase; } @@ -447,7 +446,7 @@ void ScaLBL_GreyscaleColorModel::AssignGreySolidLabels()//Model-4 ScaLBL_CopyToDevice(GreySolidGrad, GreySolidGrad_host, 3*Np*sizeof(double)); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] SolidPotential_host; delete [] GreySolidGrad_host; delete [] Dst; @@ -545,7 +544,7 @@ void ScaLBL_GreyscaleColorModel::AssignGreyPoroPermLabels() // Set Dm to match Mask for (int i=0; iid[i] = Mask->id[i]; - for (int idx=0; idxComm, label_count[idx]); + for (int idx=0; idxComm.sumReduce( label_count[idx]); //Initialize a weighted porosity after considering grey voxels GreyPorosity=0.0; @@ -571,7 +570,7 @@ void ScaLBL_GreyscaleColorModel::AssignGreyPoroPermLabels() 
ScaLBL_CopyToDevice(Porosity_dvc, Porosity, Np*sizeof(double)); ScaLBL_CopyToDevice(Permeability_dvc, Permeability, Np*sizeof(double)); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] Porosity; delete [] Permeability; } @@ -658,7 +657,7 @@ void ScaLBL_GreyscaleColorModel::Create(){ } } ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] TmpMap; // copy the neighbor list @@ -739,7 +738,7 @@ void ScaLBL_GreyscaleColorModel::Initialize(){ ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double)); ScaLBL_CopyToDevice(fq,cDist,19*Np*sizeof(double)); ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); @@ -912,7 +911,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); starttime = MPI_Wtime(); //......................................... 
@@ -934,7 +933,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation @@ -955,7 +954,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ // alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_Regular->RecvHalo(Phi); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set BCs if (BoundaryCondition == 3){ ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); @@ -978,16 +977,15 @@ void ScaLBL_GreyscaleColorModel::Run(){ //ScaLBL_D3Q19_AAodd_GreyscaleColor(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi,GreySolidPhi,Porosity_dvc,Permeability_dvc,Velocity, // rhoA, rhoB, tauA, tauB,tauA_eff, tauB_eff, // alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - + ScaLBL_Comm->Barrier(); + // *************EVEN TIMESTEP************* timestep++; // Compute the Phase indicator field ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation @@ -1008,7 +1006,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ // alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); 
ScaLBL_Comm_Regular->RecvHalo(Phi); ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set boundary conditions if (BoundaryCondition == 3){ ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); @@ -1031,9 +1029,8 @@ void ScaLBL_GreyscaleColorModel::Run(){ //ScaLBL_D3Q19_AAeven_GreyscaleColor(dvcMap, fq, Aq, Bq, Den, Phi,GreySolidPhi,Porosity_dvc,Permeability_dvc,Velocity, // rhoA, rhoB, tauA, tauB,tauA_eff, tauB_eff, // alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - //************************************************************************ + ScaLBL_Comm->Barrier(); + //************************************************************************ PROFILE_STOP("Update"); //TODO For temporary use - writing Restart and Vis files should be included in the analysis framework in the future @@ -1315,15 +1312,14 @@ void ScaLBL_GreyscaleColorModel::Run(){ } morph_timesteps += analysis_interval; } - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); } //analysis.finish(); PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - stoptime = MPI_Wtime(); + ScaLBL_Comm->Barrier(); + stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; @@ -1369,14 +1365,14 @@ void ScaLBL_GreyscaleColorModel::ImageInit(std::string Filename){ // } // } // } -// Count=sumReduce( Dm->Comm, Count); -// PoreCount=sumReduce( Dm->Comm, PoreCount); +// Count=Dm->Comm.sumReduce( Count); +// PoreCount=Dm->Comm.sumReduce( PoreCount); // if (rank==0) printf(" new saturation: %f (%f / %f) \n", Count / PoreCount, Count, PoreCount); 
ScaLBL_D3Q19_Init(fq, Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); //ScaLBL_CopyToHost(Averages->Phi.data(),Phi,Nx*Ny*Nz*sizeof(double)); @@ -1447,8 +1443,8 @@ double ScaLBL_GreyscaleColorModel::SeedPhaseField(const double seed_water_in_oil mass_loss += random_value*seed_water_in_oil; } - count= sumReduce( Dm->Comm, count); - mass_loss= sumReduce( Dm->Comm, mass_loss); + count= Dm->Comm.sumReduce( count); + mass_loss= Dm->Comm.sumReduce( mass_loss); if (rank == 0) printf("Remove mass %.5g from %.5g voxels \n",mass_loss,count); // Need to initialize Aq, Bq, Den, Phi directly @@ -1828,7 +1824,7 @@ void ScaLBL_GreyscaleColorModel::WriteDebug(){ // // // ScaLBL_CopyToDevice(GreySolidGrad, GreySolidGrad_host, 3*Np*sizeof(double)); -// ScaLBL_DeviceBarrier(); +// ScaLBL_Comm->Barrier(); // delete [] SolidPotential_host; // delete [] GreySolidGrad_host; // delete [] Dst; @@ -1976,7 +1972,7 @@ void ScaLBL_GreyscaleColorModel::WriteDebug(){ // } // // ScaLBL_CopyToDevice(GreySolidPhi, GreySolidPhi_host, Nx*Ny*Nz*sizeof(double)); -// ScaLBL_DeviceBarrier(); +// ScaLBL_Comm->Barrier(); // // //debug // //FILE *OUTFILE; @@ -2047,7 +2043,7 @@ void ScaLBL_GreyscaleColorModel::WriteDebug(){ // } // // ScaLBL_CopyToDevice(GreySolidPhi, GreySolidPhi_host, Nx*Ny*Nz*sizeof(double)); -// ScaLBL_DeviceBarrier(); +// ScaLBL_Comm->Barrier(); // // //debug // FILE *OUTFILE; diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index b1bceeff..4e19ac44 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -267,7 +267,7 @@ void ScaLBL_GreyscaleModel::AssignComponentLabels(double *Porosity, double *Perm // Set Dm to match Mask for (int i=0; iid[i] = Mask->id[i]; - for (int idx=0; idxComm, label_count[idx]); + for (int idx=0; 
idxComm.sumReduce( label_count[idx]); //Initialize a weighted porosity after considering grey voxels GreyPorosity=0.0; for (unsigned int idx=0; idxComm, GreyPorosity_loc); + GreyPorosity = Dm->Comm.sumReduce( GreyPorosity_loc); GreyPorosity = GreyPorosity/double((Nx-2)*(Ny-2)*(Nz-2)*nprocs); if (rank==0){ @@ -683,10 +683,10 @@ void ScaLBL_GreyscaleModel::Run(){ double As = Morphology.A(); double Hs = Morphology.H(); double Xs = Morphology.X(); - Vs = sumReduce( Dm->Comm, Vs); - As = sumReduce( Dm->Comm, As); - Hs = sumReduce( Dm->Comm, Hs); - Xs = sumReduce( Dm->Comm, Xs); + Vs = Dm->Comm.sumReduce( Vs); + As = Dm->Comm.sumReduce( As); + Hs = Dm->Comm.sumReduce( Hs); + Xs = Dm->Comm.sumReduce( Xs); double h = Dm->voxel_length; //double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 6fbb627e..28137c3f 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -588,7 +588,7 @@ void ScaLBL_IonModel::AssignSolidBoundary(double *ion_solid) } for (size_t idx=0; idxComm, label_count[idx]); + label_count_global[idx]=Dm->Comm.sumReduce( label_count[idx]); if (rank==0){ printf("LB Ion Solver: number of ion solid labels: %lu \n",NLABELS); @@ -684,7 +684,7 @@ void ScaLBL_IonModel::Create(){ IonSolid_host = new double[Nx*Ny*Nz]; AssignSolidBoundary(IonSolid_host); ScaLBL_CopyToDevice(IonSolid, IonSolid_host, Nx*Ny*Nz*sizeof(double)); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] IonSolid_host; } } @@ -782,7 +782,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ //.......create and start timer............ 
//double starttime,stoptime,cputime; - //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); //starttime = MPI_Wtime(); for (int ic=0; icSendD3Q7AA(fq, ic); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_IonConcentration(NeighborList, &fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); //--------------------------------------- Set boundary conditions -------------------------------------// if (BoundaryConditionInlet[ic]>0){ switch (BoundaryConditionInlet[ic]){ @@ -830,7 +830,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ if (BoundaryConditionSolid==1){ //TODO IonSolid may also be species-dependent ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic*Np*7], IonSolid); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); } // *************EVEN TIMESTEP*************// @@ -839,7 +839,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ ScaLBL_Comm->SendD3Q7AA(fq, ic); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_IonConcentration(&fq[ic*Np*7],&Ci[ic*Np],ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, ic); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); //--------------------------------------- Set boundary conditions -------------------------------------// if (BoundaryConditionInlet[ic]>0){ switch (BoundaryConditionInlet[ic]){ @@ -874,7 +874,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ if (BoundaryConditionSolid==1){ //TODO IonSolid may also be species-dependent ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic*Np*7], IonSolid); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); } } } @@ -904,7 +904,7 @@ void ScaLBL_IonModel::getIonConcentration(DoubleArray &IonConcentration, const i //This function wirte out the data 
in a normal layout (by aggregating all decomposed domains) ScaLBL_Comm->RegularLayout(Map,&Ci[ic*Np],IonConcentration); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); IonConcentration_LB_to_Phys(IonConcentration); } @@ -914,7 +914,7 @@ void ScaLBL_IonModel::getIonConcentration_debug(int timestep){ DoubleArray PhaseField(Nx,Ny,Nz); for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); IonConcentration_LB_to_Phys(PhaseField); FILE *OUTFILE; @@ -955,7 +955,7 @@ void ScaLBL_IonModel::DummyFluidVelocity(){ } ScaLBL_AllocateDeviceMemory((void **) &FluidVelocityDummy, sizeof(double)*3*Np); ScaLBL_CopyToDevice(FluidVelocityDummy, FluidVelocity_host, sizeof(double)*3*Np); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] FluidVelocity_host; } @@ -976,7 +976,7 @@ void ScaLBL_IonModel::DummyElectricField(){ } ScaLBL_AllocateDeviceMemory((void **) &ElectricFieldDummy, sizeof(double)*3*Np); ScaLBL_CopyToDevice(ElectricFieldDummy, ElectricField_host, sizeof(double)*3*Np); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] ElectricField_host; } @@ -1028,7 +1028,7 @@ double ScaLBL_IonModel::CalIonDenConvergence(vector &ci_avg_previous){ // DoubleArray PhaseField(Nx,Ny,Nz); // for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); -// ScaLBL_DeviceBarrier(); MPI_Barrier(comm); +// ScaLBL_Comm->Barrier(); MPI_Barrier(comm); // // FILE *OUTFILE; // sprintf(LocalRankFilename,"Ion%02i.%05i.raw",ic+1,rank); diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index b0dde2c7..3d73f784 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -231,7 +231,7 @@ void ScaLBL_Poisson::AssignSolidBoundary(double *poisson_solid) } for (size_t idx=0; idxComm, label_count[idx]); + label_count_global[idx]=Dm->Comm.sumReduce( label_count[idx]); if (rank==0){ printf("LB-Poisson Solver: number of Poisson solid 
labels: %lu \n",NLABELS); @@ -324,16 +324,16 @@ void ScaLBL_Poisson::Create(){ } } ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] TmpMap; // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); delete [] neighborList; // copy node ID //ScaLBL_CopyToDevice(dvcID, Mask->id, sizeof(signed char)*Nx*Ny*Nz); - //ScaLBL_DeviceBarrier(); + //ScaLBL_Comm->Barrier(); //Initialize solid boundary for electric potential ScaLBL_Comm->SetupBounceBackList(Map, Mask->id, Np); @@ -388,7 +388,7 @@ void ScaLBL_Poisson::Initialize(){ AssignSolidBoundary(psi_host);//step1 Potential_Init(psi_host);//step2 ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double)); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); delete [] psi_host; @@ -408,7 +408,7 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ //.......create and start timer............ 
//double starttime,stoptime,cputime; - //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); //starttime = MPI_Wtime(); timestep=0; @@ -421,13 +421,13 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ SolveElectricPotentialAAodd();//update electric potential SolvePoissonAAodd(ChargeDensity);//perform collision - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); // *************EVEN TIMESTEP*************// timestep++; SolveElectricPotentialAAeven();//update electric potential SolvePoissonAAeven(ChargeDensity);//perform collision - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); //************************************************************************/ // Check convergence of steady-state solution @@ -509,7 +509,7 @@ void ScaLBL_Poisson::SolveElectricPotentialAAodd(){ ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set boundary conditions if (BoundaryCondition == 1){ ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); @@ -523,7 +523,7 @@ void ScaLBL_Poisson::SolveElectricPotentialAAeven(){ ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set boundary conditions if (BoundaryCondition == 1){ ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); @@ -570,7 +570,7 @@ void ScaLBL_Poisson::DummyChargeDensity(){ } ScaLBL_AllocateDeviceMemory((void **) &ChargeDensityDummy, sizeof(double)*Np); ScaLBL_CopyToDevice(ChargeDensityDummy, 
ChargeDensity_host, sizeof(double)*Np); - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); delete [] ChargeDensity_host; } @@ -579,7 +579,7 @@ void ScaLBL_Poisson::getElectricPotential_debug(int timestep){ DoubleArray PhaseField(Nx,Ny,Nz); //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); ScaLBL_CopyToHost(PhaseField.data(),Psi,sizeof(double)*Nx*Ny*Nz); - //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); FILE *OUTFILE; sprintf(LocalRankFilename,"Electric_Potential_Time_%i.%05i.raw",timestep,rank); OUTFILE = fopen(LocalRankFilename,"wb"); @@ -597,22 +597,22 @@ void ScaLBL_Poisson::getElectricField(DoubleArray &Values_x, DoubleArray &Values ScaLBL_Comm->RegularLayout(Map,&ElectricField[0*Np],Values_x); ElectricField_LB_to_Phys(Values_x); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&ElectricField[1*Np],Values_y); ElectricField_LB_to_Phys(Values_y); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&ElectricField[2*Np],Values_z); ElectricField_LB_to_Phys(Values_z); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); } void ScaLBL_Poisson::getElectricField_debug(int timestep){ //ScaLBL_D3Q7_Poisson_getElectricField(fq,ElectricField,tau,Np); - //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); DoubleArray PhaseField(Nx,Ny,Nz); ScaLBL_Comm->RegularLayout(Map,&ElectricField[0*Np],PhaseField); @@ -658,7 +658,7 @@ void ScaLBL_Poisson::ElectricField_LB_to_Phys(DoubleArray &Efield_reg){ // ScaLBL_D3Q7_Poisson_ElectricField(NeighborList, dvcMap, dvcID, Psi, ElectricField, BoundaryConditionSolid, // Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); // ScaLBL_Comm_Regular->RecvHalo(Psi); -// ScaLBL_DeviceBarrier(); +// ScaLBL_Comm->Barrier(); // if (BoundaryCondition == 1){ // 
ScaLBL_Comm->Poisson_D3Q7_BC_z(dvcMap,Psi,Vin); // ScaLBL_Comm->Poisson_D3Q7_BC_Z(dvcMap,Psi,Vout); @@ -671,7 +671,7 @@ void ScaLBL_Poisson::ElectricField_LB_to_Phys(DoubleArray &Efield_reg){ // // DoubleArray PhaseField(Nx,Ny,Nz); // ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); -// //ScaLBL_DeviceBarrier(); MPI_Barrier(comm); +// //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); // FILE *OUTFILE; // sprintf(LocalRankFilename,"Electric_Potential.%05i.raw",rank); // OUTFILE = fopen(LocalRankFilename,"wb"); @@ -728,7 +728,7 @@ void ScaLBL_Poisson::ElectricField_LB_to_Phys(DoubleArray &Efield_reg){ // } // // for (size_t idx=0; idxComm, label_count[idx]); +// label_count_global[idx]=Dm->Comm.sumReduce( label_count[idx]); // // if (rank==0){ // printf("LB-Poisson Solver: number of Poisson solid labels: %lu \n",NLABELS); diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 086e3633..4ad93950 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -343,7 +343,7 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ } ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL @@ -365,7 +365,7 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ } ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); //************************************************************************/ } } @@ -373,25 +373,25 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ void 
ScaLBL_StokesModel::getVelocity(DoubleArray &Vel_x, DoubleArray &Vel_y, DoubleArray &Vel_z){ //get velocity in physical unit [m/sec] ScaLBL_D3Q19_Momentum(fq, Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Vel_x); Velocity_LB_to_Phys(Vel_x); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Vel_y); Velocity_LB_to_Phys(Vel_y); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Vel_z); Velocity_LB_to_Phys(Vel_z); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); } void ScaLBL_StokesModel::getVelocity_debug(int timestep){ //get velocity in physical unit [m/sec] ScaLBL_D3Q19_Momentum(fq, Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); DoubleArray PhaseField(Nx,Ny,Nz); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); @@ -492,7 +492,7 @@ double ScaLBL_StokesModel::CalVelocityConvergence(double& flow_rate_previous,dou //----------------------------------------------------- ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); @@ -574,7 +574,7 @@ void ScaLBL_StokesModel::Run(){ //.......create and start timer............ 
double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); starttime = MPI_Wtime(); if (rank==0) printf("****************************************************************\n"); if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax); @@ -602,7 +602,7 @@ void ScaLBL_StokesModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); @@ -621,12 +621,12 @@ void ScaLBL_StokesModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); //************************************************************************/ if (timestep%1000==0){ ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); @@ -681,10 +681,10 @@ void ScaLBL_StokesModel::Run(){ double As = Morphology.A(); double Hs = Morphology.H(); double Xs = Morphology.X(); - Vs=sumReduce( Dm->Comm, Vs); - As=sumReduce( Dm->Comm, As); - Hs=sumReduce( Dm->Comm, Hs); - Xs=sumReduce( Dm->Comm, Xs); + Vs=Dm->Comm.sumReduce( Vs); + As=Dm->Comm.sumReduce( As); + Hs=Dm->Comm.sumReduce( Hs); + Xs=Dm->Comm.sumReduce( Xs); double h = Dm->voxel_length; double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; if (rank==0) { @@ 
-718,7 +718,7 @@ void ScaLBL_StokesModel::VelocityField(){ /* Minkowski Morphology(Mask); int SIZE=Np*sizeof(double); ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); MPI_Barrier(comm); ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE); memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double)); From 7644a8288e7142e38425d3645187ba296127e138 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 5 Jan 2021 12:38:28 -0500 Subject: [PATCH 121/205] refactor Domain to include electorchem grey and FOM --- common/Domain.cpp | 519 +++++++++++++++++++++++++++++++++++----------- common/Domain.h | 3 + 2 files changed, 406 insertions(+), 116 deletions(-) diff --git a/common/Domain.cpp b/common/Domain.cpp index ab457f33..fbf9c324 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -263,6 +263,7 @@ void Domain::Decomp( const std::string& Filename ) int64_t i,j,k,n; int64_t xStart,yStart,zStart; int checkerSize; + bool USE_CHECKER = false; //int inlet_layers_x, inlet_layers_y, inlet_layers_z; //int outlet_layers_x, outlet_layers_y, outlet_layers_z; xStart=yStart=zStart=0; @@ -272,8 +273,8 @@ void Domain::Decomp( const std::string& Filename ) outlet_layers_x = 0; outlet_layers_y = 0; outlet_layers_z = 0; - inlet_layers_phase=1; - outlet_layers_phase=2; + inlet_layers_phase=1; + outlet_layers_phase=2; checkerSize = 32; // Read domain parameters @@ -302,6 +303,7 @@ void Domain::Decomp( const std::string& Filename ) } if (database->keyExists( "checkerSize" )){ checkerSize = database->getScalar( "checkerSize" ); + USE_CHECKER = true; } else { checkerSize = SIZE[0]; @@ -324,7 +326,6 @@ void Domain::Decomp( const std::string& Filename ) //printf("INPUT ERROR: Valid ReadType are 8bit, 16bit \n"); ReadType = "8bit"; } - nx = size[0]; ny = size[1]; nz = size[2]; @@ -335,7 +336,7 @@ void Domain::Decomp( const std::string& Filename ) global_Ny = SIZE[1]; global_Nz = SIZE[2]; nprocs=nprocx*nprocy*nprocz; - 
char *SegData = nullptr; + char *SegData = NULL; if (RANK==0){ printf("Input media: %s\n",Filename.c_str()); @@ -353,7 +354,7 @@ void Domain::Decomp( const std::string& Filename ) if (ReadType == "8bit"){ printf("Reading 8-bit input data \n"); FILE *SEGDAT = fopen(Filename.c_str(),"rb"); - if (!SEGDAT) ERROR("Domain.cpp: Error reading segmented data"); + if (SEGDAT==NULL) ERROR("Domain.cpp: Error reading segmented data"); size_t ReadSeg; ReadSeg=fread(SegData,1,SIZE,SEGDAT); if (ReadSeg != size_t(SIZE)) printf("Domain.cpp: Error reading segmented data \n"); @@ -364,7 +365,7 @@ void Domain::Decomp( const std::string& Filename ) short int *InputData; InputData = new short int[SIZE]; FILE *SEGDAT = fopen(Filename.c_str(),"rb"); - if (!SEGDAT) ERROR("Domain.cpp: Error reading segmented data"); + if (SEGDAT==NULL) ERROR("Domain.cpp: Error reading segmented data"); size_t ReadSeg; ReadSeg=fread(InputData,2,SIZE,SEGDAT); if (ReadSeg != size_t(SIZE)) printf("Domain.cpp: Error reading segmented data \n"); @@ -374,7 +375,7 @@ void Domain::Decomp( const std::string& Filename ) } } printf("Read segmented data from %s \n",Filename.c_str()); - + // relabel the data std::vector LabelCount(ReadValues.size(),0); for (int k = 0; k 0){ - // use checkerboard pattern - printf("Checkerboard pattern at x inlet for %i layers \n",inlet_layers_x); - for (int k = 0; k 0){ + // use checkerboard pattern + printf("Checkerboard pattern at x inlet for %i layers \n",inlet_layers_x); + for (int k = 0; k 0){ + printf("Checkerboard pattern at y inlet for %i layers \n",inlet_layers_y); + // use checkerboard pattern + for (int k = 0; k 0){ + printf("Checkerboard pattern at z inlet for %i layers, saturated with phase label=%i \n",inlet_layers_z,inlet_layers_phase); + // use checkerboard pattern + for (int k = zStart; k < zStart+inlet_layers_z; k++){ + for (int j = 0; j 0){ + // use checkerboard pattern + printf("Checkerboard pattern at x outlet for %i layers \n",outlet_layers_x); + for (int k = 0; k 0){ 
+ printf("Checkerboard pattern at y outlet for %i layers \n",outlet_layers_y); + // use checkerboard pattern + for (int k = 0; k 0){ + printf("Checkerboard pattern at z outlet for %i layers, saturated with phase label=%i \n",outlet_layers_z,outlet_layers_phase); + // use checkerboard pattern + for (int k = zStart + nz*nprocz - outlet_layers_z; k < zStart + nz*nprocz; k++){ + for (int j = 0; j 0){ - printf("Checkerboard pattern at y inlet for %i layers \n",inlet_layers_y); - // use checkerboard pattern - for (int k = 0; k 0){ + printf("Mixed reflection pattern at z inlet for %i layers, saturated with phase label=%i \n",inlet_layers_z,inlet_layers_phase); + for (int k = zStart; k < zStart+inlet_layers_z; k++){ + for (int j = 0; j 0){ + SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; + } } } } } - } - - if (inlet_layers_z > 0){ - printf("Checkerboard pattern at z inlet for %i layers \n",inlet_layers_z); - // use checkerboard pattern - for (int k = zStart; k < zStart+inlet_layers_z; k++){ - for (int j = 0; j 0){ - // use checkerboard pattern - printf("Checkerboard pattern at x outlet for %i layers \n",outlet_layers_x); - for (int k = 0; k 0){ - printf("Checkerboard pattern at y outlet for %i layers \n",outlet_layers_y); - // use checkerboard pattern - for (int k = 0; k 0){ - printf("Checkerboard pattern at z outlet for %i layers \n",outlet_layers_z); - // use checkerboard pattern - for (int k = zStart + nz*nprocz - outlet_layers_z; k < zStart + nz*nprocz; k++){ - for (int j = 0; j 0){ + printf("Mixed reflection pattern at z outlet for %i layers, saturated with phase label=%i \n",outlet_layers_z,outlet_layers_phase); + for (int k = zStart + nz*nprocz - outlet_layers_z; k < zStart + nz*nprocz; k++){ + for (int j = 0; j 0){ + SegData[k*global_Nx*global_Ny+j*global_Nx+i] = reflection_id; + } } } } @@ -523,7 +553,7 @@ void Domain::Decomp( const std::string& Filename ) // Get the rank info int64_t N = (nx+2)*(ny+2)*(nz+2); - + // number of sites to use for 
periodic boundary condition transition zone int64_t z_transition_size = (nprocz*nz - (global_Nz - zStart))/2; if (z_transition_size < 0) z_transition_size=0; @@ -594,6 +624,27 @@ void Domain::Decomp( const std::string& Filename ) Comm.recv(id.data(),N,0,15); } Comm.barrier(); + + // Compute the porosity + double sum; + double sum_local=0.0; + double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs); + if (BoundaryCondition > 0 && BoundaryCondition !=5) iVol_global = 1.0/(1.0*(Nx-2)*nprocx*(Ny-2)*nprocy*((Nz-2)*nprocz-6)); + //......................................................... + for (int k=inlet_layers_z+1; k 0){ + sum_local+=1.0; + } + } + } + } + sum = Comm.sumReduce(sum_local); + porosity = sum*iVol_global; + if (rank()==0) printf("Media porosity = %f \n",porosity); + //......................................................... } void Domain::AggregateLabels( const std::string& filename ){ @@ -1170,3 +1221,239 @@ void ReadBinaryFile(char *FILENAME, double *Data, size_t N) File.close(); } +void Domain::ReadFromFile(const std::string& Filename,const std::string& Datatype, double *UserData) +{ + //........................................................................................ + // Reading the user-defined input file + // NOTE: so far it only supports BC=0 (periodic) and BC=5 (mixed reflection) + // because if checkerboard or inlet/outlet buffer layers are added, the + // value of the void space is undefined. + // NOTE: if BC=5 is used, where the inlet and outlet layers of the domain are modified, + // user needs to modify the input file accordingly before LBPM simulator read + // the input file. + //........................................................................................ 
+ int rank_offset = 0; + int RANK = rank(); + int nprocs, nprocx, nprocy, nprocz, nx, ny, nz; + int64_t global_Nx,global_Ny,global_Nz; + int64_t i,j,k,n; + //TODO These offset we may still need them + int64_t xStart,yStart,zStart; + xStart=yStart=zStart=0; + + // Read domain parameters + // TODO currently the size of the data is still read from Domain{}; + // but user may have a user-specified size + auto size = database->getVector( "n" ); + auto SIZE = database->getVector( "N" ); + auto nproc = database->getVector( "nproc" ); + //TODO currently the funcationality "offset" is disabled as the user-defined input data may have a different size from that of the input domain + if (database->keyExists( "offset" )){ + auto offset = database->getVector( "offset" ); + xStart = offset[0]; + yStart = offset[1]; + zStart = offset[2]; + } + + nx = size[0]; + ny = size[1]; + nz = size[2]; + nprocx = nproc[0]; + nprocy = nproc[1]; + nprocz = nproc[2]; + global_Nx = SIZE[0]; + global_Ny = SIZE[1]; + global_Nz = SIZE[2]; + nprocs=nprocx*nprocy*nprocz; + + double *SegData = NULL; + if (RANK==0){ + printf("User-defined input file: %s (data type: %s)\n",Filename.c_str(),Datatype.c_str()); + printf("NOTE: currently only BC=0 or 5 supports user-defined input file!\n"); + // Rank=0 reads the entire segmented data and distributes to worker processes + printf("Dimensions of the user-defined input file: %ld x %ld x %ld \n",global_Nx,global_Ny,global_Nz); + int64_t SIZE = global_Nx*global_Ny*global_Nz; + + if (Datatype == "double"){ + printf("Reading input data as double precision floating number\n"); + SegData = new double[SIZE]; + FILE *SEGDAT = fopen(Filename.c_str(),"rb"); + if (SEGDAT==NULL) ERROR("Domain.cpp: Error reading user-defined file!\n"); + size_t ReadSeg; + ReadSeg=fread(SegData,8,SIZE,SEGDAT); + if (ReadSeg != size_t(SIZE)) printf("Domain.cpp: Error reading file: %s\n",Filename.c_str()); + fclose(SEGDAT); + } + else{ + ERROR("Error: User-defined input file only supports 
double-precision floating number!\n"); + } + printf("Read file successfully from %s \n",Filename.c_str()); + } + + // Get the rank info + int64_t N = (nx+2)*(ny+2)*(nz+2); + + // number of sites to use for periodic boundary condition transition zone + //int64_t z_transition_size = (nprocz*nz - (global_Nz - zStart))/2; + //if (z_transition_size < 0) z_transition_size=0; + int64_t z_transition_size = 0; + + //char LocalRankFilename[1000];//just for debug + double *loc_id; + loc_id = new double [(nx+2)*(ny+2)*(nz+2)]; + + // Set up the sub-domains + if (RANK==0){ + printf("Decomposing user-defined input file\n"); + printf("Distributing subdomains across %i processors \n",nprocs); + printf("Process grid: %i x %i x %i \n",nprocx,nprocy,nprocz); + printf("Subdomain size: %i x %i x %i \n",nx,ny,nz); + printf("Size of transition region: %ld \n", z_transition_size); + + for (int kp=0; kp Date: Tue, 5 Jan 2021 13:51:32 -0500 Subject: [PATCH 122/205] refactor new models to FOM --- models/GreyscaleColorModel.cpp | 4 +- models/GreyscaleModel.cpp | 10 +-- models/IonModel.cpp | 38 ++++++------ models/IonModel.h | 4 +- models/MRTModel.cpp | 37 +++++------ models/MRTModel.h | 4 +- models/PoissonSolver.cpp | 40 ++++++------ models/PoissonSolver.h | 4 +- models/StokesModel.cpp | 109 ++++++++++----------------------- models/StokesModel.h | 4 +- 10 files changed, 105 insertions(+), 149 deletions(-) diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index 009b02f5..0c349c26 100644 --- a/models/GreyscaleColorModel.cpp +++ b/models/GreyscaleColorModel.cpp @@ -167,7 +167,7 @@ void ScaLBL_GreyscaleColorModel::ReadInput(){ ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; - id_view.viewRaw( size1, Mask->id ); + id_view.viewRaw( size1, Mask->id.data()); fill.copy( input_id, id_view ); fill.fill( id_view ); } @@ -600,7 +600,7 
@@ void ScaLBL_GreyscaleColorModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); MPI_Barrier(comm); //........................................................................... diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 4e19ac44..b6f9fc62 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -366,7 +366,7 @@ void ScaLBL_GreyscaleModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); MPI_Barrier(comm); //........................................................................... 
@@ -649,10 +649,10 @@ void ScaLBL_GreyscaleModel::Run(){ } } } - vax = sumReduce( Mask->Comm, vax_loc); - vay = sumReduce( Mask->Comm, vay_loc); - vaz = sumReduce( Mask->Comm, vaz_loc); - count = sumReduce( Mask->Comm, count_loc); + vax = Dm->Comm.sumReduce( vax_loc); + vay = Dm->Comm.sumReduce( vay_loc); + vaz = Dm->Comm.sumReduce( vaz_loc); + count = Dm->Comm.sumReduce( count_loc); vax /= count; vay /= count; diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 28137c3f..3e4e2468 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -461,7 +461,7 @@ void ScaLBL_IonModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); unsigned short int BC_inlet_min = *min_element(BoundaryConditionInlet.begin(),BoundaryConditionInlet.end()); unsigned short int BC_outlet_min = *min_element(BoundaryConditionOutlet.begin(),BoundaryConditionOutlet.end()); @@ -478,7 +478,7 @@ void ScaLBL_IonModel::SetDomain(){ } Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -506,7 +506,7 @@ void ScaLBL_IonModel::ReadInput(){ ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; - id_view.viewRaw( size1, Mask->id ); + id_view.viewRaw( size1, Mask->id.data() ); fill.copy( input_id, id_view ); fill.fill( id_view ); } @@ -650,8 +650,8 @@ void ScaLBL_IonModel::Create(){ if (rank==0) printf ("LB Ion Solver: Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - MPI_Barrier(comm); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); + comm.barrier(); 
//........................................................................... // MAIN VARIABLES ALLOCATED HERE //........................................................................... @@ -670,15 +670,15 @@ void ScaLBL_IonModel::Create(){ if (rank==0) printf ("LB Ion Solver: Setting up device map and neighbor list \n"); // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - MPI_Barrier(comm); + comm.barrier(); //Initialize solid boundary for electrical potential //if ion concentration at solid surface is specified if (BoundaryConditionSolid==1){ - ScaLBL_AllocateDeviceMemory((void **) &IonSolid, sizeof(double)*Nx*Ny*Nz); - ScaLBL_Comm->SetupBounceBackList(Map, Mask->id, Np); - MPI_Barrier(comm); + ScaLBL_AllocateDeviceMemory((void **) &IonSolid, sizeof(double)*Nx*Ny*Nz); + ScaLBL_Comm->SetupBounceBackList(Map, Mask->id.data(), Np); + comm.barrier(); double *IonSolid_host; IonSolid_host = new double[Nx*Ny*Nz]; @@ -704,7 +704,7 @@ void ScaLBL_IonModel::Initialize(){ AssignIonConcentration_FromFile(&Ci_host[ic*Np],File_ion); } ScaLBL_CopyToDevice(Ci, Ci_host, number_ion_species*sizeof(double)*Np); - MPI_Barrier(comm); + comm.barrier(); for (int ic=0; icBarrier(); MPI_Barrier(comm); + //ScaLBL_Comm->Barrier(); comm.barrier(); //starttime = MPI_Wtime(); for (int ic=0; icSolidDirichletD3Q7(&fq[ic*Np*7], IonSolid); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); } // *************EVEN TIMESTEP*************// @@ -874,7 +874,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ if (BoundaryConditionSolid==1){ //TODO IonSolid may also be species-dependent ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic*Np*7], IonSolid); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); } } } @@ -904,7 +904,7 @@ void ScaLBL_IonModel::getIonConcentration(DoubleArray &IonConcentration, const i //This function wirte out the data in a normal layout (by aggregating all 
decomposed domains) ScaLBL_Comm->RegularLayout(Map,&Ci[ic*Np],IonConcentration); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); IonConcentration_LB_to_Phys(IonConcentration); } @@ -914,7 +914,7 @@ void ScaLBL_IonModel::getIonConcentration_debug(int timestep){ DoubleArray PhaseField(Nx,Ny,Nz); for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); IonConcentration_LB_to_Phys(PhaseField); FILE *OUTFILE; @@ -1001,10 +1001,8 @@ double ScaLBL_IonModel::CalIonDenConvergence(vector &ci_avg_previous){ ci_loc +=Ci_host[idx]; count_loc+=1.0; } - - MPI_Allreduce(&ci_loc,&ci_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - + ci_avg = Mask->Comm.sumReduce( ci_loc); + count = Mask->Comm.sumReduce( count_loc); ci_avg /= count; double ci_avg_mag=ci_avg; if (ci_avg==0.0) ci_avg_mag=1.0; @@ -1028,7 +1026,7 @@ double ScaLBL_IonModel::CalIonDenConvergence(vector &ci_avg_previous){ // DoubleArray PhaseField(Nx,Ny,Nz); // for (int ic=0; icRegularLayout(Map,&Ci[ic*Np],PhaseField); -// ScaLBL_Comm->Barrier(); MPI_Barrier(comm); +// ScaLBL_Comm->Barrier(); comm.barrier(); // // FILE *OUTFILE; // sprintf(LocalRankFilename,"Ion%02i.%05i.raw",ic+1,rank); diff --git a/models/IonModel.h b/models/IonModel.h index 4b370978..6e8eab25 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -16,7 +16,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "analysis/Minkowski.h" #include "ProfilerApp.h" @@ -85,7 +85,7 @@ public: double *ElectricFieldDummy; private: - MPI_Comm comm; + Utilities::MPI comm; // filenames char LocalRankString[8]; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 4f55ad22..17f42345 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -86,9 +86,9 @@ void ScaLBL_MRTModel::SetDomain(){ for 
(int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -116,7 +116,7 @@ void ScaLBL_MRTModel::ReadInput(){ ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; - id_view.viewRaw( size1, Mask->id ); + id_view.viewRaw( size1, Mask->id.data() ); fill.copy( input_id, id_view ); fill.fill( id_view ); } @@ -193,7 +193,7 @@ void ScaLBL_MRTModel::Create(){ if (rank==0) printf ("Setting up device map and neighbor list \n"); // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - MPI_Barrier(comm); + comm.barrier(); } @@ -228,7 +228,7 @@ void ScaLBL_MRTModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); starttime = MPI_Wtime(); if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); if (rank==0) printf("********************************************************\n"); @@ -255,7 +255,7 @@ void ScaLBL_MRTModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); @@ -274,12 +274,12 @@ void ScaLBL_MRTModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); 
MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); //************************************************************************/ if (timestep%1000==0){ ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); @@ -300,11 +300,11 @@ void ScaLBL_MRTModel::Run(){ } } } - } - MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + } + vax=Dm->Comm.sumReduce( vax_loc); + vay=Dm->Comm.sumReduce( vay_loc); + vaz=Dm->Comm.sumReduce( vaz_loc); + count=Dm->Comm.sumReduce( count_loc); vax /= count; vay /= count; @@ -334,10 +334,11 @@ void ScaLBL_MRTModel::Run(){ double As = Morphology.A(); double Hs = Morphology.H(); double Xs = Morphology.X(); - Vs=sumReduce( Dm->Comm, Vs); - As=sumReduce( Dm->Comm, As); - Hs=sumReduce( Dm->Comm, Hs); - Xs=sumReduce( Dm->Comm, Xs); + Vs=Dm->Comm.sumReduce( Vs); + As=Dm->Comm.sumReduce( As); + Hs=Dm->Comm.sumReduce( Hs); + Xs=Dm->Comm.sumReduce( Xs); + double h = Dm->voxel_length; double absperm = h*h*mu*Mask->Porosity()*flow_rate / force_mag; if (rank==0) { @@ -371,7 +372,7 @@ void ScaLBL_MRTModel::VelocityField(){ /* Minkowski Morphology(Mask); int SIZE=Np*sizeof(double); ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE); memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double)); diff --git a/models/MRTModel.h b/models/MRTModel.h index aa4ee1f0..40550e59 100644 --- a/models/MRTModel.h +++ b/models/MRTModel.h @@ -11,7 +11,7 @@ #include "common/ScaLBL.h" 
#include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "analysis/Minkowski.h" #include "ProfilerApp.h" @@ -63,7 +63,7 @@ public: DoubleArray Velocity_y; DoubleArray Velocity_z; private: - MPI_Comm comm; + Utilities::MPI comm; // filenames char LocalRankString[8]; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 3d73f784..df1b2875 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -116,11 +116,11 @@ void ScaLBL_Poisson::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); Dm->BoundaryCondition = BoundaryCondition; Mask->BoundaryCondition = BoundaryCondition; Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -148,7 +148,7 @@ void ScaLBL_Poisson::ReadInput(){ ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; - id_view.viewRaw( size1, Mask->id ); + id_view.viewRaw( size1, Mask->id.data() ); fill.copy( input_id, id_view ); fill.fill( id_view ); } @@ -275,8 +275,8 @@ void ScaLBL_Poisson::Create(){ if (rank==0) printf ("LB-Poisson Solver: Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - MPI_Barrier(comm); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE //........................................................................... 
@@ -329,15 +329,15 @@ void ScaLBL_Poisson::Create(){ // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); ScaLBL_Comm->Barrier(); - MPI_Barrier(comm); + comm.barrier(); delete [] neighborList; // copy node ID //ScaLBL_CopyToDevice(dvcID, Mask->id, sizeof(signed char)*Nx*Ny*Nz); //ScaLBL_Comm->Barrier(); //Initialize solid boundary for electric potential - ScaLBL_Comm->SetupBounceBackList(Map, Mask->id, Np); - MPI_Barrier(comm); + ScaLBL_Comm->SetupBounceBackList(Map, Mask->id.data(), Np); + comm.barrier(); } void ScaLBL_Poisson::Potential_Init(double *psi_init){ @@ -408,7 +408,7 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ //.......create and start timer............ //double starttime,stoptime,cputime; - //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + //ScaLBL_Comm->Barrier(); comm.barrier(); //starttime = MPI_Wtime(); timestep=0; @@ -421,13 +421,13 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ SolveElectricPotentialAAodd();//update electric potential SolvePoissonAAodd(ChargeDensity);//perform collision - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); // *************EVEN TIMESTEP*************// timestep++; SolveElectricPotentialAAeven();//update electric potential SolvePoissonAAeven(ChargeDensity);//perform collision - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); //************************************************************************/ // Check convergence of steady-state solution @@ -450,9 +450,9 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ } } } - MPI_Allreduce(&psi_loc,&psi_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - + psi_avg=Dm->Comm.sumReduce( psi_loc); + count=Dm->Comm.sumReduce( count_loc); + psi_avg /= count; double psi_avg_mag=psi_avg; if (psi_avg==0.0) psi_avg_mag=1.0; @@ -579,7 +579,7 @@ void ScaLBL_Poisson::getElectricPotential_debug(int timestep){ 
DoubleArray PhaseField(Nx,Ny,Nz); //ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); ScaLBL_CopyToHost(PhaseField.data(),Psi,sizeof(double)*Nx*Ny*Nz); - //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + //ScaLBL_Comm->Barrier(); comm.barrier(); FILE *OUTFILE; sprintf(LocalRankFilename,"Electric_Potential_Time_%i.%05i.raw",timestep,rank); OUTFILE = fopen(LocalRankFilename,"wb"); @@ -597,22 +597,22 @@ void ScaLBL_Poisson::getElectricField(DoubleArray &Values_x, DoubleArray &Values ScaLBL_Comm->RegularLayout(Map,&ElectricField[0*Np],Values_x); ElectricField_LB_to_Phys(Values_x); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&ElectricField[1*Np],Values_y); ElectricField_LB_to_Phys(Values_y); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&ElectricField[2*Np],Values_z); ElectricField_LB_to_Phys(Values_z); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); } void ScaLBL_Poisson::getElectricField_debug(int timestep){ //ScaLBL_D3Q7_Poisson_getElectricField(fq,ElectricField,tau,Np); - //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + //ScaLBL_Comm->Barrier(); comm.barrier(); DoubleArray PhaseField(Nx,Ny,Nz); ScaLBL_Comm->RegularLayout(Map,&ElectricField[0*Np],PhaseField); @@ -671,7 +671,7 @@ void ScaLBL_Poisson::ElectricField_LB_to_Phys(DoubleArray &Efield_reg){ // // DoubleArray PhaseField(Nx,Ny,Nz); // ScaLBL_Comm->RegularLayout(Map,Psi,PhaseField); -// //ScaLBL_Comm->Barrier(); MPI_Barrier(comm); +// //ScaLBL_Comm->Barrier(); comm.barrier(); // FILE *OUTFILE; // sprintf(LocalRankFilename,"Electric_Potential.%05i.raw",rank); // OUTFILE = fopen(LocalRankFilename,"wb"); diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 74abd775..7599c8b3 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -12,7 +12,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" -#include 
"common/MPI_Helpers.h" +#include "common/MPI.h" #include "analysis/Minkowski.h" #include "ProfilerApp.h" @@ -77,7 +77,7 @@ public: double *ChargeDensityDummy;// for debugging private: - MPI_Comm comm; + Utilities::MPI comm; // filenames char LocalRankString[8]; diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 4ad93950..0368b39b 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -189,11 +189,11 @@ void ScaLBL_StokesModel::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); Dm->BoundaryCondition = BoundaryCondition; Mask->BoundaryCondition = BoundaryCondition; Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -221,7 +221,7 @@ void ScaLBL_StokesModel::ReadInput(){ ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); fillHalo fill( comm, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; - id_view.viewRaw( size1, Mask->id ); + id_view.viewRaw( size1, Mask->id.data() ); fill.copy( input_id, id_view ); fill.fill( id_view ); } @@ -278,8 +278,8 @@ void ScaLBL_StokesModel::Create(){ if (rank==0) printf ("LB Single-Fluid Solver: Set up memory efficient layout \n"); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np); - MPI_Barrier(comm); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE //........................................................................... 
@@ -298,7 +298,7 @@ void ScaLBL_StokesModel::Create(){ if (rank==0) printf ("LB Single-Fluid Solver: Setting up device map and neighbor list \n"); // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - MPI_Barrier(comm); + comm.barrier(); } @@ -343,7 +343,7 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ } ScaLBL_D3Q19_AAodd_StokesMRT(NeighborList, fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL @@ -365,7 +365,7 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ } ScaLBL_D3Q19_AAeven_StokesMRT(fq, Velocity, ChargeDensity, ElectricField, rlx_setA, rlx_setB, Fx, Fy, Fz,rho0,den_scale,h,time_conv, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); //************************************************************************/ } } @@ -373,25 +373,25 @@ void ScaLBL_StokesModel::Run_Lite(double *ChargeDensity, double *ElectricField){ void ScaLBL_StokesModel::getVelocity(DoubleArray &Vel_x, DoubleArray &Vel_y, DoubleArray &Vel_z){ //get velocity in physical unit [m/sec] ScaLBL_D3Q19_Momentum(fq, Velocity, Np); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Vel_x); Velocity_LB_to_Phys(Vel_x); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Vel_y); Velocity_LB_to_Phys(Vel_y); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Vel_z); Velocity_LB_to_Phys(Vel_z); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + 
ScaLBL_Comm->Barrier(); comm.barrier(); } void ScaLBL_StokesModel::getVelocity_debug(int timestep){ //get velocity in physical unit [m/sec] ScaLBL_D3Q19_Momentum(fq, Velocity, Np); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); DoubleArray PhaseField(Nx,Ny,Nz); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); @@ -469,10 +469,10 @@ vector ScaLBL_StokesModel::computeElectricForceAvg(double *ChargeDensity count_loc+=1.0; } - MPI_Allreduce(&Fx_loc,&Fx_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&Fy_loc,&Fy_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&Fz_loc,&Fz_avg,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + Fx_avg=Dm->Comm.sumReduce( Fx_loc); + Fy_avg=Dm->Comm.sumReduce( Fy_loc); + Fz_avg=Dm->Comm.sumReduce( Fz_loc); + count=Dm->Comm.sumReduce( count_loc); Fx_avg /= count; Fy_avg /= count; @@ -492,7 +492,7 @@ double ScaLBL_StokesModel::CalVelocityConvergence(double& flow_rate_previous,dou //----------------------------------------------------- ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); @@ -514,11 +514,11 @@ double ScaLBL_StokesModel::CalVelocityConvergence(double& flow_rate_previous,dou } } } - MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - + vax=Dm->Comm.sumReduce( vax_loc); + vay=Dm->Comm.sumReduce( vay_loc); + vaz=Dm->Comm.sumReduce( vaz_loc); + count=Dm->Comm.sumReduce( count_loc); + vax /= count; vay /= count; vaz /= count; @@ -574,7 +574,7 @@ void ScaLBL_StokesModel::Run(){ 
//.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); starttime = MPI_Wtime(); if (rank==0) printf("****************************************************************\n"); if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax); @@ -602,7 +602,7 @@ void ScaLBL_StokesModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(fq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); @@ -621,12 +621,12 @@ void ScaLBL_StokesModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); //************************************************************************/ if (timestep%1000==0){ ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); comm.barrier(); ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Velocity_x); ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Velocity_y); ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Velocity_z); @@ -648,10 +648,12 @@ void ScaLBL_StokesModel::Run(){ } } } - MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); + + vax=Dm->Comm.sumReduce( vax_loc); + vay=Dm->Comm.sumReduce( vay_loc); + vaz=Dm->Comm.sumReduce( vaz_loc); + count=Dm->Comm.sumReduce( 
count_loc); + vax /= count; vay /= count; @@ -714,51 +716,6 @@ void ScaLBL_StokesModel::Run(){ } void ScaLBL_StokesModel::VelocityField(){ - -/* Minkowski Morphology(Mask); - int SIZE=Np*sizeof(double); - ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_Comm->Barrier(); MPI_Barrier(comm); - ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE); - - memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double)); - Morphology.Initialize(); - Morphology.UpdateMeshValues(); - Morphology.ComputeLocal(); - Morphology.Reduce(); - - double count_loc=0; - double count; - double vax,vay,vaz; - double vax_loc,vay_loc,vaz_loc; - vax_loc = vay_loc = vaz_loc = 0.f; - for (int n=0; nLastExterior(); n++){ - vax_loc += VELOCITY[n]; - vay_loc += VELOCITY[Np+n]; - vaz_loc += VELOCITY[2*Np+n]; - count_loc+=1.0; - } - - for (int n=ScaLBL_Comm->FirstInterior(); nLastInterior(); n++){ - vax_loc += VELOCITY[n]; - vay_loc += VELOCITY[Np+n]; - vaz_loc += VELOCITY[2*Np+n]; - count_loc+=1.0; - } - MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm); - - vax /= count; - vay /= count; - vaz /= count; - - double mu = (tau-0.5)/3.f; - if (rank==0) printf("Fx Fy Fz mu Vs As Js Xs vx vy vz\n"); - if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu, - Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz); - */ std::vector visData; fillHalo fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1); diff --git a/models/StokesModel.h b/models/StokesModel.h index 8da373bd..b6faa7d9 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -14,7 +14,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "analysis/Minkowski.h" #include 
"ProfilerApp.h" @@ -76,7 +76,7 @@ public: DoubleArray Velocity_y; DoubleArray Velocity_z; private: - MPI_Comm comm; + Utilities::MPI comm; // filenames char LocalRankString[8]; From 474c829e418b9cbac087969974a0f71fbe99fb10 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 5 Jan 2021 15:50:07 -0500 Subject: [PATCH 123/205] refactor work --- analysis/morphology.cpp | 451 +++++++++++++++++--------------- models/ColorModel.cpp | 2 +- models/ColorModel.h | 2 +- sample_scripts/configure_ubuntu | 24 ++ 4 files changed, 270 insertions(+), 209 deletions(-) create mode 100755 sample_scripts/configure_ubuntu diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index 0980a4f0..f6bb3469 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -1,7 +1,7 @@ #include // Implementation of morphological opening routine -inline void PackID(const int *list, int count, signed char *sendbuf, signed char *ID){ +inline void PackID(int *list, int count, signed char *sendbuf, signed char *ID){ // Fill in the phase ID values from neighboring processors // This packs up the values that need to be sent from one processor to another int idx,n; @@ -13,7 +13,7 @@ inline void PackID(const int *list, int count, signed char *sendbuf, signed char } //*************************************************************************************** -inline void UnpackID(const int *list, int count, signed char *recvbuf, signed char *ID){ +inline void UnpackID(int *list, int count, signed char *recvbuf, signed char *ID){ // Fill in the phase ID values from neighboring processors // This unpacks the values once they have been recieved from neighbors int idx,n; @@ -58,11 +58,11 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr } } } - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); // total Global is the number of nodes in the pore-space - totalGlobal = Dm->Comm.sumReduce( count ); - maxdistGlobal = Dm->Comm.sumReduce( maxdist ); + 
MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -77,44 +77,44 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr signed char *recvID_xy, *recvID_yz, *recvID_xz, *recvID_Xy, *recvID_Yz, *recvID_xZ; signed char *recvID_xY, *recvID_yZ, *recvID_Xz, *recvID_XY, *recvID_YZ, *recvID_XZ; // send buffers - sendID_x = new signed char [Dm->sendCount("x")]; - sendID_y = new signed char [Dm->sendCount("y")]; - sendID_z = new signed char [Dm->sendCount("z")]; - sendID_X = new signed char [Dm->sendCount("X")]; - sendID_Y = new signed char [Dm->sendCount("Y")]; - sendID_Z = new signed char [Dm->sendCount("Z")]; - sendID_xy = new signed char [Dm->sendCount("xy")]; - sendID_yz = new signed char [Dm->sendCount("yz")]; - sendID_xz = new signed char [Dm->sendCount("xz")]; - sendID_Xy = new signed char [Dm->sendCount("Xy")]; - sendID_Yz = new signed char [Dm->sendCount("Yz")]; - sendID_xZ = new signed char [Dm->sendCount("xZ")]; - sendID_xY = new signed char [Dm->sendCount("xY")]; - sendID_yZ = new signed char [Dm->sendCount("yZ")]; - sendID_Xz = new signed char [Dm->sendCount("Xz")]; - sendID_XY = new signed char [Dm->sendCount("XY")]; - sendID_YZ = new signed char [Dm->sendCount("YZ")]; - sendID_XZ = new signed char [Dm->sendCount("XZ")]; + sendID_x = new signed char [Dm->sendCount_x]; + sendID_y = new signed char [Dm->sendCount_y]; + sendID_z = new signed char [Dm->sendCount_z]; + sendID_X = new signed char [Dm->sendCount_X]; + sendID_Y = new signed char [Dm->sendCount_Y]; + sendID_Z = new signed char [Dm->sendCount_Z]; + sendID_xy = new signed char [Dm->sendCount_xy]; + sendID_yz = new signed char [Dm->sendCount_yz]; + sendID_xz = new signed char 
[Dm->sendCount_xz]; + sendID_Xy = new signed char [Dm->sendCount_Xy]; + sendID_Yz = new signed char [Dm->sendCount_Yz]; + sendID_xZ = new signed char [Dm->sendCount_xZ]; + sendID_xY = new signed char [Dm->sendCount_xY]; + sendID_yZ = new signed char [Dm->sendCount_yZ]; + sendID_Xz = new signed char [Dm->sendCount_Xz]; + sendID_XY = new signed char [Dm->sendCount_XY]; + sendID_YZ = new signed char [Dm->sendCount_YZ]; + sendID_XZ = new signed char [Dm->sendCount_XZ]; //...................................................................................... // recv buffers - recvID_x = new signed char [Dm->recvCount("x")]; - recvID_y = new signed char [Dm->recvCount("y")]; - recvID_z = new signed char [Dm->recvCount("z")]; - recvID_X = new signed char [Dm->recvCount("X")]; - recvID_Y = new signed char [Dm->recvCount("Y")]; - recvID_Z = new signed char [Dm->recvCount("Z")]; - recvID_xy = new signed char [Dm->recvCount("xy")]; - recvID_yz = new signed char [Dm->recvCount("yz")]; - recvID_xz = new signed char [Dm->recvCount("xz")]; - recvID_Xy = new signed char [Dm->recvCount("Xy")]; - recvID_xZ = new signed char [Dm->recvCount("xZ")]; - recvID_xY = new signed char [Dm->recvCount("xY")]; - recvID_yZ = new signed char [Dm->recvCount("yZ")]; - recvID_Yz = new signed char [Dm->recvCount("Yz")]; - recvID_Xz = new signed char [Dm->recvCount("Xz")]; - recvID_XY = new signed char [Dm->recvCount("XY")]; - recvID_YZ = new signed char [Dm->recvCount("YZ")]; - recvID_XZ = new signed char [Dm->recvCount("XZ")]; + recvID_x = new signed char [Dm->recvCount_x]; + recvID_y = new signed char [Dm->recvCount_y]; + recvID_z = new signed char [Dm->recvCount_z]; + recvID_X = new signed char [Dm->recvCount_X]; + recvID_Y = new signed char [Dm->recvCount_Y]; + recvID_Z = new signed char [Dm->recvCount_Z]; + recvID_xy = new signed char [Dm->recvCount_xy]; + recvID_yz = new signed char [Dm->recvCount_yz]; + recvID_xz = new signed char [Dm->recvCount_xz]; + recvID_Xy = new signed char 
[Dm->recvCount_Xy]; + recvID_xZ = new signed char [Dm->recvCount_xZ]; + recvID_xY = new signed char [Dm->recvCount_xY]; + recvID_yZ = new signed char [Dm->recvCount_yZ]; + recvID_Yz = new signed char [Dm->recvCount_Yz]; + recvID_Xz = new signed char [Dm->recvCount_Xz]; + recvID_XY = new signed char [Dm->recvCount_XY]; + recvID_YZ = new signed char [Dm->recvCount_YZ]; + recvID_XZ = new signed char [Dm->recvCount_XZ]; //...................................................................................... int sendtag,recvtag; sendtag = recvtag = 7; @@ -131,8 +131,9 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr // Increase the critical radius until the target saturation is met double deltaR=0.05; // amount to change the radius in voxel units - double Rcrit_old; + double Rcrit_old=0.0; + double GlobalNumber = 1.f; int imin,jmin,kmin,imax,jmax,kmax; if (ErodeLabel == 1){ @@ -182,65 +183,83 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr } } // Pack and send the updated ID values - PackID(Dm->sendList("x"), Dm->sendCount("x") ,sendID_x, id); - PackID(Dm->sendList("X"), Dm->sendCount("X") ,sendID_X, id); - PackID(Dm->sendList("y"), Dm->sendCount("y") ,sendID_y, id); - PackID(Dm->sendList("Y"), Dm->sendCount("Y") ,sendID_Y, id); - PackID(Dm->sendList("z"), Dm->sendCount("z") ,sendID_z, id); - PackID(Dm->sendList("Z"), Dm->sendCount("Z") ,sendID_Z, id); - PackID(Dm->sendList("xy"), Dm->sendCount("xy") ,sendID_xy, id); - PackID(Dm->sendList("Xy"), Dm->sendCount("Xy") ,sendID_Xy, id); - PackID(Dm->sendList("xY"), Dm->sendCount("xY") ,sendID_xY, id); - PackID(Dm->sendList("XY"), Dm->sendCount("XY") ,sendID_XY, id); - PackID(Dm->sendList("xz"), Dm->sendCount("xz") ,sendID_xz, id); - PackID(Dm->sendList("Xz"), Dm->sendCount("Xz") ,sendID_Xz, id); - PackID(Dm->sendList("xZ"), Dm->sendCount("xZ") ,sendID_xZ, id); - PackID(Dm->sendList("XZ"), Dm->sendCount("XZ") ,sendID_XZ, id); - PackID(Dm->sendList("yz"), 
Dm->sendCount("yz") ,sendID_yz, id); - PackID(Dm->sendList("Yz"), Dm->sendCount("Yz") ,sendID_Yz, id); - PackID(Dm->sendList("yZ"), Dm->sendCount("yZ") ,sendID_yZ, id); - PackID(Dm->sendList("YZ"), Dm->sendCount("YZ") ,sendID_YZ, id); + PackID(Dm->sendList_x, Dm->sendCount_x ,sendID_x, id); + PackID(Dm->sendList_X, Dm->sendCount_X ,sendID_X, id); + PackID(Dm->sendList_y, Dm->sendCount_y ,sendID_y, id); + PackID(Dm->sendList_Y, Dm->sendCount_Y ,sendID_Y, id); + PackID(Dm->sendList_z, Dm->sendCount_z ,sendID_z, id); + PackID(Dm->sendList_Z, Dm->sendCount_Z ,sendID_Z, id); + PackID(Dm->sendList_xy, Dm->sendCount_xy ,sendID_xy, id); + PackID(Dm->sendList_Xy, Dm->sendCount_Xy ,sendID_Xy, id); + PackID(Dm->sendList_xY, Dm->sendCount_xY ,sendID_xY, id); + PackID(Dm->sendList_XY, Dm->sendCount_XY ,sendID_XY, id); + PackID(Dm->sendList_xz, Dm->sendCount_xz ,sendID_xz, id); + PackID(Dm->sendList_Xz, Dm->sendCount_Xz ,sendID_Xz, id); + PackID(Dm->sendList_xZ, Dm->sendCount_xZ ,sendID_xZ, id); + PackID(Dm->sendList_XZ, Dm->sendCount_XZ ,sendID_XZ, id); + PackID(Dm->sendList_yz, Dm->sendCount_yz ,sendID_yz, id); + PackID(Dm->sendList_Yz, Dm->sendCount_Yz ,sendID_Yz, id); + PackID(Dm->sendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); + PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... 
- Dm->Comm.sendrecv(sendID_x,Dm->sendCount("x"),Dm->rank_x(),sendtag,recvID_X,Dm->recvCount("X"),Dm->rank_X(),recvtag); - Dm->Comm.sendrecv(sendID_X,Dm->sendCount("X"),Dm->rank_X(),sendtag,recvID_x,Dm->recvCount("x"),Dm->rank_x(),recvtag); - Dm->Comm.sendrecv(sendID_y,Dm->sendCount("y"),Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount("Y"),Dm->rank_Y(),recvtag); - Dm->Comm.sendrecv(sendID_Y,Dm->sendCount("Y"),Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount("y"),Dm->rank_y(),recvtag); - Dm->Comm.sendrecv(sendID_z,Dm->sendCount("z"),Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount("Z"),Dm->rank_Z(),recvtag); - Dm->Comm.sendrecv(sendID_Z,Dm->sendCount("Z"),Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount("z"),Dm->rank_z(),recvtag); - Dm->Comm.sendrecv(sendID_xy,Dm->sendCount("xy"),Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount("XY"),Dm->rank_XY(),recvtag); - Dm->Comm.sendrecv(sendID_XY,Dm->sendCount("XY"),Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount("xy"),Dm->rank_xy(),recvtag); - Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount("Xy"),Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount("xY"),Dm->rank_xY(),recvtag); - Dm->Comm.sendrecv(sendID_xY,Dm->sendCount("xY"),Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount("Xy"),Dm->rank_Xy(),recvtag); - Dm->Comm.sendrecv(sendID_xz,Dm->sendCount("xz"),Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount("XZ"),Dm->rank_XZ(),recvtag); - Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount("XZ"),Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount("xz"),Dm->rank_xz(),recvtag); - Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount("Xz"),Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount("xZ"),Dm->rank_xZ(),recvtag); - Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount("xZ"),Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount("Xz"),Dm->rank_Xz(),recvtag); - Dm->Comm.sendrecv(sendID_yz,Dm->sendCount("yz"),Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount("YZ"),Dm->rank_YZ(),recvtag); - Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount("YZ"),Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount("yz"),Dm->rank_yz(),recvtag); - 
Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount("Yz"),Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount("yZ"),Dm->rank_yZ(),recvtag); - Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount("yZ"),Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount("Yz"),Dm->rank_Yz(),recvtag); + MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, + recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, + recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, + recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, + recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, + recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, + recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, + recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, + recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, + recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, + recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, + 
recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, + recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, + recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, + recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, + recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, + recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, + recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, + recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); //...................................................................................... 
- UnpackID(Dm->recvList("x"), Dm->recvCount("x") ,recvID_x, id); - UnpackID(Dm->recvList("X"), Dm->recvCount("X") ,recvID_X, id); - UnpackID(Dm->recvList("y"), Dm->recvCount("y") ,recvID_y, id); - UnpackID(Dm->recvList("Y"), Dm->recvCount("Y") ,recvID_Y, id); - UnpackID(Dm->recvList("z"), Dm->recvCount("z") ,recvID_z, id); - UnpackID(Dm->recvList("Z"), Dm->recvCount("Z") ,recvID_Z, id); - UnpackID(Dm->recvList("xy"), Dm->recvCount("xy") ,recvID_xy, id); - UnpackID(Dm->recvList("Xy"), Dm->recvCount("Xy") ,recvID_Xy, id); - UnpackID(Dm->recvList("xY"), Dm->recvCount("xY") ,recvID_xY, id); - UnpackID(Dm->recvList("XY"), Dm->recvCount("XY") ,recvID_XY, id); - UnpackID(Dm->recvList("xz"), Dm->recvCount("xz") ,recvID_xz, id); - UnpackID(Dm->recvList("Xz"), Dm->recvCount("Xz") ,recvID_Xz, id); - UnpackID(Dm->recvList("xZ"), Dm->recvCount("xZ") ,recvID_xZ, id); - UnpackID(Dm->recvList("XZ"), Dm->recvCount("XZ") ,recvID_XZ, id); - UnpackID(Dm->recvList("yz"), Dm->recvCount("yz") ,recvID_yz, id); - UnpackID(Dm->recvList("Yz"), Dm->recvCount("Yz") ,recvID_Yz, id); - UnpackID(Dm->recvList("yZ"), Dm->recvCount("yZ") ,recvID_yZ, id); - UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id); + UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); + UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); + UnpackID(Dm->recvList_y, Dm->recvCount_y ,recvID_y, id); + UnpackID(Dm->recvList_Y, Dm->recvCount_Y ,recvID_Y, id); + UnpackID(Dm->recvList_z, Dm->recvCount_z ,recvID_z, id); + UnpackID(Dm->recvList_Z, Dm->recvCount_Z ,recvID_Z, id); + UnpackID(Dm->recvList_xy, Dm->recvCount_xy ,recvID_xy, id); + UnpackID(Dm->recvList_Xy, Dm->recvCount_Xy ,recvID_Xy, id); + UnpackID(Dm->recvList_xY, Dm->recvCount_xY ,recvID_xY, id); + UnpackID(Dm->recvList_XY, Dm->recvCount_XY ,recvID_XY, id); + UnpackID(Dm->recvList_xz, Dm->recvCount_xz ,recvID_xz, id); + UnpackID(Dm->recvList_Xz, Dm->recvCount_Xz ,recvID_Xz, id); + UnpackID(Dm->recvList_xZ, Dm->recvCount_xZ ,recvID_xZ, id); + 
UnpackID(Dm->recvList_XZ, Dm->recvCount_XZ ,recvID_XZ, id); + UnpackID(Dm->recvList_yz, Dm->recvCount_yz ,recvID_yz, id); + UnpackID(Dm->recvList_Yz, Dm->recvCount_Yz ,recvID_Yz, id); + UnpackID(Dm->recvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id); + UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); //...................................................................................... - //double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); + MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); count = 0.f; for (int k=1; k } } } - countGlobal = Dm->Comm.sumReduce( count ); + MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); /* if (rank==0){ @@ -285,7 +304,7 @@ double morph_open() fillHalo fillChar(Dm->Comm,Dm->rank_info,{Nx-2,Ny-2,Nz-2},{1,1,1},0,1); - GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); + MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); count = 0.f; for (int k=1; kComm.sumReduce( count ); + MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); return countGlobal; } */ @@ -341,11 +360,11 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm.barrier(); + MPI_Barrier(Dm->Comm); // total Global is the number of nodes in the pore-space - totalGlobal = Dm->Comm.sumReduce( count ); - maxdistGlobal = Dm->Comm.sumReduce( maxdist ); + MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -359,44 +378,44 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendCount("x")]; - sendID_y = new signed char [Dm->sendCount("y")]; - 
sendID_z = new signed char [Dm->sendCount("z")]; - sendID_X = new signed char [Dm->sendCount("X")]; - sendID_Y = new signed char [Dm->sendCount("Y")]; - sendID_Z = new signed char [Dm->sendCount("Z")]; - sendID_xy = new signed char [Dm->sendCount("xy")]; - sendID_yz = new signed char [Dm->sendCount("yz")]; - sendID_xz = new signed char [Dm->sendCount("xz")]; - sendID_Xy = new signed char [Dm->sendCount("Xy")]; - sendID_Yz = new signed char [Dm->sendCount("Yz")]; - sendID_xZ = new signed char [Dm->sendCount("xZ")]; - sendID_xY = new signed char [Dm->sendCount("xY")]; - sendID_yZ = new signed char [Dm->sendCount("yZ")]; - sendID_Xz = new signed char [Dm->sendCount("Xz")]; - sendID_XY = new signed char [Dm->sendCount("XY")]; - sendID_YZ = new signed char [Dm->sendCount("YZ")]; - sendID_XZ = new signed char [Dm->sendCount("XZ")]; + sendID_x = new signed char [Dm->sendCount_x]; + sendID_y = new signed char [Dm->sendCount_y]; + sendID_z = new signed char [Dm->sendCount_z]; + sendID_X = new signed char [Dm->sendCount_X]; + sendID_Y = new signed char [Dm->sendCount_Y]; + sendID_Z = new signed char [Dm->sendCount_Z]; + sendID_xy = new signed char [Dm->sendCount_xy]; + sendID_yz = new signed char [Dm->sendCount_yz]; + sendID_xz = new signed char [Dm->sendCount_xz]; + sendID_Xy = new signed char [Dm->sendCount_Xy]; + sendID_Yz = new signed char [Dm->sendCount_Yz]; + sendID_xZ = new signed char [Dm->sendCount_xZ]; + sendID_xY = new signed char [Dm->sendCount_xY]; + sendID_yZ = new signed char [Dm->sendCount_yZ]; + sendID_Xz = new signed char [Dm->sendCount_Xz]; + sendID_XY = new signed char [Dm->sendCount_XY]; + sendID_YZ = new signed char [Dm->sendCount_YZ]; + sendID_XZ = new signed char [Dm->sendCount_XZ]; //...................................................................................... 
// recv buffers - recvID_x = new signed char [Dm->recvCount("x")]; - recvID_y = new signed char [Dm->recvCount("y")]; - recvID_z = new signed char [Dm->recvCount("z")]; - recvID_X = new signed char [Dm->recvCount("X")]; - recvID_Y = new signed char [Dm->recvCount("Y")]; - recvID_Z = new signed char [Dm->recvCount("Z")]; - recvID_xy = new signed char [Dm->recvCount("xy")]; - recvID_yz = new signed char [Dm->recvCount("yz")]; - recvID_xz = new signed char [Dm->recvCount("xz")]; - recvID_Xy = new signed char [Dm->recvCount("Xy")]; - recvID_xZ = new signed char [Dm->recvCount("xZ")]; - recvID_xY = new signed char [Dm->recvCount("xY")]; - recvID_yZ = new signed char [Dm->recvCount("yZ")]; - recvID_Yz = new signed char [Dm->recvCount("Yz")]; - recvID_Xz = new signed char [Dm->recvCount("Xz")]; - recvID_XY = new signed char [Dm->recvCount("XY")]; - recvID_YZ = new signed char [Dm->recvCount("YZ")]; - recvID_XZ = new signed char [Dm->recvCount("XZ")]; + recvID_x = new signed char [Dm->recvCount_x]; + recvID_y = new signed char [Dm->recvCount_y]; + recvID_z = new signed char [Dm->recvCount_z]; + recvID_X = new signed char [Dm->recvCount_X]; + recvID_Y = new signed char [Dm->recvCount_Y]; + recvID_Z = new signed char [Dm->recvCount_Z]; + recvID_xy = new signed char [Dm->recvCount_xy]; + recvID_yz = new signed char [Dm->recvCount_yz]; + recvID_xz = new signed char [Dm->recvCount_xz]; + recvID_Xy = new signed char [Dm->recvCount_Xy]; + recvID_xZ = new signed char [Dm->recvCount_xZ]; + recvID_xY = new signed char [Dm->recvCount_xY]; + recvID_yZ = new signed char [Dm->recvCount_yZ]; + recvID_Yz = new signed char [Dm->recvCount_Yz]; + recvID_Xz = new signed char [Dm->recvCount_Xz]; + recvID_XY = new signed char [Dm->recvCount_XY]; + recvID_YZ = new signed char [Dm->recvCount_YZ]; + recvID_XZ = new signed char [Dm->recvCount_XZ]; //...................................................................................... 
int sendtag,recvtag; sendtag = recvtag = 7; @@ -415,6 +434,7 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm.barrier(); + MPI_Barrier(Dm->Comm); FILE *DRAIN = fopen("morphdrain.csv","w"); @@ -469,64 +489,82 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendList("x"), Dm->sendCount("x") ,sendID_x, id); - PackID(Dm->sendList("X"), Dm->sendCount("X") ,sendID_X, id); - PackID(Dm->sendList("y"), Dm->sendCount("y") ,sendID_y, id); - PackID(Dm->sendList("Y"), Dm->sendCount("Y") ,sendID_Y, id); - PackID(Dm->sendList("z"), Dm->sendCount("z") ,sendID_z, id); - PackID(Dm->sendList("Z"), Dm->sendCount("Z") ,sendID_Z, id); - PackID(Dm->sendList("xy"), Dm->sendCount("xy") ,sendID_xy, id); - PackID(Dm->sendList("Xy"), Dm->sendCount("Xy") ,sendID_Xy, id); - PackID(Dm->sendList("xY"), Dm->sendCount("xY") ,sendID_xY, id); - PackID(Dm->sendList("XY"), Dm->sendCount("XY") ,sendID_XY, id); - PackID(Dm->sendList("xz"), Dm->sendCount("xz") ,sendID_xz, id); - PackID(Dm->sendList("Xz"), Dm->sendCount("Xz") ,sendID_Xz, id); - PackID(Dm->sendList("xZ"), Dm->sendCount("xZ") ,sendID_xZ, id); - PackID(Dm->sendList("XZ"), Dm->sendCount("XZ") ,sendID_XZ, id); - PackID(Dm->sendList("yz"), Dm->sendCount("yz") ,sendID_yz, id); - PackID(Dm->sendList("Yz"), Dm->sendCount("Yz") ,sendID_Yz, id); - PackID(Dm->sendList("yZ"), Dm->sendCount("yZ") ,sendID_yZ, id); - PackID(Dm->sendList("YZ"), Dm->sendCount("YZ") ,sendID_YZ, id); + PackID(Dm->sendList_x, Dm->sendCount_x ,sendID_x, id); + PackID(Dm->sendList_X, Dm->sendCount_X ,sendID_X, id); + PackID(Dm->sendList_y, Dm->sendCount_y ,sendID_y, id); + PackID(Dm->sendList_Y, Dm->sendCount_Y ,sendID_Y, id); + PackID(Dm->sendList_z, Dm->sendCount_z ,sendID_z, id); + PackID(Dm->sendList_Z, Dm->sendCount_Z ,sendID_Z, id); + PackID(Dm->sendList_xy, Dm->sendCount_xy ,sendID_xy, id); + PackID(Dm->sendList_Xy, Dm->sendCount_Xy ,sendID_Xy, id); + PackID(Dm->sendList_xY, Dm->sendCount_xY ,sendID_xY, id); + 
PackID(Dm->sendList_XY, Dm->sendCount_XY ,sendID_XY, id); + PackID(Dm->sendList_xz, Dm->sendCount_xz ,sendID_xz, id); + PackID(Dm->sendList_Xz, Dm->sendCount_Xz ,sendID_Xz, id); + PackID(Dm->sendList_xZ, Dm->sendCount_xZ ,sendID_xZ, id); + PackID(Dm->sendList_XZ, Dm->sendCount_XZ ,sendID_XZ, id); + PackID(Dm->sendList_yz, Dm->sendCount_yz ,sendID_yz, id); + PackID(Dm->sendList_Yz, Dm->sendCount_Yz ,sendID_Yz, id); + PackID(Dm->sendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); + PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); //...................................................................................... - Dm->Comm.sendrecv(sendID_x,Dm->sendCount("x"),Dm->rank_x(),sendtag,recvID_X,Dm->recvCount("X"),Dm->rank_X(),recvtag); - Dm->Comm.sendrecv(sendID_X,Dm->sendCount("X"),Dm->rank_X(),sendtag,recvID_x,Dm->recvCount("x"),Dm->rank_x(),recvtag); - Dm->Comm.sendrecv(sendID_y,Dm->sendCount("y"),Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount("Y"),Dm->rank_Y(),recvtag); - Dm->Comm.sendrecv(sendID_Y,Dm->sendCount("Y"),Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount("y"),Dm->rank_y(),recvtag); - Dm->Comm.sendrecv(sendID_z,Dm->sendCount("z"),Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount("Z"),Dm->rank_Z(),recvtag); - Dm->Comm.sendrecv(sendID_Z,Dm->sendCount("Z"),Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount("z"),Dm->rank_z(),recvtag); - Dm->Comm.sendrecv(sendID_xy,Dm->sendCount("xy"),Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount("XY"),Dm->rank_XY(),recvtag); - Dm->Comm.sendrecv(sendID_XY,Dm->sendCount("XY"),Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount("xy"),Dm->rank_xy(),recvtag); - Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount("Xy"),Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount("xY"),Dm->rank_xY(),recvtag); - Dm->Comm.sendrecv(sendID_xY,Dm->sendCount("xY"),Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount("Xy"),Dm->rank_Xy(),recvtag); - Dm->Comm.sendrecv(sendID_xz,Dm->sendCount("xz"),Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount("XZ"),Dm->rank_XZ(),recvtag); - 
Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount("XZ"),Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount("xz"),Dm->rank_xz(),recvtag); - Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount("Xz"),Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount("xZ"),Dm->rank_xZ(),recvtag); - Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount("xZ"),Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount("Xz"),Dm->rank_Xz(),recvtag); - Dm->Comm.sendrecv(sendID_yz,Dm->sendCount("yz"),Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount("YZ"),Dm->rank_YZ(),recvtag); - Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount("YZ"),Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount("yz"),Dm->rank_yz(),recvtag); - Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount("Yz"),Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount("yZ"),Dm->rank_yZ(),recvtag); - Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount("yZ"),Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount("Yz"),Dm->rank_Yz(),recvtag); + MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, + recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, + recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, + recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, + recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, + recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, + recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, + recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + 
MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, + recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, + recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, + recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, + recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, + recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, + recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, + recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, + recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, + recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, + recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, + recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); //...................................................................................... 
- UnpackID(Dm->recvList("x"), Dm->recvCount("x") ,recvID_x, id); - UnpackID(Dm->recvList("X"), Dm->recvCount("X") ,recvID_X, id); - UnpackID(Dm->recvList("y"), Dm->recvCount("y") ,recvID_y, id); - UnpackID(Dm->recvList("Y"), Dm->recvCount("Y") ,recvID_Y, id); - UnpackID(Dm->recvList("z"), Dm->recvCount("z") ,recvID_z, id); - UnpackID(Dm->recvList("Z"), Dm->recvCount("Z") ,recvID_Z, id); - UnpackID(Dm->recvList("xy"), Dm->recvCount("xy") ,recvID_xy, id); - UnpackID(Dm->recvList("Xy"), Dm->recvCount("Xy") ,recvID_Xy, id); - UnpackID(Dm->recvList("xY"), Dm->recvCount("xY") ,recvID_xY, id); - UnpackID(Dm->recvList("XY"), Dm->recvCount("XY") ,recvID_XY, id); - UnpackID(Dm->recvList("xz"), Dm->recvCount("xz") ,recvID_xz, id); - UnpackID(Dm->recvList("Xz"), Dm->recvCount("Xz") ,recvID_Xz, id); - UnpackID(Dm->recvList("xZ"), Dm->recvCount("xZ") ,recvID_xZ, id); - UnpackID(Dm->recvList("XZ"), Dm->recvCount("XZ") ,recvID_XZ, id); - UnpackID(Dm->recvList("yz"), Dm->recvCount("yz") ,recvID_yz, id); - UnpackID(Dm->recvList("Yz"), Dm->recvCount("Yz") ,recvID_Yz, id); - UnpackID(Dm->recvList("yZ"), Dm->recvCount("yZ") ,recvID_yZ, id); - UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id); + UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); + UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); + UnpackID(Dm->recvList_y, Dm->recvCount_y ,recvID_y, id); + UnpackID(Dm->recvList_Y, Dm->recvCount_Y ,recvID_Y, id); + UnpackID(Dm->recvList_z, Dm->recvCount_z ,recvID_z, id); + UnpackID(Dm->recvList_Z, Dm->recvCount_Z ,recvID_Z, id); + UnpackID(Dm->recvList_xy, Dm->recvCount_xy ,recvID_xy, id); + UnpackID(Dm->recvList_Xy, Dm->recvCount_Xy ,recvID_Xy, id); + UnpackID(Dm->recvList_xY, Dm->recvCount_xY ,recvID_xY, id); + UnpackID(Dm->recvList_XY, Dm->recvCount_XY ,recvID_XY, id); + UnpackID(Dm->recvList_xz, Dm->recvCount_xz ,recvID_xz, id); + UnpackID(Dm->recvList_Xz, Dm->recvCount_Xz ,recvID_Xz, id); + UnpackID(Dm->recvList_xZ, Dm->recvCount_xZ ,recvID_xZ, id); + 
UnpackID(Dm->recvList_XZ, Dm->recvCount_XZ ,recvID_XZ, id); + UnpackID(Dm->recvList_yz, Dm->recvCount_yz ,recvID_yz, id); + UnpackID(Dm->recvList_Yz, Dm->recvCount_Yz ,recvID_Yz, id); + UnpackID(Dm->recvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id); + UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); //...................................................................................... - // double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); + MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); for (int k=0; krank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); for (int k=0; krank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - Dm->Comm.barrier(); + MPI_Barrier(Dm->Comm); for (int k=1; kComm.sumReduce( count ); + MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); if (rank==0){ @@ -647,13 +685,13 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr &id, std::shared_ptr Dm, double TargetGrowth) +double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, std::shared_ptr Dm, double TargetGrowth, double WallFactor) { int Nx = Dm->Nx; int Ny = Dm->Ny; int Nz = Dm->Nz; int rank = Dm->rank(); - + double count=0.0; for (int k=1; k &id, } } } - double count_original = Dm->Comm.sumReduce( count); + double count_original=sumReduce( Dm->Comm, count); // Estimate morph_delta double morph_delta = 0.0; @@ -684,8 +722,7 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, for (int j=1; j MAX_DISPLACEMENT) MAX_DISPLACEMENT= fabs(wallweight*morph_delta); if (Dist(i,j,k) - wallweight*morph_delta < 0.0){ @@ -694,8 +731,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - count = Dm->Comm.sumReduce( count ); - MAX_DISPLACEMENT = Dm->Comm.maxReduce( MAX_DISPLACEMENT ); + count=sumReduce( 
Dm->Comm, count); + MAX_DISPLACEMENT = maxReduce( Dm->Comm, MAX_DISPLACEMENT); GrowthEstimate = count - count_original; ERROR = fabs((GrowthEstimate-TargetGrowth) /TargetGrowth); @@ -731,14 +768,14 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, for (int j=1; jComm.sumReduce( count ); + count=sumReduce( Dm->Comm, count); return count; } diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 2c8c044d..87dcc560 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -9,7 +9,7 @@ color lattice boltzmann model #include #include -ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) diff --git a/models/ColorModel.h b/models/ColorModel.h index e1ba0355..f5667765 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -18,7 +18,7 @@ Implementation of color lattice boltzmann model class ScaLBL_ColorModel{ public: - ScaLBL_ColorModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_ColorModel(); // functions in they should be run diff --git a/sample_scripts/configure_ubuntu b/sample_scripts/configure_ubuntu new file mode 100755 index 00000000..516925d0 --- /dev/null +++ b/sample_scripts/configure_ubuntu @@ -0,0 +1,24 @@ +#!/bin/bash + +cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ + -D CMAKE_CXX_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicxx \ + -D CMAKE_C_FLAGS="-O3 -fPIC" \ + -D CMAKE_CXX_FLAGS="-O3 -fPIC " \ + -D CMAKE_CXX_STANDARD=14 \ + -D MPI_COMPILER:BOOL=TRUE \ + -D MPIEXEC=mpirun \ + -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ + 
-D CMAKE_BUILD_TYPE:STRING=Release \ + -D CUDA_FLAGS="-arch sm_35" \ + -D CUDA_HOST_COMPILER="/usr/bin/gcc" \ + -D HDF5_DIRECTORY="/opt/arden/hdf5/1.8.12" \ + -D HDF5_LIB="/opt/arden/hdf5/1.8.12/lib/libhdf5.a"\ + -D USE_SILO=1 \ + -D SILO_LIB="/opt/arden/silo/4.10.2/lib/libsiloh5.a" \ + -D SILO_DIRECTORY="/opt/arden/silo/4.10.2" \ + -D USE_NETCDF=1 \ + -D NETCDF_DIRECTORY="/opt/arden/netcdf/4.6.1" \ + -D USE_CUDA=0 \ + -D USE_TIMER=0 \ + ~/Programs/LBPM-WIA + From d7fc6774359d51c4d4393227cf0e96888f5cbb5b Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 5 Jan 2021 16:03:15 -0500 Subject: [PATCH 124/205] refactor morphology --- analysis/morphology.cpp | 451 +++++++++++++++++++--------------------- 1 file changed, 209 insertions(+), 242 deletions(-) diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index f6bb3469..ad231f3f 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -1,7 +1,7 @@ #include // Implementation of morphological opening routine -inline void PackID(int *list, int count, signed char *sendbuf, signed char *ID){ +inline void PackID(const int *list, int count, signed char *sendbuf, signed char *ID){ // Fill in the phase ID values from neighboring processors // This packs up the values that need to be sent from one processor to another int idx,n; @@ -13,7 +13,7 @@ inline void PackID(int *list, int count, signed char *sendbuf, signed char *ID){ } //*************************************************************************************** -inline void UnpackID(int *list, int count, signed char *recvbuf, signed char *ID){ +inline void UnpackID(const int *list, int count, signed char *recvbuf, signed char *ID){ // Fill in the phase ID values from neighboring processors // This unpacks the values once they have been recieved from neighbors int idx,n; @@ -58,11 +58,11 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr } } } - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); // total Global is the number of 
nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); + totalGlobal = Dm->Comm.sumReduce( count ); + maxdistGlobal = Dm->Comm.sumReduce( maxdist ); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -77,44 +77,44 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr signed char *recvID_xy, *recvID_yz, *recvID_xz, *recvID_Xy, *recvID_Yz, *recvID_xZ; signed char *recvID_xY, *recvID_yZ, *recvID_Xz, *recvID_XY, *recvID_YZ, *recvID_XZ; // send buffers - sendID_x = new signed char [Dm->sendCount_x]; - sendID_y = new signed char [Dm->sendCount_y]; - sendID_z = new signed char [Dm->sendCount_z]; - sendID_X = new signed char [Dm->sendCount_X]; - sendID_Y = new signed char [Dm->sendCount_Y]; - sendID_Z = new signed char [Dm->sendCount_Z]; - sendID_xy = new signed char [Dm->sendCount_xy]; - sendID_yz = new signed char [Dm->sendCount_yz]; - sendID_xz = new signed char [Dm->sendCount_xz]; - sendID_Xy = new signed char [Dm->sendCount_Xy]; - sendID_Yz = new signed char [Dm->sendCount_Yz]; - sendID_xZ = new signed char [Dm->sendCount_xZ]; - sendID_xY = new signed char [Dm->sendCount_xY]; - sendID_yZ = new signed char [Dm->sendCount_yZ]; - sendID_Xz = new signed char [Dm->sendCount_Xz]; - sendID_XY = new signed char [Dm->sendCount_XY]; - sendID_YZ = new signed char [Dm->sendCount_YZ]; - sendID_XZ = new signed char [Dm->sendCount_XZ]; + sendID_x = new signed char [Dm->sendCount("x")]; + sendID_y = new signed char [Dm->sendCount("y")]; + sendID_z = new signed char [Dm->sendCount("z")]; + sendID_X = new signed char [Dm->sendCount("X")]; + sendID_Y = new signed char [Dm->sendCount("Y")]; + sendID_Z = new signed char [Dm->sendCount("Z")]; + sendID_xy = new signed char [Dm->sendCount("xy")]; + 
sendID_yz = new signed char [Dm->sendCount("yz")]; + sendID_xz = new signed char [Dm->sendCount("xz")]; + sendID_Xy = new signed char [Dm->sendCount("Xy")]; + sendID_Yz = new signed char [Dm->sendCount("Yz")]; + sendID_xZ = new signed char [Dm->sendCount("xZ")]; + sendID_xY = new signed char [Dm->sendCount("xY")]; + sendID_yZ = new signed char [Dm->sendCount("yZ")]; + sendID_Xz = new signed char [Dm->sendCount("Xz")]; + sendID_XY = new signed char [Dm->sendCount("XY")]; + sendID_YZ = new signed char [Dm->sendCount("YZ")]; + sendID_XZ = new signed char [Dm->sendCount("XZ")]; //...................................................................................... // recv buffers - recvID_x = new signed char [Dm->recvCount_x]; - recvID_y = new signed char [Dm->recvCount_y]; - recvID_z = new signed char [Dm->recvCount_z]; - recvID_X = new signed char [Dm->recvCount_X]; - recvID_Y = new signed char [Dm->recvCount_Y]; - recvID_Z = new signed char [Dm->recvCount_Z]; - recvID_xy = new signed char [Dm->recvCount_xy]; - recvID_yz = new signed char [Dm->recvCount_yz]; - recvID_xz = new signed char [Dm->recvCount_xz]; - recvID_Xy = new signed char [Dm->recvCount_Xy]; - recvID_xZ = new signed char [Dm->recvCount_xZ]; - recvID_xY = new signed char [Dm->recvCount_xY]; - recvID_yZ = new signed char [Dm->recvCount_yZ]; - recvID_Yz = new signed char [Dm->recvCount_Yz]; - recvID_Xz = new signed char [Dm->recvCount_Xz]; - recvID_XY = new signed char [Dm->recvCount_XY]; - recvID_YZ = new signed char [Dm->recvCount_YZ]; - recvID_XZ = new signed char [Dm->recvCount_XZ]; + recvID_x = new signed char [Dm->recvCount("x")]; + recvID_y = new signed char [Dm->recvCount("y")]; + recvID_z = new signed char [Dm->recvCount("z")]; + recvID_X = new signed char [Dm->recvCount("X")]; + recvID_Y = new signed char [Dm->recvCount("Y")]; + recvID_Z = new signed char [Dm->recvCount("Z")]; + recvID_xy = new signed char [Dm->recvCount("xy")]; + recvID_yz = new signed char [Dm->recvCount("yz")]; + recvID_xz = 
new signed char [Dm->recvCount("xz")]; + recvID_Xy = new signed char [Dm->recvCount("Xy")]; + recvID_xZ = new signed char [Dm->recvCount("xZ")]; + recvID_xY = new signed char [Dm->recvCount("xY")]; + recvID_yZ = new signed char [Dm->recvCount("yZ")]; + recvID_Yz = new signed char [Dm->recvCount("Yz")]; + recvID_Xz = new signed char [Dm->recvCount("Xz")]; + recvID_XY = new signed char [Dm->recvCount("XY")]; + recvID_YZ = new signed char [Dm->recvCount("YZ")]; + recvID_XZ = new signed char [Dm->recvCount("XZ")]; //...................................................................................... int sendtag,recvtag; sendtag = recvtag = 7; @@ -131,9 +131,8 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr // Increase the critical radius until the target saturation is met double deltaR=0.05; // amount to change the radius in voxel units - double Rcrit_old=0.0; + double Rcrit_old; - double GlobalNumber = 1.f; int imin,jmin,kmin,imax,jmax,kmax; if (ErodeLabel == 1){ @@ -183,83 +182,65 @@ double MorphOpen(DoubleArray &SignDist, signed char *id, std::shared_ptr } } // Pack and send the updated ID values - PackID(Dm->sendList_x, Dm->sendCount_x ,sendID_x, id); - PackID(Dm->sendList_X, Dm->sendCount_X ,sendID_X, id); - PackID(Dm->sendList_y, Dm->sendCount_y ,sendID_y, id); - PackID(Dm->sendList_Y, Dm->sendCount_Y ,sendID_Y, id); - PackID(Dm->sendList_z, Dm->sendCount_z ,sendID_z, id); - PackID(Dm->sendList_Z, Dm->sendCount_Z ,sendID_Z, id); - PackID(Dm->sendList_xy, Dm->sendCount_xy ,sendID_xy, id); - PackID(Dm->sendList_Xy, Dm->sendCount_Xy ,sendID_Xy, id); - PackID(Dm->sendList_xY, Dm->sendCount_xY ,sendID_xY, id); - PackID(Dm->sendList_XY, Dm->sendCount_XY ,sendID_XY, id); - PackID(Dm->sendList_xz, Dm->sendCount_xz ,sendID_xz, id); - PackID(Dm->sendList_Xz, Dm->sendCount_Xz ,sendID_Xz, id); - PackID(Dm->sendList_xZ, Dm->sendCount_xZ ,sendID_xZ, id); - PackID(Dm->sendList_XZ, Dm->sendCount_XZ ,sendID_XZ, id); - PackID(Dm->sendList_yz, 
Dm->sendCount_yz ,sendID_yz, id); - PackID(Dm->sendList_Yz, Dm->sendCount_Yz ,sendID_Yz, id); - PackID(Dm->sendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); - PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); + PackID(Dm->sendList("x"), Dm->sendCount("x") ,sendID_x, id); + PackID(Dm->sendList("X"), Dm->sendCount("X") ,sendID_X, id); + PackID(Dm->sendList("y"), Dm->sendCount("y") ,sendID_y, id); + PackID(Dm->sendList("Y"), Dm->sendCount("Y") ,sendID_Y, id); + PackID(Dm->sendList("z"), Dm->sendCount("z") ,sendID_z, id); + PackID(Dm->sendList("Z"), Dm->sendCount("Z") ,sendID_Z, id); + PackID(Dm->sendList("xy"), Dm->sendCount("xy") ,sendID_xy, id); + PackID(Dm->sendList("Xy"), Dm->sendCount("Xy") ,sendID_Xy, id); + PackID(Dm->sendList("xY"), Dm->sendCount("xY") ,sendID_xY, id); + PackID(Dm->sendList("XY"), Dm->sendCount("XY") ,sendID_XY, id); + PackID(Dm->sendList("xz"), Dm->sendCount("xz") ,sendID_xz, id); + PackID(Dm->sendList("Xz"), Dm->sendCount("Xz") ,sendID_Xz, id); + PackID(Dm->sendList("xZ"), Dm->sendCount("xZ") ,sendID_xZ, id); + PackID(Dm->sendList("XZ"), Dm->sendCount("XZ") ,sendID_XZ, id); + PackID(Dm->sendList("yz"), Dm->sendCount("yz") ,sendID_yz, id); + PackID(Dm->sendList("Yz"), Dm->sendCount("Yz") ,sendID_Yz, id); + PackID(Dm->sendList("yZ"), Dm->sendCount("yZ") ,sendID_yZ, id); + PackID(Dm->sendList("YZ"), Dm->sendCount("YZ") ,sendID_YZ, id); //...................................................................................... 
- MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, - recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, - 
recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, - recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + Dm->Comm.sendrecv(sendID_x,Dm->sendCount("x"),Dm->rank_x(),sendtag,recvID_X,Dm->recvCount("X"),Dm->rank_X(),recvtag); + Dm->Comm.sendrecv(sendID_X,Dm->sendCount("X"),Dm->rank_X(),sendtag,recvID_x,Dm->recvCount("x"),Dm->rank_x(),recvtag); + Dm->Comm.sendrecv(sendID_y,Dm->sendCount("y"),Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount("Y"),Dm->rank_Y(),recvtag); + Dm->Comm.sendrecv(sendID_Y,Dm->sendCount("Y"),Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount("y"),Dm->rank_y(),recvtag); + Dm->Comm.sendrecv(sendID_z,Dm->sendCount("z"),Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount("Z"),Dm->rank_Z(),recvtag); + Dm->Comm.sendrecv(sendID_Z,Dm->sendCount("Z"),Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount("z"),Dm->rank_z(),recvtag); + Dm->Comm.sendrecv(sendID_xy,Dm->sendCount("xy"),Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount("XY"),Dm->rank_XY(),recvtag); + Dm->Comm.sendrecv(sendID_XY,Dm->sendCount("XY"),Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount("xy"),Dm->rank_xy(),recvtag); + Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount("Xy"),Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount("xY"),Dm->rank_xY(),recvtag); 
+ Dm->Comm.sendrecv(sendID_xY,Dm->sendCount("xY"),Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount("Xy"),Dm->rank_Xy(),recvtag); + Dm->Comm.sendrecv(sendID_xz,Dm->sendCount("xz"),Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount("XZ"),Dm->rank_XZ(),recvtag); + Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount("XZ"),Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount("xz"),Dm->rank_xz(),recvtag); + Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount("Xz"),Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount("xZ"),Dm->rank_xZ(),recvtag); + Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount("xZ"),Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount("Xz"),Dm->rank_Xz(),recvtag); + Dm->Comm.sendrecv(sendID_yz,Dm->sendCount("yz"),Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount("YZ"),Dm->rank_YZ(),recvtag); + Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount("YZ"),Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount("yz"),Dm->rank_yz(),recvtag); + Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount("Yz"),Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount("yZ"),Dm->rank_yZ(),recvtag); + Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount("yZ"),Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount("Yz"),Dm->rank_Yz(),recvtag); //...................................................................................... 
- UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); - UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); - UnpackID(Dm->recvList_y, Dm->recvCount_y ,recvID_y, id); - UnpackID(Dm->recvList_Y, Dm->recvCount_Y ,recvID_Y, id); - UnpackID(Dm->recvList_z, Dm->recvCount_z ,recvID_z, id); - UnpackID(Dm->recvList_Z, Dm->recvCount_Z ,recvID_Z, id); - UnpackID(Dm->recvList_xy, Dm->recvCount_xy ,recvID_xy, id); - UnpackID(Dm->recvList_Xy, Dm->recvCount_Xy ,recvID_Xy, id); - UnpackID(Dm->recvList_xY, Dm->recvCount_xY ,recvID_xY, id); - UnpackID(Dm->recvList_XY, Dm->recvCount_XY ,recvID_XY, id); - UnpackID(Dm->recvList_xz, Dm->recvCount_xz ,recvID_xz, id); - UnpackID(Dm->recvList_Xz, Dm->recvCount_Xz ,recvID_Xz, id); - UnpackID(Dm->recvList_xZ, Dm->recvCount_xZ ,recvID_xZ, id); - UnpackID(Dm->recvList_XZ, Dm->recvCount_XZ ,recvID_XZ, id); - UnpackID(Dm->recvList_yz, Dm->recvCount_yz ,recvID_yz, id); - UnpackID(Dm->recvList_Yz, Dm->recvCount_Yz ,recvID_Yz, id); - UnpackID(Dm->recvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id); - UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); + UnpackID(Dm->recvList("x"), Dm->recvCount("x") ,recvID_x, id); + UnpackID(Dm->recvList("X"), Dm->recvCount("X") ,recvID_X, id); + UnpackID(Dm->recvList("y"), Dm->recvCount("y") ,recvID_y, id); + UnpackID(Dm->recvList("Y"), Dm->recvCount("Y") ,recvID_Y, id); + UnpackID(Dm->recvList("z"), Dm->recvCount("z") ,recvID_z, id); + UnpackID(Dm->recvList("Z"), Dm->recvCount("Z") ,recvID_Z, id); + UnpackID(Dm->recvList("xy"), Dm->recvCount("xy") ,recvID_xy, id); + UnpackID(Dm->recvList("Xy"), Dm->recvCount("Xy") ,recvID_Xy, id); + UnpackID(Dm->recvList("xY"), Dm->recvCount("xY") ,recvID_xY, id); + UnpackID(Dm->recvList("XY"), Dm->recvCount("XY") ,recvID_XY, id); + UnpackID(Dm->recvList("xz"), Dm->recvCount("xz") ,recvID_xz, id); + UnpackID(Dm->recvList("Xz"), Dm->recvCount("Xz") ,recvID_Xz, id); + UnpackID(Dm->recvList("xZ"), Dm->recvCount("xZ") ,recvID_xZ, id); + UnpackID(Dm->recvList("XZ"), 
Dm->recvCount("XZ") ,recvID_XZ, id); + UnpackID(Dm->recvList("yz"), Dm->recvCount("yz") ,recvID_yz, id); + UnpackID(Dm->recvList("Yz"), Dm->recvCount("Yz") ,recvID_Yz, id); + UnpackID(Dm->recvList("yZ"), Dm->recvCount("yZ") ,recvID_yZ, id); + UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id); //...................................................................................... - MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + //double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); count = 0.f; for (int k=1; k } } } - MPI_Allreduce(&count,&countGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + countGlobal = Dm->Comm.sumReduce( count ); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); /* if (rank==0){ @@ -304,7 +285,7 @@ double morph_open() fillHalo fillChar(Dm->Comm,Dm->rank_info,{Nx-2,Ny-2,Nz-2},{1,1,1},0,1); - MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); count = 0.f; for (int k=1; kComm); + countGlobal = Dm->Comm.sumReduce( count ); return countGlobal; } */ @@ -360,11 +341,11 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm); + Dm->Comm.barrier(); // total Global is the number of nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); - MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm->Comm); + totalGlobal = Dm->Comm.sumReduce( count ); + maxdistGlobal = Dm->Comm.sumReduce( maxdist ); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double volume_fraction=totalGlobal/volume; if (rank==0) printf("Volume fraction for morphological opening: %f \n",volume_fraction); @@ -378,44 +359,44 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendCount_x]; - sendID_y = new signed char [Dm->sendCount_y]; - sendID_z = new signed char [Dm->sendCount_z]; - sendID_X = 
new signed char [Dm->sendCount_X]; - sendID_Y = new signed char [Dm->sendCount_Y]; - sendID_Z = new signed char [Dm->sendCount_Z]; - sendID_xy = new signed char [Dm->sendCount_xy]; - sendID_yz = new signed char [Dm->sendCount_yz]; - sendID_xz = new signed char [Dm->sendCount_xz]; - sendID_Xy = new signed char [Dm->sendCount_Xy]; - sendID_Yz = new signed char [Dm->sendCount_Yz]; - sendID_xZ = new signed char [Dm->sendCount_xZ]; - sendID_xY = new signed char [Dm->sendCount_xY]; - sendID_yZ = new signed char [Dm->sendCount_yZ]; - sendID_Xz = new signed char [Dm->sendCount_Xz]; - sendID_XY = new signed char [Dm->sendCount_XY]; - sendID_YZ = new signed char [Dm->sendCount_YZ]; - sendID_XZ = new signed char [Dm->sendCount_XZ]; + sendID_x = new signed char [Dm->sendCount("x")]; + sendID_y = new signed char [Dm->sendCount("y")]; + sendID_z = new signed char [Dm->sendCount("z")]; + sendID_X = new signed char [Dm->sendCount("X")]; + sendID_Y = new signed char [Dm->sendCount("Y")]; + sendID_Z = new signed char [Dm->sendCount("Z")]; + sendID_xy = new signed char [Dm->sendCount("xy")]; + sendID_yz = new signed char [Dm->sendCount("yz")]; + sendID_xz = new signed char [Dm->sendCount("xz")]; + sendID_Xy = new signed char [Dm->sendCount("Xy")]; + sendID_Yz = new signed char [Dm->sendCount("Yz")]; + sendID_xZ = new signed char [Dm->sendCount("xZ")]; + sendID_xY = new signed char [Dm->sendCount("xY")]; + sendID_yZ = new signed char [Dm->sendCount("yZ")]; + sendID_Xz = new signed char [Dm->sendCount("Xz")]; + sendID_XY = new signed char [Dm->sendCount("XY")]; + sendID_YZ = new signed char [Dm->sendCount("YZ")]; + sendID_XZ = new signed char [Dm->sendCount("XZ")]; //...................................................................................... 
// recv buffers - recvID_x = new signed char [Dm->recvCount_x]; - recvID_y = new signed char [Dm->recvCount_y]; - recvID_z = new signed char [Dm->recvCount_z]; - recvID_X = new signed char [Dm->recvCount_X]; - recvID_Y = new signed char [Dm->recvCount_Y]; - recvID_Z = new signed char [Dm->recvCount_Z]; - recvID_xy = new signed char [Dm->recvCount_xy]; - recvID_yz = new signed char [Dm->recvCount_yz]; - recvID_xz = new signed char [Dm->recvCount_xz]; - recvID_Xy = new signed char [Dm->recvCount_Xy]; - recvID_xZ = new signed char [Dm->recvCount_xZ]; - recvID_xY = new signed char [Dm->recvCount_xY]; - recvID_yZ = new signed char [Dm->recvCount_yZ]; - recvID_Yz = new signed char [Dm->recvCount_Yz]; - recvID_Xz = new signed char [Dm->recvCount_Xz]; - recvID_XY = new signed char [Dm->recvCount_XY]; - recvID_YZ = new signed char [Dm->recvCount_YZ]; - recvID_XZ = new signed char [Dm->recvCount_XZ]; + recvID_x = new signed char [Dm->recvCount("x")]; + recvID_y = new signed char [Dm->recvCount("y")]; + recvID_z = new signed char [Dm->recvCount("z")]; + recvID_X = new signed char [Dm->recvCount("X")]; + recvID_Y = new signed char [Dm->recvCount("Y")]; + recvID_Z = new signed char [Dm->recvCount("Z")]; + recvID_xy = new signed char [Dm->recvCount("xy")]; + recvID_yz = new signed char [Dm->recvCount("yz")]; + recvID_xz = new signed char [Dm->recvCount("xz")]; + recvID_Xy = new signed char [Dm->recvCount("Xy")]; + recvID_xZ = new signed char [Dm->recvCount("xZ")]; + recvID_xY = new signed char [Dm->recvCount("xY")]; + recvID_yZ = new signed char [Dm->recvCount("yZ")]; + recvID_Yz = new signed char [Dm->recvCount("Yz")]; + recvID_Xz = new signed char [Dm->recvCount("Xz")]; + recvID_XY = new signed char [Dm->recvCount("XY")]; + recvID_YZ = new signed char [Dm->recvCount("YZ")]; + recvID_XZ = new signed char [Dm->recvCount("XZ")]; //...................................................................................... 
int sendtag,recvtag; sendtag = recvtag = 7; @@ -434,7 +415,6 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrComm); + Dm->Comm.barrier(); FILE *DRAIN = fopen("morphdrain.csv","w"); @@ -489,82 +469,64 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrsendList_x, Dm->sendCount_x ,sendID_x, id); - PackID(Dm->sendList_X, Dm->sendCount_X ,sendID_X, id); - PackID(Dm->sendList_y, Dm->sendCount_y ,sendID_y, id); - PackID(Dm->sendList_Y, Dm->sendCount_Y ,sendID_Y, id); - PackID(Dm->sendList_z, Dm->sendCount_z ,sendID_z, id); - PackID(Dm->sendList_Z, Dm->sendCount_Z ,sendID_Z, id); - PackID(Dm->sendList_xy, Dm->sendCount_xy ,sendID_xy, id); - PackID(Dm->sendList_Xy, Dm->sendCount_Xy ,sendID_Xy, id); - PackID(Dm->sendList_xY, Dm->sendCount_xY ,sendID_xY, id); - PackID(Dm->sendList_XY, Dm->sendCount_XY ,sendID_XY, id); - PackID(Dm->sendList_xz, Dm->sendCount_xz ,sendID_xz, id); - PackID(Dm->sendList_Xz, Dm->sendCount_Xz ,sendID_Xz, id); - PackID(Dm->sendList_xZ, Dm->sendCount_xZ ,sendID_xZ, id); - PackID(Dm->sendList_XZ, Dm->sendCount_XZ ,sendID_XZ, id); - PackID(Dm->sendList_yz, Dm->sendCount_yz ,sendID_yz, id); - PackID(Dm->sendList_Yz, Dm->sendCount_Yz ,sendID_Yz, id); - PackID(Dm->sendList_yZ, Dm->sendCount_yZ ,sendID_yZ, id); - PackID(Dm->sendList_YZ, Dm->sendCount_YZ ,sendID_YZ, id); + PackID(Dm->sendList("x"), Dm->sendCount("x") ,sendID_x, id); + PackID(Dm->sendList("X"), Dm->sendCount("X") ,sendID_X, id); + PackID(Dm->sendList("y"), Dm->sendCount("y") ,sendID_y, id); + PackID(Dm->sendList("Y"), Dm->sendCount("Y") ,sendID_Y, id); + PackID(Dm->sendList("z"), Dm->sendCount("z") ,sendID_z, id); + PackID(Dm->sendList("Z"), Dm->sendCount("Z") ,sendID_Z, id); + PackID(Dm->sendList("xy"), Dm->sendCount("xy") ,sendID_xy, id); + PackID(Dm->sendList("Xy"), Dm->sendCount("Xy") ,sendID_Xy, id); + PackID(Dm->sendList("xY"), Dm->sendCount("xY") ,sendID_xY, id); + PackID(Dm->sendList("XY"), Dm->sendCount("XY") ,sendID_XY, id); + 
PackID(Dm->sendList("xz"), Dm->sendCount("xz") ,sendID_xz, id); + PackID(Dm->sendList("Xz"), Dm->sendCount("Xz") ,sendID_Xz, id); + PackID(Dm->sendList("xZ"), Dm->sendCount("xZ") ,sendID_xZ, id); + PackID(Dm->sendList("XZ"), Dm->sendCount("XZ") ,sendID_XZ, id); + PackID(Dm->sendList("yz"), Dm->sendCount("yz") ,sendID_yz, id); + PackID(Dm->sendList("Yz"), Dm->sendCount("Yz") ,sendID_Yz, id); + PackID(Dm->sendList("yZ"), Dm->sendCount("yZ") ,sendID_yZ, id); + PackID(Dm->sendList("YZ"), Dm->sendCount("YZ") ,sendID_YZ, id); //...................................................................................... - MPI_Sendrecv(sendID_x,Dm->sendCount_x,MPI_CHAR,Dm->rank_x(),sendtag, - recvID_X,Dm->recvCount_X,MPI_CHAR,Dm->rank_X(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_X,Dm->sendCount_X,MPI_CHAR,Dm->rank_X(),sendtag, - recvID_x,Dm->recvCount_x,MPI_CHAR,Dm->rank_x(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_y,Dm->sendCount_y,MPI_CHAR,Dm->rank_y(),sendtag, - recvID_Y,Dm->recvCount_Y,MPI_CHAR,Dm->rank_Y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Y,Dm->sendCount_Y,MPI_CHAR,Dm->rank_Y(),sendtag, - recvID_y,Dm->recvCount_y,MPI_CHAR,Dm->rank_y(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_z,Dm->sendCount_z,MPI_CHAR,Dm->rank_z(),sendtag, - recvID_Z,Dm->recvCount_Z,MPI_CHAR,Dm->rank_Z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Z,Dm->sendCount_Z,MPI_CHAR,Dm->rank_Z(),sendtag, - recvID_z,Dm->recvCount_z,MPI_CHAR,Dm->rank_z(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xy,Dm->sendCount_xy,MPI_CHAR,Dm->rank_xy(),sendtag, - recvID_XY,Dm->recvCount_XY,MPI_CHAR,Dm->rank_XY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XY,Dm->sendCount_XY,MPI_CHAR,Dm->rank_XY(),sendtag, - recvID_xy,Dm->recvCount_xy,MPI_CHAR,Dm->rank_xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xy,Dm->sendCount_Xy,MPI_CHAR,Dm->rank_Xy(),sendtag, - 
recvID_xY,Dm->recvCount_xY,MPI_CHAR,Dm->rank_xY(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xY,Dm->sendCount_xY,MPI_CHAR,Dm->rank_xY(),sendtag, - recvID_Xy,Dm->recvCount_Xy,MPI_CHAR,Dm->rank_Xy(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xz,Dm->sendCount_xz,MPI_CHAR,Dm->rank_xz(),sendtag, - recvID_XZ,Dm->recvCount_XZ,MPI_CHAR,Dm->rank_XZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_XZ,Dm->sendCount_XZ,MPI_CHAR,Dm->rank_XZ(),sendtag, - recvID_xz,Dm->recvCount_xz,MPI_CHAR,Dm->rank_xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Xz,Dm->sendCount_Xz,MPI_CHAR,Dm->rank_Xz(),sendtag, - recvID_xZ,Dm->recvCount_xZ,MPI_CHAR,Dm->rank_xZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_xZ,Dm->sendCount_xZ,MPI_CHAR,Dm->rank_xZ(),sendtag, - recvID_Xz,Dm->recvCount_Xz,MPI_CHAR,Dm->rank_Xz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yz,Dm->sendCount_yz,MPI_CHAR,Dm->rank_yz(),sendtag, - recvID_YZ,Dm->recvCount_YZ,MPI_CHAR,Dm->rank_YZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_YZ,Dm->sendCount_YZ,MPI_CHAR,Dm->rank_YZ(),sendtag, - recvID_yz,Dm->recvCount_yz,MPI_CHAR,Dm->rank_yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_Yz,Dm->sendCount_Yz,MPI_CHAR,Dm->rank_Yz(),sendtag, - recvID_yZ,Dm->recvCount_yZ,MPI_CHAR,Dm->rank_yZ(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); - MPI_Sendrecv(sendID_yZ,Dm->sendCount_yZ,MPI_CHAR,Dm->rank_yZ(),sendtag, - recvID_Yz,Dm->recvCount_Yz,MPI_CHAR,Dm->rank_Yz(),recvtag,Dm->Comm,MPI_STATUS_IGNORE); + Dm->Comm.sendrecv(sendID_x,Dm->sendCount("x"),Dm->rank_x(),sendtag,recvID_X,Dm->recvCount("X"),Dm->rank_X(),recvtag); + Dm->Comm.sendrecv(sendID_X,Dm->sendCount("X"),Dm->rank_X(),sendtag,recvID_x,Dm->recvCount("x"),Dm->rank_x(),recvtag); + Dm->Comm.sendrecv(sendID_y,Dm->sendCount("y"),Dm->rank_y(),sendtag,recvID_Y,Dm->recvCount("Y"),Dm->rank_Y(),recvtag); + 
Dm->Comm.sendrecv(sendID_Y,Dm->sendCount("Y"),Dm->rank_Y(),sendtag,recvID_y,Dm->recvCount("y"),Dm->rank_y(),recvtag); + Dm->Comm.sendrecv(sendID_z,Dm->sendCount("z"),Dm->rank_z(),sendtag,recvID_Z,Dm->recvCount("Z"),Dm->rank_Z(),recvtag); + Dm->Comm.sendrecv(sendID_Z,Dm->sendCount("Z"),Dm->rank_Z(),sendtag,recvID_z,Dm->recvCount("z"),Dm->rank_z(),recvtag); + Dm->Comm.sendrecv(sendID_xy,Dm->sendCount("xy"),Dm->rank_xy(),sendtag,recvID_XY,Dm->recvCount("XY"),Dm->rank_XY(),recvtag); + Dm->Comm.sendrecv(sendID_XY,Dm->sendCount("XY"),Dm->rank_XY(),sendtag,recvID_xy,Dm->recvCount("xy"),Dm->rank_xy(),recvtag); + Dm->Comm.sendrecv(sendID_Xy,Dm->sendCount("Xy"),Dm->rank_Xy(),sendtag,recvID_xY,Dm->recvCount("xY"),Dm->rank_xY(),recvtag); + Dm->Comm.sendrecv(sendID_xY,Dm->sendCount("xY"),Dm->rank_xY(),sendtag,recvID_Xy,Dm->recvCount("Xy"),Dm->rank_Xy(),recvtag); + Dm->Comm.sendrecv(sendID_xz,Dm->sendCount("xz"),Dm->rank_xz(),sendtag,recvID_XZ,Dm->recvCount("XZ"),Dm->rank_XZ(),recvtag); + Dm->Comm.sendrecv(sendID_XZ,Dm->sendCount("XZ"),Dm->rank_XZ(),sendtag,recvID_xz,Dm->recvCount("xz"),Dm->rank_xz(),recvtag); + Dm->Comm.sendrecv(sendID_Xz,Dm->sendCount("Xz"),Dm->rank_Xz(),sendtag,recvID_xZ,Dm->recvCount("xZ"),Dm->rank_xZ(),recvtag); + Dm->Comm.sendrecv(sendID_xZ,Dm->sendCount("xZ"),Dm->rank_xZ(),sendtag,recvID_Xz,Dm->recvCount("Xz"),Dm->rank_Xz(),recvtag); + Dm->Comm.sendrecv(sendID_yz,Dm->sendCount("yz"),Dm->rank_yz(),sendtag,recvID_YZ,Dm->recvCount("YZ"),Dm->rank_YZ(),recvtag); + Dm->Comm.sendrecv(sendID_YZ,Dm->sendCount("YZ"),Dm->rank_YZ(),sendtag,recvID_yz,Dm->recvCount("yz"),Dm->rank_yz(),recvtag); + Dm->Comm.sendrecv(sendID_Yz,Dm->sendCount("Yz"),Dm->rank_Yz(),sendtag,recvID_yZ,Dm->recvCount("yZ"),Dm->rank_yZ(),recvtag); + Dm->Comm.sendrecv(sendID_yZ,Dm->sendCount("yZ"),Dm->rank_yZ(),sendtag,recvID_Yz,Dm->recvCount("Yz"),Dm->rank_Yz(),recvtag); //...................................................................................... 
- UnpackID(Dm->recvList_x, Dm->recvCount_x ,recvID_x, id); - UnpackID(Dm->recvList_X, Dm->recvCount_X ,recvID_X, id); - UnpackID(Dm->recvList_y, Dm->recvCount_y ,recvID_y, id); - UnpackID(Dm->recvList_Y, Dm->recvCount_Y ,recvID_Y, id); - UnpackID(Dm->recvList_z, Dm->recvCount_z ,recvID_z, id); - UnpackID(Dm->recvList_Z, Dm->recvCount_Z ,recvID_Z, id); - UnpackID(Dm->recvList_xy, Dm->recvCount_xy ,recvID_xy, id); - UnpackID(Dm->recvList_Xy, Dm->recvCount_Xy ,recvID_Xy, id); - UnpackID(Dm->recvList_xY, Dm->recvCount_xY ,recvID_xY, id); - UnpackID(Dm->recvList_XY, Dm->recvCount_XY ,recvID_XY, id); - UnpackID(Dm->recvList_xz, Dm->recvCount_xz ,recvID_xz, id); - UnpackID(Dm->recvList_Xz, Dm->recvCount_Xz ,recvID_Xz, id); - UnpackID(Dm->recvList_xZ, Dm->recvCount_xZ ,recvID_xZ, id); - UnpackID(Dm->recvList_XZ, Dm->recvCount_XZ ,recvID_XZ, id); - UnpackID(Dm->recvList_yz, Dm->recvCount_yz ,recvID_yz, id); - UnpackID(Dm->recvList_Yz, Dm->recvCount_Yz ,recvID_Yz, id); - UnpackID(Dm->recvList_yZ, Dm->recvCount_yZ ,recvID_yZ, id); - UnpackID(Dm->recvList_YZ, Dm->recvCount_YZ ,recvID_YZ, id); + UnpackID(Dm->recvList("x"), Dm->recvCount("x") ,recvID_x, id); + UnpackID(Dm->recvList("X"), Dm->recvCount("X") ,recvID_X, id); + UnpackID(Dm->recvList("y"), Dm->recvCount("y") ,recvID_y, id); + UnpackID(Dm->recvList("Y"), Dm->recvCount("Y") ,recvID_Y, id); + UnpackID(Dm->recvList("z"), Dm->recvCount("z") ,recvID_z, id); + UnpackID(Dm->recvList("Z"), Dm->recvCount("Z") ,recvID_Z, id); + UnpackID(Dm->recvList("xy"), Dm->recvCount("xy") ,recvID_xy, id); + UnpackID(Dm->recvList("Xy"), Dm->recvCount("Xy") ,recvID_Xy, id); + UnpackID(Dm->recvList("xY"), Dm->recvCount("xY") ,recvID_xY, id); + UnpackID(Dm->recvList("XY"), Dm->recvCount("XY") ,recvID_XY, id); + UnpackID(Dm->recvList("xz"), Dm->recvCount("xz") ,recvID_xz, id); + UnpackID(Dm->recvList("Xz"), Dm->recvCount("Xz") ,recvID_Xz, id); + UnpackID(Dm->recvList("xZ"), Dm->recvCount("xZ") ,recvID_xZ, id); + UnpackID(Dm->recvList("XZ"), 
Dm->recvCount("XZ") ,recvID_XZ, id); + UnpackID(Dm->recvList("yz"), Dm->recvCount("yz") ,recvID_yz, id); + UnpackID(Dm->recvList("Yz"), Dm->recvCount("Yz") ,recvID_Yz, id); + UnpackID(Dm->recvList("yZ"), Dm->recvCount("yZ") ,recvID_yZ, id); + UnpackID(Dm->recvList("YZ"), Dm->recvCount("YZ") ,recvID_YZ, id); //...................................................................................... - MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm->Comm); + // double GlobalNumber = Dm->Comm.sumReduce( LocalNumber ); for (int k=0; krank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); for (int k=0; krank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); for (int k=1; kComm); + countGlobal = Dm->Comm.sumReduce( count ); void_fraction_new = countGlobal/totalGlobal; void_fraction_diff_new = abs(void_fraction_new-VoidFraction); if (rank==0){ @@ -685,13 +647,14 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptr &id, std::shared_ptr Dm, double TargetGrowth) double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, std::shared_ptr Dm, double TargetGrowth, double WallFactor) { int Nx = Dm->Nx; int Ny = Dm->Ny; int Nz = Dm->Nz; int rank = Dm->rank(); - + double count=0.0; for (int k=1; k &id, } } } - double count_original=sumReduce( Dm->Comm, count); + double count_original = Dm->Comm.sumReduce( count); // Estimate morph_delta double morph_delta = 0.0; @@ -722,7 +685,9 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, for (int j=1; j MAX_DISPLACEMENT) MAX_DISPLACEMENT= fabs(wallweight*morph_delta); if (Dist(i,j,k) - wallweight*morph_delta < 0.0){ @@ -731,8 +696,8 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, } } } - count=sumReduce( Dm->Comm, count); - MAX_DISPLACEMENT = maxReduce( Dm->Comm, MAX_DISPLACEMENT); + count = Dm->Comm.sumReduce( count ); + MAX_DISPLACEMENT = 
Dm->Comm.maxReduce( MAX_DISPLACEMENT ); GrowthEstimate = count - count_original; ERROR = fabs((GrowthEstimate-TargetGrowth) /TargetGrowth); @@ -768,14 +733,16 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, for (int j=1; jComm, count); + count = Dm->Comm.sumReduce( count ); return count; } From 345b43d9f819ad51b66db5f45d4f200694d552cc Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 5 Jan 2021 16:08:03 -0500 Subject: [PATCH 125/205] FOM for tests --- tests/TestCommD3Q19.cpp | 21 +++++++++++---------- tests/TestDatabase.cpp | 2 +- tests/TestSegDist.cpp | 8 ++++---- tests/TestTorus.cpp | 8 ++++---- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/tests/TestCommD3Q19.cpp b/tests/TestCommD3Q19.cpp index 73b14ec9..12adbb73 100644 --- a/tests/TestCommD3Q19.cpp +++ b/tests/TestCommD3Q19.cpp @@ -6,7 +6,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -166,8 +166,8 @@ int main(int argc, char **argv) // Initialize MPI Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check; { @@ -262,14 +262,14 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); + sum = comm.sumReduce( sum_local ); double iVol_global=1.f/double((Nx-2)*(Ny-2)*(Nz-2)*nprocx*nprocy*nprocz); porosity = 1.0-sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); //....................................................................... //........................................................................... - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; //........................................................................... 
@@ -354,7 +354,7 @@ int main(int argc, char **argv) GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2, iproc,jproc,kproc,nprocx,nprocy,nprocz); ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); //************************************************************************* // First timestep ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL @@ -377,8 +377,8 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; - MPI_Barrier(comm); - starttime = MPI_Wtime(); + comm.barrier(); + starttime = Utilities::MPI::time(); //......................................... @@ -397,13 +397,13 @@ int main(int argc, char **argv) //********************************************* ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); // Iteration completed! timestep++; //................................................................... } //************************************************************************/ - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; @@ -429,5 +429,6 @@ int main(int argc, char **argv) comm.barrier(); Utilities::shutdown(); // **************************************************** + return check; } diff --git a/tests/TestDatabase.cpp b/tests/TestDatabase.cpp index d32c5bff..c3341aab 100644 --- a/tests/TestDatabase.cpp +++ b/tests/TestDatabase.cpp @@ -9,7 +9,7 @@ #include "common/UnitTest.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Database.h" #include "ProfilerApp.h" diff --git a/tests/TestSegDist.cpp b/tests/TestSegDist.cpp index 97f1b8de..58f7a4a3 100644 --- a/tests/TestSegDist.cpp +++ b/tests/TestSegDist.cpp @@ -97,13 +97,13 @@ int main(int argc, 
char **argv) } } - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf("Initialized! Converting to Signed Distance function \n"); - double t1 = MPI_Wtime(); + double t1 = Utilities::MPI::time(); DoubleArray Distance(nx,ny,nz); CalcDist(Distance,id,Dm,{false,false,false}); - double t2 = MPI_Wtime(); + double t2 = Utilities::MPI::time(); if (rank==0) printf("Total time: %f seconds \n",t2-t1); @@ -115,7 +115,7 @@ int main(int argc, char **argv) } } } - err = sumReduce( Dm.Comm, err ); + err = Dm.Comm.sumReduce( err ); err = sqrt( err / (nx*ny*nz*nprocs) ); if (rank==0) printf("Mean error %0.4f \n", err); diff --git a/tests/TestTorus.cpp b/tests/TestTorus.cpp index ee9789e1..ff412c39 100644 --- a/tests/TestTorus.cpp +++ b/tests/TestTorus.cpp @@ -172,10 +172,10 @@ int main(int argc, char **argv) double Ai = Object->A(); double Hi = Object->H(); double Xi = Object->X(); - Vi=sumReduce( Dm->Comm, Vi); - Ai=sumReduce( Dm->Comm, Ai); - Hi=sumReduce( Dm->Comm, Hi); - Xi=sumReduce( Dm->Comm, Xi); + Vi=Dm->Comm.sumReduce( Vi); + Ai=Dm->Comm.sumReduce( Ai); + Hi=Dm->Comm.sumReduce( Hi); + Xi=Dm->Comm.sumReduce( Xi); printf("Vi=%f, Ai=%f, Hi=%f, Xi=%f \n", Vi,Ai,Hi,Xi); } // Limit scope so variables that contain communicators will free before MPI_Finialize From 7f4f74779c34a99fc78eaaaba474ff6ee880df1a Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 5 Jan 2021 18:43:44 -0500 Subject: [PATCH 126/205] fix the build --- models/DFHModel.cpp | 25 +- models/DFHModel.h | 4 +- models/GreyscaleColorModel.cpp | 14 +- models/GreyscaleColorModel.h | 4 +- models/GreyscaleModel.cpp | 22 +- models/GreyscaleModel.h | 4 +- models/IonModel.cpp | 2 +- models/IonModel.h | 2 +- models/MRTModel.cpp | 3 +- models/MRTModel.h | 4 +- models/MultiPhysController.cpp | 2 +- models/MultiPhysController.h | 4 +- models/PoissonSolver.cpp | 2 +- models/PoissonSolver.h | 2 +- models/StokesModel.cpp | 2 +- models/StokesModel.h | 4 +- tests/GenerateSphereTest.cpp | 390 ++------------------------ 
tests/TestBlobIdentify.cpp | 28 +- tests/TestBubbleDFH.cpp | 29 +- tests/TestColorBubble.cpp | 14 +- tests/TestColorGradDFH.cpp | 19 +- tests/TestFluxBC.cpp | 9 +- tests/TestForceD3Q19.cpp | 6 +- tests/TestForceMoments.cpp | 34 ++- tests/TestInterfaceSpeed.cpp | 26 +- tests/TestMap.cpp | 14 +- tests/TestMomentsD3Q19.cpp | 4 +- tests/TestNernstPlanck.cpp | 24 +- tests/TestPNP_Stokes.cpp | 22 +- tests/TestPoiseuille.cpp | 18 +- tests/TestPoissonSolver.cpp | 21 +- tests/TestPressVel.cpp | 20 +- tests/hello_world.cpp | 4 +- tests/lbpm_dfh_simulator.cpp | 2 +- tests/lbpm_minkowski_scalar.cpp | 99 +------ tests/lbpm_morph_pp.cpp | 20 +- tests/lbpm_morphdrain_pp.cpp | 8 +- tests/lbpm_morphopen_pp.cpp | 8 +- tests/lbpm_permeability_simulator.cpp | 12 +- tests/testCommunication.cpp | 21 +- 40 files changed, 242 insertions(+), 710 deletions(-) diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 26a57656..dfff19c5 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -3,7 +3,7 @@ color lattice boltzmann model */ #include "models/DFHModel.h" -ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_DFHModel::ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -114,9 +114,9 @@ void ScaLBL_DFHModel::SetDomain(){ id = new char [N]; for (int i=0; iid[i] = 1; // initialize this way Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); rank = Dm->rank(); } @@ -138,7 +138,7 @@ void ScaLBL_DFHModel::ReadInput(){ sprintf(LocalRankString,"%05d",rank); sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString); 
ReadBinaryFile(LocalRankFilename, Averages->SDs.data(), N); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; } @@ -430,7 +430,8 @@ void ScaLBL_DFHModel::Initialize(){ } } } - MPI_Allreduce(&count_wet,&count_wet_global,1,MPI_DOUBLE,MPI_SUM,comm); + count_wet_global=Dm->Comm.sumReduce( count_wet); + if (rank==0) printf("Wetting phase volume fraction =%f \n",count_wet_global/double(Nx*Ny*Nz*nprocs)); // initialize phi based on PhaseLabel (include solid component labels) ScaLBL_CopyToDevice(Phi, PhaseLabel, Np*sizeof(double)); @@ -452,7 +453,7 @@ void ScaLBL_DFHModel::Initialize(){ timestep=0; } } - MPI_Bcast(×tep,1,MPI_INT,0,comm); + //MPI_Bcast(×tep,1,MPI_INT,0,comm); // Read in the restart file to CPU buffers double *cPhi = new double[Np]; double *cDist = new double[19*Np]; @@ -474,7 +475,7 @@ void ScaLBL_DFHModel::Initialize(){ ScaLBL_DeviceBarrier(); delete [] cPhi; delete [] cDist; - MPI_Barrier(comm); + comm.barrier(); } if (rank==0) printf ("Initializing phase field \n"); @@ -492,7 +493,7 @@ void ScaLBL_DFHModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ @@ -538,7 +539,7 @@ void ScaLBL_DFHModel::Run(){ } ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -574,9 +575,9 @@ void ScaLBL_DFHModel::Run(){ } ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); //************************************************************************ - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("Update"); // Run the analysis @@ -587,7 +588,7 @@ void ScaLBL_DFHModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep diff --git a/models/DFHModel.h b/models/DFHModel.h index 883ec6f8..b50f62a2 100644 --- a/models/DFHModel.h +++ b/models/DFHModel.h @@ -18,7 +18,7 @@ Implementation of color lattice boltzmann model class ScaLBL_DFHModel{ public: - ScaLBL_DFHModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_DFHModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_DFHModel(); // functions in they should be run @@ -66,7 +66,7 @@ public: double *Pressure; private: - MPI_Comm comm; + Utilities::MPI comm; int dist_mem_size; int neighborSize; diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index 0c349c26..3a922e79 100644 --- a/models/GreyscaleColorModel.cpp +++ 
b/models/GreyscaleColorModel.cpp @@ -15,7 +15,7 @@ void DeleteArray( const TYPE *p ) delete [] p; } -ScaLBL_GreyscaleColorModel::ScaLBL_GreyscaleColorModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_GreyscaleColorModel::ScaLBL_GreyscaleColorModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauA_eff(0),tauB_eff(0),rhoA(0),rhoB(0),alpha(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0),GreyPorosity(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -135,9 +135,9 @@ void ScaLBL_GreyscaleColorModel::SetDomain(){ id = new signed char [N]; for (int i=0; iid[i] = 1; // initialize this way Averages = std::shared_ptr ( new GreyPhaseAnalysis(Dm) ); // TwoPhase analysis object - MPI_Barrier(comm); + comm.barrier(); Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); // Read domain parameters rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -601,7 +601,7 @@ void ScaLBL_GreyscaleColorModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); - MPI_Barrier(comm); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -740,7 +740,7 @@ void ScaLBL_GreyscaleColorModel::Initialize(){ ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); ScaLBL_Comm->Barrier(); - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("Initializing phase field from Restart\n"); ScaLBL_PhaseField_InitFromRestart(Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); @@ -912,7 +912,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... 
@@ -1071,7 +1071,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ } } RESTARTFILE.close(); - MPI_Barrier(comm); + comm.barrier(); } if (timestep%visualization_interval==0){ WriteVisFiles(); diff --git a/models/GreyscaleColorModel.h b/models/GreyscaleColorModel.h index 8c8b4eee..1ae4ab73 100644 --- a/models/GreyscaleColorModel.h +++ b/models/GreyscaleColorModel.h @@ -17,7 +17,7 @@ Implementation of two-fluid greyscale color lattice boltzmann model class ScaLBL_GreyscaleColorModel{ public: - ScaLBL_GreyscaleColorModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_GreyscaleColorModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_GreyscaleColorModel(); // functions in they should be run @@ -72,7 +72,7 @@ public: double *Permeability_dvc; private: - MPI_Comm comm; + Utilities::MPI comm; int dist_mem_size; int neighborSize; diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index b6f9fc62..d43b615b 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -13,7 +13,7 @@ void DeleteArray( const TYPE *p ) delete [] p; } -ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_GreyscaleModel::ScaLBL_GreyscaleModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0),tau_eff(0),Den(0),Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),GreyPorosity(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -121,9 +121,9 @@ void ScaLBL_GreyscaleModel::SetDomain(){ id = new signed char [N]; for (int i=0; iid[i] = 1; // initialize this way - MPI_Barrier(comm); + comm.barrier(); Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); // Read domain parameters rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -367,7 +367,7 @@ void ScaLBL_GreyscaleModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); 
- MPI_Barrier(comm); + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -454,7 +454,7 @@ void ScaLBL_GreyscaleModel::Initialize(){ ScaLBL_CopyToDevice(fq,cfq.get(),19*Np*sizeof(double)); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); } } @@ -487,7 +487,7 @@ void ScaLBL_GreyscaleModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); starttime = MPI_Wtime(); //......................................... @@ -540,7 +540,7 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_D3Q19_AAodd_Greyscale_IMRT(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; } - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); // *************EVEN TIMESTEP*************// timestep++; @@ -580,7 +580,7 @@ void ScaLBL_GreyscaleModel::Run(){ ScaLBL_D3Q19_AAeven_Greyscale_IMRT(fq, 0, ScaLBL_Comm->LastExterior(), Np, rlx, rlx_eff, Fx, Fy, Fz,Porosity,Permeability,Velocity,Den,Pressure_dvc); break; } - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); //************************************************************************/ if (timestep%analysis_interval==0){ @@ -735,7 +735,7 @@ void ScaLBL_GreyscaleModel::Run(){ RESTARTFILE=fopen(LocalRestartFile,"wb"); fwrite(cfq.get(),sizeof(double),19*Np,RESTARTFILE); fclose(RESTARTFILE); - MPI_Barrier(comm); + comm.barrier(); } } @@ -743,7 +743,7 @@ void ScaLBL_GreyscaleModel::Run(){ PROFILE_SAVE("lbpm_greyscale_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep @@ -766,7 
+766,7 @@ void ScaLBL_GreyscaleModel::VelocityField(){ /* Minkowski Morphology(Mask); int SIZE=Np*sizeof(double); ScaLBL_D3Q19_Momentum(fq,Velocity, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); ScaLBL_CopyToHost(&VELOCITY[0],&Velocity[0],3*SIZE); memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double)); diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index 6fe1a108..46cfb014 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -18,7 +18,7 @@ Implementation of color lattice boltzmann model class ScaLBL_GreyscaleModel{ public: - ScaLBL_GreyscaleModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_GreyscaleModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_GreyscaleModel(); // functions in they should be run @@ -76,7 +76,7 @@ public: DoubleArray Pressure; private: - MPI_Comm comm; + Utilities::MPI comm; int dist_mem_size; int neighborSize; diff --git a/models/IonModel.cpp b/models/IonModel.cpp index 3e4e2468..aec7ad79 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -6,7 +6,7 @@ #include "analysis/distance.h" #include "common/ReadMicroCT.h" -ScaLBL_IonModel::ScaLBL_IonModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_IonModel::ScaLBL_IonModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK),nprocs(NP),timestep(0),timestepMax(0),time_conv(0),kb(0),electron_charge(0),T(0),Vt(0),k2_inv(0),h(0), tolerance(0),number_ion_species(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0), fluidVelx_dummy(0),fluidVely_dummy(0),fluidVelz_dummy(0), diff --git a/models/IonModel.h b/models/IonModel.h index 6e8eab25..4b91ac8f 100644 --- a/models/IonModel.h +++ b/models/IonModel.h @@ -22,7 +22,7 @@ class ScaLBL_IonModel{ public: - ScaLBL_IonModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_IonModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_IonModel(); // functions in they should be run diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 
17f42345..62c7ee1c 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -4,8 +4,7 @@ #include "models/MRTModel.h" #include "analysis/distance.h" #include "common/ReadMicroCT.h" - -ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_MRTModel::ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) diff --git a/models/MRTModel.h b/models/MRTModel.h index 40550e59..4c41b746 100644 --- a/models/MRTModel.h +++ b/models/MRTModel.h @@ -17,7 +17,7 @@ class ScaLBL_MRTModel{ public: - ScaLBL_MRTModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_MRTModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_MRTModel(); // functions in they should be run @@ -63,7 +63,7 @@ public: DoubleArray Velocity_y; DoubleArray Velocity_z; private: - Utilities::MPI comm; + Utilities::MPI comm; // filenames char LocalRankString[8]; diff --git a/models/MultiPhysController.cpp b/models/MultiPhysController.cpp index fcfb5403..bbc77923 100644 --- a/models/MultiPhysController.cpp +++ b/models/MultiPhysController.cpp @@ -1,6 +1,6 @@ #include "models/MultiPhysController.h" -ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM): +ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0), analysis_interval(0),visualization_interval(0),tolerance(0),comm(COMM) { diff --git a/models/MultiPhysController.h b/models/MultiPhysController.h index f217248f..dfc5bcee 100644 --- a/models/MultiPhysController.h +++ b/models/MultiPhysController.h @@ -19,7 +19,7 @@ class ScaLBL_Multiphys_Controller{ public: - ScaLBL_Multiphys_Controller(int RANK, int NP, MPI_Comm COMM); + 
ScaLBL_Multiphys_Controller(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_Multiphys_Controller(); void ReadParams(string filename); @@ -44,7 +44,7 @@ public: std::shared_ptr study_db; private: - MPI_Comm comm; + Utilities::MPI comm; // filenames char LocalRankString[8]; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index df1b2875..8831d705 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -5,7 +5,7 @@ #include "analysis/distance.h" #include "common/ReadMicroCT.h" -ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM): +ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),tolerance(0),h(0), epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), chargeDen_dummy(0),WriteLog(0), diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 7599c8b3..72b43d28 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -21,7 +21,7 @@ class ScaLBL_Poisson{ public: - ScaLBL_Poisson(int RANK, int NP, MPI_Comm COMM); + ScaLBL_Poisson(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_Poisson(); // functions in they should be run diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 0368b39b..53f3e14c 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -5,7 +5,7 @@ #include "analysis/distance.h" #include "common/ReadMicroCT.h" -ScaLBL_StokesModel::ScaLBL_StokesModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_StokesModel::ScaLBL_StokesModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tau(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),mu(0),h(0),nu_phys(0),rho_phys(0),rho0(0),den_scale(0),time_conv(0),tolerance(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) diff --git a/models/StokesModel.h 
b/models/StokesModel.h index b6faa7d9..31784a4f 100644 --- a/models/StokesModel.h +++ b/models/StokesModel.h @@ -20,7 +20,7 @@ class ScaLBL_StokesModel{ public: - ScaLBL_StokesModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_StokesModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_StokesModel(); // functions in they should be run @@ -89,4 +89,4 @@ private: void Velocity_LB_to_Phys(DoubleArray &Vel_reg); vector computeElectricForceAvg(double *ChargeDensity, double *ElectricField); }; -#endif \ No newline at end of file +#endif diff --git a/tests/GenerateSphereTest.cpp b/tests/GenerateSphereTest.cpp index 6b9241bc..e28042d7 100644 --- a/tests/GenerateSphereTest.cpp +++ b/tests/GenerateSphereTest.cpp @@ -9,7 +9,7 @@ //#include "common/pmmc.h" #include "common/Domain.h" #include "common/SpherePack.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" /* @@ -70,8 +70,8 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny } } // total Global is the number of nodes in the pore-space - MPI_Allreduce(&count,&totalGlobal,1,MPI_DOUBLE,MPI_SUM,Dm.Comm); - MPI_Allreduce(&maxdist,&maxdistGlobal,1,MPI_DOUBLE,MPI_MAX,Dm.Comm); + totalGlobal = Dm.Comm.sumReduce( count ); + maxdistGlobal = Dm.Comm.sumReduce( maxdist ); double volume=double(nprocx*nprocy*nprocz)*double(nx-2)*double(ny-2)*double(nz-2); double porosity=totalGlobal/volume; if (rank==0) printf("Media Porosity: %f \n",porosity); @@ -145,10 +145,9 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny // Increase the critical radius until the target saturation is met double deltaR=0.05; // amount to change the radius in voxel units - double Rcrit_old=0.0; - double Rcrit_new=0.0; + double Rcrit_old; + double Rcrit_new; - double GlobalNumber = 1.f; int imin,jmin,kmin,imax,jmax,kmax; Rcrit_new = maxdistGlobal; @@ -253,7 +252,7 @@ inline void MorphOpen(DoubleArray SignDist, char *id, Domain &Dm, int nx, int ny 
UnpackID(Dm.recvList("YZ"), Dm.recvCount("YZ") ,recvID_YZ, id); //...................................................................................... - MPI_Allreduce(&LocalNumber,&GlobalNumber,1,MPI_DOUBLE,MPI_SUM,Dm.Comm); + //double GlobalNumber = Dm.Comm.sumReduce( LocalNumber ); count = 0.f; for (int k=1; k -bool allAgree( const std::vector& x, MPI_Comm comm ) { +bool allAgree( const std::vector& x, const Utilities::MPI& comm ) { std::vector x2 = x; - MPI_Bcast(&x2[0],x.size()*sizeof(T)/sizeof(int),MPI_INT,0,comm); + comm.bcast(&x2[0],x.size()*sizeof(T)/sizeof(int),0); int diff = x==x2 ? 0:1; - int diff2 = 0; - MPI_Allreduce(&diff,&diff2,1,MPI_INT,MPI_SUM,comm); + int diff2 = comm.sumReduce( diff ); return diff2==0; } @@ -74,9 +72,9 @@ struct bubble_struct { // Create a random set of bubles -std::vector create_bubbles( int N_bubbles, double Lx, double Ly, double Lz, MPI_Comm comm ) +std::vector create_bubbles( int N_bubbles, double Lx, double Ly, double Lz, const Utilities::MPI& comm ) { - int rank = comm_rank(comm); + int rank = comm.getRank(); std::vector bubbles(N_bubbles); if ( rank == 0 ) { double R0 = 0.2*Lx*Ly*Lz/pow((double)N_bubbles,0.333); @@ -91,7 +89,7 @@ std::vector create_bubbles( int N_bubbles, double Lx, double Ly, } } size_t N_bytes = N_bubbles*sizeof(bubble_struct); - MPI_Bcast((char*)&bubbles[0],N_bytes,MPI_CHAR,0,comm); + comm.bcast((char*)&bubbles[0],N_bytes,0); return bubbles; } @@ -124,7 +122,7 @@ void fillBubbleData( const std::vector& bubbles, DoubleArray& Pha // Shift all of the data by the given number of cells -void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct& rank_info, MPI_Comm comm ) +void shift_data( DoubleArray& data, int sx, int sy, int sz, const RankInfoStruct& rank_info, const Utilities::MPI& comm ) { int nx = data.size(0)-2; int ny = data.size(1)-2; @@ -296,7 +294,7 @@ int main(int argc, char **argv) velocity[i].z = bubbles[i].radius*(2*rand2()-1); } } - 
MPI_Bcast((char*)&velocity[0],bubbles.size()*sizeof(Point),MPI_CHAR,0,comm); + comm.bcast((char*)&velocity[0],bubbles.size()*sizeof(Point),0); fillBubbleData( bubbles, Phase, SignDist, Lx, Ly, Lz, rank_info ); fillData.fill(Phase); fillData.fill(SignDist); @@ -390,8 +388,8 @@ int main(int argc, char **argv) printf("\n"); } } - MPI_Bcast(&N1,1,MPI_INT,0,comm); - MPI_Bcast(&N2,1,MPI_INT,0,comm); + comm.bcast(&N1,1,0); + comm.bcast(&N2,1,0); if ( N1!=nblobs || N2!=nblobs2 ) { if ( rank==0 ) printf("Error, blob ids do not map in moving bubble test (%i,%i,%i,%i)\n", diff --git a/tests/TestBubbleDFH.cpp b/tests/TestBubbleDFH.cpp index 872a8909..cd4f487b 100644 --- a/tests/TestBubbleDFH.cpp +++ b/tests/TestBubbleDFH.cpp @@ -9,7 +9,7 @@ #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" @@ -43,7 +43,7 @@ int main(int argc, char **argv) int device=ScaLBL_SetDevice(rank); printf("Using GPU ID %i for rank %i \n",device,rank); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); @@ -70,7 +70,7 @@ int main(int argc, char **argv) // Initialize compute device // int device=ScaLBL_SetDevice(rank); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); Utilities::setErrorHandlers(); @@ -116,7 +116,7 @@ int main(int argc, char **argv) // Get the rank info const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); - MPI_Barrier(comm); + comm.barrier(); if (nprocs != nprocx*nprocy*nprocz){ printf("nprocx = %i \n",nprocx); @@ -165,7 +165,7 @@ int main(int argc, char **argv) // Mask that excludes the solid phase auto Mask = std::make_shared(domain_db,comm); - MPI_Barrier(comm); + comm.barrier(); Nx+=2; Ny+=2; Nz += 2; int N = Nx*Ny*Nz; @@ -249,6 +249,7 @@ int main(int argc, char **argv) auto neighborList= new int[18*Npad]; Np = 
ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); comm.barrier(); + //........................................................................... // MAIN VARIABLES ALLOCATED HERE //........................................................................... @@ -384,8 +385,8 @@ int main(int argc, char **argv) //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); - starttime = MPI_Wtime(); + comm.barrier(); + starttime = Utilities::MPI::time(); //......................................... err = 1.0; @@ -434,7 +435,7 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAodd_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->next, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -470,9 +471,9 @@ int main(int argc, char **argv) } ScaLBL_D3Q19_AAeven_DFH(NeighborList, fq, Aq, Bq, Den, Phi, Gradient, SolidPotential, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, 0, ScaLBL_Comm->next, Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); //************************************************************************ - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("Update"); // Run the analysis @@ -484,8 +485,8 @@ int main(int argc, char **argv) PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); - stoptime = MPI_Wtime(); + comm.barrier(); + stoptime = Utilities::MPI::time(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep cputime = (stoptime - starttime)/timestep; @@ -544,9 +545,9 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_color_simulator",1); // 
**************************************************** - MPI_Barrier(comm); + comm.barrier(); } // Limit scope so variables that contain communicators will free before MPI_Finialize - Utilities::shutdown(); + Utilities::shutdown(); return check; } diff --git a/tests/TestColorBubble.cpp b/tests/TestColorBubble.cpp index b093ad47..46f7f195 100644 --- a/tests/TestColorBubble.cpp +++ b/tests/TestColorBubble.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/ColorModel.h" using namespace std; @@ -64,14 +64,11 @@ inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius) //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - Utilities::startup( argc, argv ); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { if (rank == 0){ @@ -95,7 +92,8 @@ int main(int argc, char **argv) ColorModel.Run(); ColorModel.WriteDebug(); } - Utilities::shutdown(); + Utilities::shutdown(); + return check; } diff --git a/tests/TestColorGradDFH.cpp b/tests/TestColorGradDFH.cpp index e2616f2c..2fd65e7d 100644 --- a/tests/TestColorGradDFH.cpp +++ b/tests/TestColorGradDFH.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -25,14 +25,11 @@ std::shared_ptr loadInputs( int nprocs ) //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** 
// Initialize MPI - Utilities::startup( argc, argv ); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { // parallel domain size (# of sub-domains) @@ -81,7 +78,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -208,7 +205,9 @@ int main(int argc, char **argv) } } - Utilities::shutdown(); - return check; + } + Utilities::shutdown(); + + return check; } diff --git a/tests/TestFluxBC.cpp b/tests/TestFluxBC.cpp index 95fd575f..3762aee6 100644 --- a/tests/TestFluxBC.cpp +++ b/tests/TestFluxBC.cpp @@ -90,7 +90,7 @@ int main (int argc, char **argv) Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np); comm.barrier(); - //......................device distributions................................. + //......................device distributions................................. 
int dist_mem_size = Np*sizeof(double); if (rank==0) printf ("Allocating distributions \n"); @@ -148,7 +148,7 @@ int main (int argc, char **argv) double *VEL; VEL= new double [3*Np]; int SIZE=3*Np*sizeof(double); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); ScaLBL_CopyToHost(&VEL[0],&dvc_vel[0],SIZE); double Q = 0.f; @@ -191,7 +191,8 @@ int main (int argc, char **argv) din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); ScaLBL_D3Q19_AAodd_MRT(NeighborList, fq, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); + timestep++; ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL @@ -200,7 +201,7 @@ int main (int argc, char **argv) din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); ScaLBL_D3Q19_AAeven_MRT(fq, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_Comm->Barrier(); timestep++; //************************************************************************/ diff --git a/tests/TestForceD3Q19.cpp b/tests/TestForceD3Q19.cpp index b53c47f2..7650c6f0 100644 --- a/tests/TestForceD3Q19.cpp +++ b/tests/TestForceD3Q19.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include @@ -443,14 +443,14 @@ inline void MRT_Transform(double *dist, int Np, double Fx, double Fy, double Fz) int main (int argc, char **argv) { Utilities::startup( argc, argv ); - Utilities::MPI comm( MPI_COMM_WORLD ); + Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); for (int i=0; i #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -46,14 +46,11 @@ std::shared_ptr loadInputs( int nprocs ) 
//*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - Utilities::startup( argc, argv ); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { // parallel domain size (# of sub-domains) @@ -97,7 +94,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - MPI_Barrier(comm); + comm.barrier(); kproc = rank/(nprocx*nprocy); jproc = (rank-nprocx*nprocy*kproc)/nprocx; iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -105,7 +102,7 @@ int main(int argc, char **argv) if (rank == 0) { printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); } - MPI_Barrier(comm); + comm.barrier(); if (rank == 1){ printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); printf("\n\n"); @@ -142,7 +139,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; int Np=0; // number of local pore nodes @@ -187,7 +184,7 @@ int main(int argc, char **argv) if (rank == 0) PrintNeighborList(neighborList,Np, rank); - MPI_Barrier(comm); + comm.barrier(); //......................device distributions................................. int dist_mem_size = Np*sizeof(double); @@ -212,13 +209,13 @@ int main(int argc, char **argv) //.......create and start timer............ 
double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - starttime = MPI_Wtime(); + ScaLBL_DeviceBarrier(); comm.barrier(); + starttime = Utilities::MPI::time(); /************ MAIN ITERATION LOOP (timing communications)***************************************/ //ScaLBL_Comm->SendD3Q19(dist, &dist[10*Np]); //ScaLBL_Comm->RecvD3Q19(dist, &dist[10*Np]); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); if (rank==0) printf("Beginning AA timesteps...\n"); if (rank==0) printf("********************************************************\n"); @@ -230,14 +227,14 @@ int main(int argc, char **argv) ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, ScaLBL_Comm->first_interior, ScaLBL_Comm->last_interior, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAodd_MRT(NeighborList, dist, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; ScaLBL_Comm->SendD3Q19AA(dist); //READ FORM NORMAL ScaLBL_D3Q19_AAeven_MRT(dist, ScaLBL_Comm->first_interior, ScaLBL_Comm->last_interior, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); ScaLBL_Comm->RecvD3Q19AA(dist); //WRITE INTO OPPOSITE ScaLBL_D3Q19_AAeven_MRT(dist, 0, ScaLBL_Comm->next, Np, rlx_setA, rlx_setB, Fx, Fy, Fz); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); timestep++; //************************************************************************/ @@ -247,7 +244,7 @@ int main(int argc, char **argv) //************************************************************************/ - stoptime = MPI_Wtime(); + stoptime = Utilities::MPI::time(); // cout << "CPU time: " << (stoptime - starttime) << " seconds" << endl; cputime = stoptime - starttime; // cout << "Lattice update rate: "<< double(Nx*Ny*Nz*timestep)/cputime/1000000 << " MLUPS" << endl; @@ -329,6 +326,7 @@ int main(int argc, char **argv) } - 
Utilities::shutdown(); + Utilities::shutdown(); + return check; } diff --git a/tests/TestInterfaceSpeed.cpp b/tests/TestInterfaceSpeed.cpp index 085f1624..67bf8f95 100644 --- a/tests/TestInterfaceSpeed.cpp +++ b/tests/TestInterfaceSpeed.cpp @@ -2,7 +2,7 @@ #include #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -21,6 +21,7 @@ int main (int argc, char *argv[]) Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); + // Load inputs string FILENAME = argv[1]; // Load inputs @@ -35,7 +36,7 @@ int main (int argc, char *argv[]) Nx+=2; Ny+=2; Nz+=2; - for (i=0; iid[i] = 1; + for (int i=0; iid[i] = 1; Dm->CommInit(); @@ -46,9 +47,9 @@ int main (int argc, char *argv[]) double dist1,dist2; Cx = Cy = Cz = N*0.5; - for (k=0; k #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -26,11 +26,8 @@ std::shared_ptr loadInputs( int nprocs ) //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - Utilities::startup( argc, argv ); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int check=0; { @@ -42,6 +39,7 @@ int main(int argc, char **argv) {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; + int rank = comm.getRank(); if (rank == 0){ printf("********************************************************\n"); printf("Running unit test: TestMap \n"); @@ -49,7 +47,7 @@ int main(int argc, char **argv) } // Load inputs - auto db = loadInputs( nprocs ); + auto db = loadInputs( comm.getSize() ); int Nx = db->getVector( "n" )[0]; int Ny = db->getVector( "n" )[1]; int Nz = db->getVector( "n" )[2]; @@ 
-92,7 +90,7 @@ int main(int argc, char **argv) Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np); comm.barrier(); - + // Check the neighborlist printf("Check neighborlist: exterior %i, first interior %i last interior %i \n",ScaLBL_Comm->LastExterior(),ScaLBL_Comm->FirstInterior(),ScaLBL_Comm->LastInterior()); for (int idx=0; idxLastExterior(); idx++){ @@ -193,7 +191,7 @@ int main(int argc, char **argv) delete [] TmpMap; } - Utilities::shutdown(); + Utilities::shutdown(); return check; } diff --git a/tests/TestMomentsD3Q19.cpp b/tests/TestMomentsD3Q19.cpp index 07c6769b..10413743 100644 --- a/tests/TestMomentsD3Q19.cpp +++ b/tests/TestMomentsD3Q19.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include @@ -470,7 +470,7 @@ int main (int argc, char **argv) for (int i=0; i #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/MRTModel.h" void ParallelPlates(ScaLBL_MRTModel &MRT){ @@ -47,14 +47,11 @@ void ParallelPlates(ScaLBL_MRTModel &MRT){ //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - Utilities::startup( argc, argv ); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check=0; { if (rank == 0){ @@ -76,7 +73,7 @@ int main(int argc, char **argv) int SIZE=MRT.Np*sizeof(double); ScaLBL_D3Q19_Momentum(MRT.fq,MRT.Velocity, MRT.Np); - ScaLBL_DeviceBarrier(); MPI_Barrier(comm); + ScaLBL_DeviceBarrier(); comm.barrier(); ScaLBL_CopyToHost(&Vz[0],&MRT.Velocity[0],3*SIZE); if (rank == 0) printf("Force: %f,%f,%f \n",MRT.Fx,MRT.Fy,MRT.Fz); @@ -90,7 +87,7 @@ int 
main(int argc, char **argv) j=Ny/2; k=Nz/2; if (rank == 0) printf("Channel width=%f \n",W); if (rank == 0) printf("ID flag vz analytical\n"); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) { for (i=0;i #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" //*************************************************************************************** int main(int argc, char **argv) { - //***************************************** - // ***** MPI STUFF **************** - //***************************************** // Initialize MPI - Utilities::startup( argc, argv ); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + int rank = comm.getRank(); int check=0; { if (rank == 0){ @@ -48,7 +45,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - MPI_Barrier(comm); + comm.barrier(); int kproc = rank/(nprocx*nprocy); int jproc = (rank-nprocx*nprocy*kproc)/nprocx; int iproc = rank-nprocx*nprocy*kproc-nprocz*jproc; @@ -56,7 +53,7 @@ int main(int argc, char **argv) if (rank == 0) { printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); } - MPI_Barrier(comm); + comm.barrier(); if (rank == 1){ printf("i,j,k proc=%d %d %d \n",iproc,jproc,kproc); printf("\n\n"); @@ -100,11 +97,11 @@ int main(int argc, char **argv) } } } - MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm); + sum = comm.sumReduce( sum_local ); porosity = sum*iVol_global; if (rank==0) printf("Media porosity = %f \n",porosity); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." 
<< endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -191,6 +188,7 @@ int main(int argc, char **argv) } } } - Utilities::shutdown(); + Utilities::shutdown(); return check; + } diff --git a/tests/hello_world.cpp b/tests/hello_world.cpp index 0a01a645..3de56719 100644 --- a/tests/hello_world.cpp +++ b/tests/hello_world.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" @@ -13,7 +13,7 @@ int main (int argc, char **argv) for (int i=0; i Dm (new Domain(domain_db,comm)); - for (k=0;kid[n] = 1; - } - } - } + std::shared_ptr Dm = std::shared_ptr(new Domain(domain_db,comm)); // full domain for analysis + comm.barrier(); Dm->CommInit(); - - int z_transition_size = 0; - int xStart = 0; - int yStart = 0; - int zStart = 0; - // Set up the sub-domains - if (rank==0){ - printf("Distributing subdomain across %i processors \n",nprocs); - printf("Process grid: %i x %i x %i \n",Dm->nprocx(),Dm->nprocy(),Dm->nprocz()); - printf("Subdomain size: %i \n",N); - //printf("Size of transition region: %i \n", z_transition_size); - char *tmp; - tmp = new char[N]; - for (int kp=0; kpnprocx()*Dm->nprocy() + jp*Dm->nprocx() + ip; - // Pack and send the subdomain for rnk - for (k=0;kid[nlocal] = tmp[nlocal]; - } - } - } - } - else{ - printf("Sending data to process %i \n", rnk); - MPI_Send(tmp,N,MPI_CHAR,rnk,15,comm); - } - } - } - } - } - else{ - // Recieve the subdomain from rank = 0 - printf("Ready to recieve data %i at process %i \n", N,rank); - comm.recv(Dm->id.data(),N,0,15); - } - MPI_Barrier(comm); // Compute the Minkowski functionals - MPI_Barrier(comm); + comm.barrier(); std::shared_ptr Averages(new Minkowski(Dm)); // Calculate the distance diff --git a/tests/lbpm_morph_pp.cpp b/tests/lbpm_morph_pp.cpp index e681f650..12f6f319 100644 --- a/tests/lbpm_morph_pp.cpp +++ b/tests/lbpm_morph_pp.cpp @@ -23,9 +23,9 @@ int main(int argc, char **argv) { // Initialize MPI - Utilities::startup( argc, argv ); + 
Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + int rank = comm.getRank(); { //....................................................................... // Reading the domain information file @@ -125,13 +125,13 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - MPI_Barrier(comm); + comm.barrier(); // Extract only the connected part of NWP BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); - MPI_Barrier(Dm->Comm); + Dm->Comm.barrier(); int count_connected=0; int count_porespace=0; @@ -153,9 +153,9 @@ int main(int argc, char **argv) } } } - count_connected=sumReduce( Dm->Comm, count_connected); - count_porespace=sumReduce( Dm->Comm, count_porespace); - count_water=sumReduce( Dm->Comm, count_water); + count_connected = Dm->Comm.sumReduce( count_connected ); + count_porespace = Dm->Comm.sumReduce( count_porespace ); + count_water = Dm->Comm.sumReduce( count_water ); for (int k=0; kComm, count_water); + count_water = Dm->Comm.sumReduce( count_water ); SW = double(count_water) / count_porespace; if(rank==0) printf("Final saturation: %f \n", SW); @@ -234,12 +234,12 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); auto filename2 = READFILE + ".morph.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.c_str()); Mask->AggregateLabels(filename2); } - Utilities::shutdown(); + Utilities::shutdown(); } diff --git a/tests/lbpm_morphdrain_pp.cpp b/tests/lbpm_morphdrain_pp.cpp index d8a63d6a..a8e24273 100644 --- a/tests/lbpm_morphdrain_pp.cpp +++ b/tests/lbpm_morphdrain_pp.cpp @@ -23,9 +23,9 @@ int main(int argc, char **argv) { // Initialize MPI - Utilities::startup( argc, argv ); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); 
+ int rank = comm.getRank(); { //....................................................................... // Reading the domain information file @@ -119,7 +119,7 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - MPI_Barrier(comm); + comm.barrier(); // Run the morphological opening MorphDrain(SignDist, id, Dm, SW); @@ -194,7 +194,7 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); auto filename2 = READFILE + ".morphdrain.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.data() ); diff --git a/tests/lbpm_morphopen_pp.cpp b/tests/lbpm_morphopen_pp.cpp index 7db17d19..6afb8722 100644 --- a/tests/lbpm_morphopen_pp.cpp +++ b/tests/lbpm_morphopen_pp.cpp @@ -23,9 +23,9 @@ int main(int argc, char **argv) { // Initialize MPI - Utilities::startup( argc, argv ); + Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + int rank = comm.getRank(); { //....................................................................... 
// Reading the domain information file @@ -121,7 +121,7 @@ int main(int argc, char **argv) if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(SignDist,id_solid,*Dm); - MPI_Barrier(comm); + comm.barrier(); // Run the morphological opening MorphOpen(SignDist, id, Dm, SW, ErodeLabel, OpenLabel); @@ -196,7 +196,7 @@ int main(int argc, char **argv) } } } - MPI_Barrier(comm); + comm.barrier(); auto filename2 = READFILE + ".morphopen.raw"; if (rank==0) printf("Writing file to: %s \n", filename2.data()); diff --git a/tests/lbpm_permeability_simulator.cpp b/tests/lbpm_permeability_simulator.cpp index 941254b0..05caa256 100644 --- a/tests/lbpm_permeability_simulator.cpp +++ b/tests/lbpm_permeability_simulator.cpp @@ -9,7 +9,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" #include "analysis/TwoPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/MRTModel.h" //#define WRITE_SURFACES @@ -24,10 +24,10 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI - Utilities::startup( argc, argv ); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { if (rank == 0){ printf("********************************************************\n"); @@ -38,7 +38,7 @@ int main(int argc, char **argv) int device=ScaLBL_SetDevice(rank); NULL_USE( device ); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier(); ScaLBL_MRTModel MRT(rank,nprocs,comm); auto filename = argv[1]; diff --git a/tests/testCommunication.cpp b/tests/testCommunication.cpp index 7065cd27..e666a882 100644 --- a/tests/testCommunication.cpp +++ b/tests/testCommunication.cpp @@ -6,7 +6,7 @@ #include #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Array.h" using 
namespace std; @@ -15,11 +15,9 @@ using namespace std; //*************************************************************************************** -int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) +int test_communication( const Utilities::MPI& comm, int nprocx, int nprocy, int nprocz ) { - int rank,nprocs; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + int rank = comm.getRank(); int iproc,jproc,kproc; int sendtag,recvtag; if (rank==0) printf("\nRunning test %i %i %i\n",nprocx,nprocy,nprocz); @@ -38,7 +36,7 @@ int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); //********************************** @@ -85,7 +83,7 @@ int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) sendCount_xy = sendCount_yz = sendCount_xz = sendCount_Xy = sendCount_Yz = sendCount_xZ = 0; sendCount_xY = sendCount_yZ = sendCount_Xz = sendCount_XY = sendCount_YZ = sendCount_XZ = 0; - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("SendLists are ready on host\n"); //...................................................................................... 
// Use MPI to fill in the recvCounts form the associated processes @@ -158,7 +156,7 @@ int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) recvCount_yz, recvCount_YZ, recvCount_yZ, recvCount_Yz, rank_x, rank_y, rank_z, rank_X, rank_Y, rank_Z, rank_xy, rank_XY, rank_xY, rank_Xy, rank_xz, rank_XZ, rank_xZ, rank_Xz, rank_yz, rank_YZ, rank_yZ, rank_Yz ); - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf ("RecvLists finished\n"); // Free memory @@ -181,11 +179,9 @@ int test_communication( MPI_Comm comm, int nprocx, int nprocy, int nprocz ) template -int testHalo( MPI_Comm comm, int nprocx, int nprocy, int nprocz, int depth ) +int testHalo( const Utilities::MPI& comm, int nprocx, int nprocy, int nprocz, int depth ) { - int rank,nprocs; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); + int rank = comm.getRank(); if ( rank==0 ) printf("\nRunning Halo test %i %i %i %i\n",nprocx,nprocy,nprocz,depth); @@ -292,7 +288,6 @@ int main(int argc, char **argv) int N_errors_global = comm.sumReduce( N_errors ); comm.barrier(); Utilities::shutdown(); - if ( rank==0 ) { if ( N_errors_global==0 ) std::cout << "All tests passed\n"; From 432fab95b3b00c9a922d9b7a6e73746bf341603e Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Wed, 6 Jan 2021 01:03:18 -0500 Subject: [PATCH 127/205] test done;add sine and cosine voltage input for Poisson solver --- models/PoissonSolver.cpp | 35 ++++++++++++++++++++++------------- models/PoissonSolver.h | 8 ++++---- tests/TestNernstPlanck.cpp | 4 ++-- tests/TestPNP_Stokes.cpp | 4 ++-- tests/TestPoissonSolver.cpp | 13 +++++++------ 5 files changed, 37 insertions(+), 27 deletions(-) diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 96d737bb..1af8ad65 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -139,8 +139,17 @@ void ScaLBL_Poisson::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object 
MPI_Barrier(comm); - Dm->BoundaryCondition = BoundaryCondition; - Mask->BoundaryCondition = BoundaryCondition; + if (BoundaryConditionInlet==0 && BoundaryConditionOutlet==0){ + Dm->BoundaryCondition = 0; + Mask->BoundaryCondition = 0; + } + else if (BoundaryConditionInlet>0 && BoundaryConditionOutlet>0){ + Dm->BoundaryCondition = 1; + Mask->BoundaryCondition = 1; + } + else {//i.e. non-periodic and periodic BCs are mixed + ERROR("Error: check the type of inlet and outlet boundary condition! Mixed periodic and non-periodic BCs are found!\n"); + } Dm->CommInit(); MPI_Barrier(comm); @@ -378,7 +387,7 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){ if (electric_db->keyExists( "Vin" )){ Vin = electric_db->getScalar( "Vin" ); } - if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g \n",Vin); + if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g [V]\n",Vin); break; case 2: if (electric_db->keyExists( "Vin0" )){//voltage amplitude; unit: Volt @@ -398,12 +407,12 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){ } if (rank==0){ if (Vin_Type==1){ - printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] \n",Vin,freqIn,t0_In); - printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin,freqIn,t0_In); + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vin0,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In); } else if (Vin_Type==2){ - printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] \n",Vin,freqIn,t0_In); - printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin,freqIn,t0_In); + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V] 
\n",Vin0,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In); } } break; @@ -415,7 +424,7 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){ if (electric_db->keyExists( "Vout" )){ Vout = electric_db->getScalar( "Vout" ); } - if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vin = %.3g \n",Vout); + if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vout = %.3g [V] \n",Vout); break; case 2: if (electric_db->keyExists( "Vout0" )){//voltage amplitude; unit: Volt @@ -435,12 +444,12 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){ } if (rank==0){ if (Vout_Type==1){ - printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] \n",Vout,freqOut,t0_Out); - printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout,freqOut,t0_Out); + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out); } else if (Vout_Type==2){ - printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] \n",Vout,freqOut,t0_Out); - printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout,freqOut,t0_Out); + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out); } } break; @@ -489,7 +498,7 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){ double *psi_host; psi_host = new double [Nx*Ny*Nz]; time_conv = time_conv_from_Study; - AssignSolidBoundary(psi_host,time_conv);//step1 + AssignSolidBoundary(psi_host);//step1 
Potential_Init(psi_host);//step2 ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double)); ScaLBL_DeviceBarrier(); diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index ebcac179..f2d18327 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -32,8 +32,8 @@ public: void SetDomain(); void ReadInput(); void Create(); - void Initialize(); - void Run(double *ChargeDensity); + void Initialize(double time_conv_from_Study); + void Run(double *ChargeDensity,int timestep_from_Study); void getElectricPotential(DoubleArray &ReturnValues); void getElectricPotential_debug(int timestep); void getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z); @@ -101,8 +101,8 @@ private: void AssignSolidBoundary(double *poisson_solid); void Potential_Init(double *psi_init); void ElectricField_LB_to_Phys(DoubleArray &Efield_reg); - void SolveElectricPotentialAAodd(); - void SolveElectricPotentialAAeven(); + void SolveElectricPotentialAAodd(int timestep_from_Study); + void SolveElectricPotentialAAeven(int timestep_from_Study); //void SolveElectricField(); void SolvePoissonAAodd(double *ChargeDensity); void SolvePoissonAAeven(double *ChargeDensity); diff --git a/tests/TestNernstPlanck.cpp b/tests/TestNernstPlanck.cpp index def67d5b..ecb3a6d0 100644 --- a/tests/TestNernstPlanck.cpp +++ b/tests/TestNernstPlanck.cpp @@ -69,7 +69,7 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + PoissonSolver.Initialize(0); int timestep=0; double error = 1.0; @@ -77,7 +77,7 @@ int main(int argc, char **argv) while (timestep < Study.timestepMax && error > Study.tolerance){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental 
IonModel.Run(IonModel.FluidVelocityDummy,PoissonSolver.ElectricField); //solve for ion transport and electric potential timestep++;//AA operations diff --git a/tests/TestPNP_Stokes.cpp b/tests/TestPNP_Stokes.cpp index bf05f73c..16abcee0 100644 --- a/tests/TestPNP_Stokes.cpp +++ b/tests/TestPNP_Stokes.cpp @@ -82,7 +82,7 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + PoissonSolver.Initialize(0); int timestep=0; @@ -94,7 +94,7 @@ int main(int argc, char **argv) while (timestep < Study.timestepMax && error > Study.tolerance){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential diff --git a/tests/TestPoissonSolver.cpp b/tests/TestPoissonSolver.cpp index 5683ace1..c81e503e 100644 --- a/tests/TestPoissonSolver.cpp +++ b/tests/TestPoissonSolver.cpp @@ -64,17 +64,18 @@ int main(int argc, char **argv) PoissonSolver.DummyChargeDensity(); if (PoissonSolver.TestPeriodic==true){ - if (rank==0) printf("Testing periodic voltage input is enabled. Total test time is %.3g[s], saving data every %.3g[s]; - user-specified time resolution is %.3g[s/lt]\n", + if (rank==0) printf("Testing periodic voltage input is enabled. 
Total test time is %.3g[s], saving data every %.3g[s]; user-specified time resolution is %.3g[s/lt]\n", PoissonSolver.TestPeriodicTime,PoissonSolver.TestPeriodicSaveInterval,PoissonSolver.TestPeriodicTimeConv); int timestep = 0; - while (timestep<(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv)){ + int timeMax = int(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv); + int timeSave = int(PoissonSolver.TestPeriodicSaveInterval/PoissonSolver.TestPeriodicTimeConv); + while (timestep Date: Wed, 6 Jan 2021 11:58:43 -0500 Subject: [PATCH 128/205] merge complete / cpu tests --- IO/PIO.cpp | 2 +- IO/silo.cpp | 2 +- IO/silo.h | 2 +- IO/silo.hpp | 2 +- analysis/ElectroChemistry.h | 2 +- analysis/GreyPhase.h | 2 +- analysis/SubPhase.h | 2 +- common/SpherePack.cpp | 2 +- common/SpherePack.h | 2 +- models/DFHModel.h | 2 +- models/GreyscaleColorModel.h | 2 +- models/GreyscaleModel.h | 2 +- models/MultiPhysController.h | 2 +- tests/TestColorSquareTube.cpp | 2 +- tests/TestFluxBC.cpp | 2 +- tests/TestInterfaceSpeed.cpp | 11 ++- tests/TestIonModel.cpp | 23 +++-- tests/TestMassConservationD3Q7.cpp | 2 +- tests/TestMicroCTReader.cpp | 10 ++- tests/TestSetDevice.cpp | 20 ++--- tests/TestWriter.cpp | 12 ++- tests/lbpm_color_simulator.cpp | 23 ++--- ...m_electrokinetic_SingleFluid_simulator.cpp | 22 +++-- tests/lbpm_greyscaleColor_simulator.cpp | 29 +++---- tests/lbpm_greyscale_simulator.cpp | 87 +++++++++---------- tests/lbpm_minkowski_scalar.cpp | 2 +- 26 files changed, 128 insertions(+), 143 deletions(-) diff --git a/IO/PIO.cpp b/IO/PIO.cpp index 6c6ece2d..fe0f7db4 100644 --- a/IO/PIO.cpp +++ b/IO/PIO.cpp @@ -1,6 +1,6 @@ #include "IO/PIO.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include #include diff --git a/IO/silo.cpp b/IO/silo.cpp index eece8583..ddf3646a 100644 --- a/IO/silo.cpp +++ b/IO/silo.cpp @@ -1,6 +1,6 @@ #include "IO/silo.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" 
+#include "common/MPI.h" #include "ProfilerApp.h" diff --git a/IO/silo.h b/IO/silo.h index e200bb05..40a023d7 100644 --- a/IO/silo.h +++ b/IO/silo.h @@ -6,7 +6,7 @@ #include #include "common/Array.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" diff --git a/IO/silo.hpp b/IO/silo.hpp index 312f32d8..35852004 100644 --- a/IO/silo.hpp +++ b/IO/silo.hpp @@ -3,7 +3,7 @@ #include "IO/silo.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" diff --git a/analysis/ElectroChemistry.h b/analysis/ElectroChemistry.h index beaff833..90874ca0 100644 --- a/analysis/ElectroChemistry.h +++ b/analysis/ElectroChemistry.h @@ -8,7 +8,7 @@ #include #include "common/Domain.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "analysis/analysis.h" #include "analysis/distance.h" diff --git a/analysis/GreyPhase.h b/analysis/GreyPhase.h index 4aca756d..3ab46752 100644 --- a/analysis/GreyPhase.h +++ b/analysis/GreyPhase.h @@ -10,7 +10,7 @@ #include "common/Communication.h" #include "analysis/analysis.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" diff --git a/analysis/SubPhase.h b/analysis/SubPhase.h index 71b87ef0..691c654f 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -12,7 +12,7 @@ #include "analysis/distance.h" #include "analysis/Minkowski.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" diff --git a/common/SpherePack.cpp b/common/SpherePack.cpp index a7246b72..3f77cefd 100644 --- a/common/SpherePack.cpp +++ b/common/SpherePack.cpp @@ -9,7 +9,7 @@ #include "common/Array.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include 
"common/MPI.h" #include "common/Communication.h" #include "common/Database.h" #include "common/SpherePack.h" diff --git a/common/SpherePack.h b/common/SpherePack.h index 5075b289..5f68dd7d 100644 --- a/common/SpherePack.h +++ b/common/SpherePack.h @@ -12,7 +12,7 @@ #include "common/Array.h" #include "common/Utilities.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "common/Database.h" diff --git a/models/DFHModel.h b/models/DFHModel.h index b50f62a2..00e6e6b3 100644 --- a/models/DFHModel.h +++ b/models/DFHModel.h @@ -12,7 +12,7 @@ Implementation of color lattice boltzmann model #include "common/Communication.h" #include "analysis/TwoPhase.h" #include "analysis/runAnalysis.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" diff --git a/models/GreyscaleColorModel.h b/models/GreyscaleColorModel.h index 1ae4ab73..667099e9 100644 --- a/models/GreyscaleColorModel.h +++ b/models/GreyscaleColorModel.h @@ -11,7 +11,7 @@ Implementation of two-fluid greyscale color lattice boltzmann model #include "common/Communication.h" #include "analysis/GreyPhase.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" diff --git a/models/GreyscaleModel.h b/models/GreyscaleModel.h index 46cfb014..aa68c180 100644 --- a/models/GreyscaleModel.h +++ b/models/GreyscaleModel.h @@ -10,7 +10,7 @@ Implementation of color lattice boltzmann model #include #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Database.h" #include "common/ScaLBL.h" #include "ProfilerApp.h" diff --git a/models/MultiPhysController.h b/models/MultiPhysController.h index dfc5bcee..4388d6b9 100644 --- a/models/MultiPhysController.h +++ b/models/MultiPhysController.h @@ -13,7 +13,7 @@ #include "common/ScaLBL.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" 
+#include "common/MPI.h" #include "analysis/Minkowski.h" #include "ProfilerApp.h" diff --git a/tests/TestColorSquareTube.cpp b/tests/TestColorSquareTube.cpp index e21aa286..1434c327 100644 --- a/tests/TestColorSquareTube.cpp +++ b/tests/TestColorSquareTube.cpp @@ -7,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/ColorModel.h" std::shared_ptr loadInputs( int nprocs ) diff --git a/tests/TestFluxBC.cpp b/tests/TestFluxBC.cpp index 3762aee6..3028d8ee 100644 --- a/tests/TestFluxBC.cpp +++ b/tests/TestFluxBC.cpp @@ -1,5 +1,5 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include "common/ScaLBL.h" diff --git a/tests/TestInterfaceSpeed.cpp b/tests/TestInterfaceSpeed.cpp index 67bf8f95..4036a205 100644 --- a/tests/TestInterfaceSpeed.cpp +++ b/tests/TestInterfaceSpeed.cpp @@ -21,7 +21,8 @@ int main (int argc, char *argv[]) Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); - + int toReturn = 0; + { // Load inputs string FILENAME = argv[1]; // Load inputs @@ -114,7 +115,6 @@ int main (int argc, char *argv[]) printf("-------------------------------- \n"); //......................................................................... 
- int toReturn = 0; if (fabs(Averages->awn - 2*PI*RADIUS*RADIUS)/(2*PI*RADIUS*RADIUS) > 0.02){ toReturn = 1; printf("TestCylinderArea.cpp: error tolerance exceeded for wn area \n"); @@ -144,9 +144,8 @@ int main (int argc, char *argv[]) toReturn = 7; } - return toReturn; - comm.barrier(); - return 0; - Utilities::shutdown(); + } + Utilities::shutdown(); + return toReturn; } diff --git a/tests/TestIonModel.cpp b/tests/TestIonModel.cpp index 2a0a02a9..58c051dc 100644 --- a/tests/TestIonModel.cpp +++ b/tests/TestIonModel.cpp @@ -23,21 +23,22 @@ int main(int argc, char **argv) Utilities::startup( argc, argv ); { // Limit scope so variables that contain communicators will free before MPI_Finialize - - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); + // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); if (rank == 0){ printf("**************************************\n"); printf("Running Test for Ion Transport \n"); printf("**************************************\n"); } - // Initialize compute device - ScaLBL_SetDevice(rank); - ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); @@ -62,7 +63,6 @@ int main(int argc, char **argv) IonModel.DummyFluidVelocity(); IonModel.DummyElectricField(); - int timestep=0; double error = 1.0; vectorci_avg_previous{0.0,0.0};//assuming 1:1 solution @@ -85,8 +85,7 @@ int main(int argc, char **argv) PROFILE_SAVE("TestIonModel",1); // **************************************************** - MPI_Barrier(comm); - MPI_Comm_free(&comm); + comm.barrier(); } // Limit scope so variables that contain communicators will free before MPI_Finialize diff --git a/tests/TestMassConservationD3Q7.cpp 
b/tests/TestMassConservationD3Q7.cpp index 3893781b..03b6d01a 100644 --- a/tests/TestMassConservationD3Q7.cpp +++ b/tests/TestMassConservationD3Q7.cpp @@ -8,7 +8,7 @@ #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "models/ColorModel.h" inline void InitializeBubble(ScaLBL_ColorModel &ColorModel, double BubbleRadius){ diff --git a/tests/TestMicroCTReader.cpp b/tests/TestMicroCTReader.cpp index 27230183..d8609356 100644 --- a/tests/TestMicroCTReader.cpp +++ b/tests/TestMicroCTReader.cpp @@ -1,6 +1,6 @@ // Test reading high-resolution files from the microct database -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/UnitTest.h" #include "common/Database.h" #include "common/Domain.h" @@ -13,12 +13,14 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) { + Utilities::MPI comm( MPI_COMM_WORLD ); + // Get the domain info auto db = std::make_shared( filename ); auto domain_db = db->getDatabase( "Domain" ); // Test reading microCT files - auto data = readMicroCT( *domain_db, MPI_COMM_WORLD ); + auto data = readMicroCT( *domain_db, comm ); // Check if we loaded the data correctly if ( data.size() == domain_db->getVector( "n" ) ) @@ -30,7 +32,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) auto n = domain_db->getVector( "n" ); auto nproc = domain_db->getVector( "nproc" ); int N[3] = { n[0]*nproc[0], n[1]*nproc[1], n[2]*nproc[2] }; - int rank = comm_rank(MPI_COMM_WORLD); + int rank = comm.getRank(); RankInfoStruct rankInfo( rank, nproc[0], nproc[1], nproc[2] ); std::vector meshData( 1 ); auto Var = std::make_shared(); @@ -41,7 +43,7 @@ void testReadMicroCT( const std::string& filename, UnitTest& ut ) meshData[0].meshName = "grid"; meshData[0].mesh = std::make_shared(rankInfo,n[0],n[1],n[2],N[0],N[1],N[2]); meshData[0].vars.push_back(Var); - IO::writeData( 0, meshData, MPI_COMM_WORLD ); + IO::writeData( 0, meshData, comm ); } diff --git 
a/tests/TestSetDevice.cpp b/tests/TestSetDevice.cpp index 51e71682..553f297d 100644 --- a/tests/TestSetDevice.cpp +++ b/tests/TestSetDevice.cpp @@ -1,24 +1,25 @@ #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Utilities.h" #include "common/ScaLBL.h" int main (int argc, char **argv) -{ - MPI_Init(&argc,&argv); - int rank = MPI_WORLD_RANK(); - int nprocs = MPI_WORLD_SIZE(); +{ + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); for (int i=0; i& meshData, UnitTest& ut ) { - int rank, nprocs; - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); - MPI_Barrier(comm); + Utilities::MPI comm( MPI_COMM_WORLD ); + int nprocs = comm.getSize(); + comm.barrier(); // Get the format std::string format2 = format; @@ -63,7 +61,7 @@ void testWriter( const std::string& format, std::vector& mes IO::initialize( "test_"+format, format2, false ); IO::writeData( 0, meshData, comm ); IO::writeData( 3, meshData, comm ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP(format+"-write"); // Get the summary name for reading diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index 996b7879..1d579486 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -23,6 +23,13 @@ int main(int argc, char **argv) { + + // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + // Load the input database auto db = std::make_shared( argv[1] ); @@ -33,20 +40,16 @@ int main(int argc, char **argv) { // Limit scope so variables that contain communicators will free before MPI_Finialize - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - if (rank == 0){ printf("********************************************************\n"); printf("Running 
Color LBM \n"); printf("********************************************************\n"); } - // Initialize compute device - ScaLBL_SetDevice(rank); - ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); @@ -71,8 +74,6 @@ int main(int argc, char **argv) PROFILE_SAVE(file,level); // **************************************************** - MPI_Barrier(comm); - MPI_Comm_free(&comm); } // Limit scope so variables that contain communicators will free before MPI_Finialize diff --git a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp index 689745b9..b9f215e7 100644 --- a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp +++ b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp @@ -26,21 +26,22 @@ int main(int argc, char **argv) Utilities::startup( argc, argv ); { // Limit scope so variables that contain communicators will free before MPI_Finialize - - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); + // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); if (rank == 0){ printf("********************************************************\n"); printf("Running LBPM electrokinetic single-fluid solver \n"); printf("********************************************************\n"); } - // Initialize compute device - ScaLBL_SetDevice(rank); - ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); @@ -121,9 +122,6 @@ int main(int argc, char **argv) PROFILE_STOP("Main"); PROFILE_SAVE("lbpm_electrokinetic_SingleFluid_simulator",1); // 
**************************************************** - - MPI_Barrier(comm); - MPI_Comm_free(&comm); } // Limit scope so variables that contain communicators will free before MPI_Finialize diff --git a/tests/lbpm_greyscaleColor_simulator.cpp b/tests/lbpm_greyscaleColor_simulator.cpp index fec85c0e..2efe8c7d 100644 --- a/tests/lbpm_greyscaleColor_simulator.cpp +++ b/tests/lbpm_greyscaleColor_simulator.cpp @@ -19,27 +19,25 @@ using namespace std; int main(int argc, char **argv) { - - // Initialize MPI and error handlers + // Initialize MPI Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // Limit scope so variables that contain communicators will free before MPI_Finialize - - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - + if (rank == 0){ - printf("****************************************\n"); - printf("Running Greyscale Two-Phase Calculation \n"); - printf("****************************************\n"); + printf("****************************************\n"); + printf("Running Greyscale Two-Phase Calculation \n"); + printf("****************************************\n"); } // Initialize compute device - ScaLBL_SetDevice(rank); + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); - + comm.barrier(); + PROFILE_ENABLE(1); //PROFILE_ENABLE_TRACE(); //PROFILE_ENABLE_MEMORY(); @@ -61,9 +59,6 @@ int main(int argc, char **argv) PROFILE_SAVE("lbpm_greyscaleColor_simulator",1); // **************************************************** - MPI_Barrier(comm); - MPI_Comm_free(&comm); - } // Limit scope so variables that contain communicators will free before MPI_Finialize Utilities::shutdown(); diff --git a/tests/lbpm_greyscale_simulator.cpp b/tests/lbpm_greyscale_simulator.cpp index df8cb3cb..e6166116 100644 --- a/tests/lbpm_greyscale_simulator.cpp +++ 
b/tests/lbpm_greyscale_simulator.cpp @@ -19,54 +19,49 @@ using namespace std; int main(int argc, char **argv) { + // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); - // Initialize MPI and error handlers - Utilities::startup( argc, argv ); + { // Limit scope so variables that contain communicators will free before MPI_Finialize - { // Limit scope so variables that contain communicators will free before MPI_Finialize - - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD,&comm); - int rank = comm_rank(comm); - int nprocs = comm_size(comm); - - if (rank == 0){ - printf("********************************************************\n"); - printf("Running Greyscale Single Phase Permeability Calculation \n"); - printf("********************************************************\n"); - } - // Initialize compute device - ScaLBL_SetDevice(rank); - ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); - - PROFILE_ENABLE(1); - //PROFILE_ENABLE_TRACE(); - //PROFILE_ENABLE_MEMORY(); - PROFILE_SYNCHRONIZE(); - PROFILE_START("Main"); - Utilities::setErrorHandlers(); - - auto filename = argv[1]; - ScaLBL_GreyscaleModel Greyscale(rank,nprocs,comm); - Greyscale.ReadParams(filename); - Greyscale.SetDomain(); - Greyscale.ReadInput(); - Greyscale.Create(); // creating the model will create data structure to match the pore structure and allocate variables - Greyscale.Initialize(); // initializing the model will set initial conditions for variables - Greyscale.Run(); - Greyscale.VelocityField(); - //Greyscale.WriteDebug(); - - PROFILE_STOP("Main"); - PROFILE_SAVE("lbpm_greyscale_simulator",1); - // **************************************************** - - MPI_Barrier(comm); - MPI_Comm_free(&comm); - - } // Limit scope so variables that contain communicators will free before MPI_Finialize + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Greyscale 
Single Phase Permeability Calculation \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); - Utilities::shutdown(); + auto filename = argv[1]; + ScaLBL_GreyscaleModel Greyscale(rank,nprocs,comm); + Greyscale.ReadParams(filename); + Greyscale.SetDomain(); + Greyscale.ReadInput(); + Greyscale.Create(); // creating the model will create data structure to match the pore structure and allocate variables + Greyscale.Initialize(); // initializing the model will set initial conditions for variables + Greyscale.Run(); + Greyscale.VelocityField(); + //Greyscale.WriteDebug(); + + PROFILE_STOP("Main"); + PROFILE_SAVE("lbpm_greyscale_simulator",1); + // **************************************************** + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); } diff --git a/tests/lbpm_minkowski_scalar.cpp b/tests/lbpm_minkowski_scalar.cpp index 22893e38..5ee1a91b 100644 --- a/tests/lbpm_minkowski_scalar.cpp +++ b/tests/lbpm_minkowski_scalar.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" From a066fa66062a30d2c119b76a3ac0b2a960d1adb5 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Wed, 13 Jan 2021 22:06:08 -0500 Subject: [PATCH 129/205] merge Lee model --- models/FreeLeeModel.cpp | 14 +++++++------- models/FreeLeeModel.h | 4 ++-- tests/CMakeLists.txt | 5 ----- tests/TestMRT.cpp | 5 ----- 4 files changed, 9 insertions(+), 19 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 
547885b8..5d0f64f6 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -9,7 +9,7 @@ color lattice boltzmann model #include #include -ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, MPI_Comm COMM): +ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),W(0),gamma(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) @@ -107,9 +107,9 @@ void ScaLBL_FreeLeeModel::SetDomain(){ id = new signed char [N]; for (int i=0; iid[i] = 1; // initialize this way - MPI_Barrier(comm); + comm.barrier() Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier() // Read domain parameters rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -206,8 +206,8 @@ void ScaLBL_FreeLeeModel::Create(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,2); - MPI_Barrier(comm); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,2); + comm.barrier() //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -335,7 +335,7 @@ void ScaLBL_FreeLeeModel::Initialize(){ ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier() } if (rank==0) printf ("Initializing phase field \n"); @@ -371,7 +371,7 @@ void ScaLBL_FreeLeeModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - MPI_Barrier(comm); + comm.barrier() starttime = MPI_Wtime(); //......................................... 
diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 01fb54c3..437f48b3 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -18,7 +18,7 @@ Implementation of Lee et al JCP 2016 lattice boltzmann model class ScaLBL_FreeLeeModel{ public: - ScaLBL_FreeLeeModel(int RANK, int NP, MPI_Comm COMM); + ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM); ~ScaLBL_FreeLeeModel(); // functions in they should be run @@ -70,7 +70,7 @@ public: DoubleArray SignDist; private: - MPI_Comm comm; + Utilities::MPI comm; int dist_mem_size; int neighborSize; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2bb810bd..414a13cf 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,13 +4,8 @@ ADD_LBPM_EXECUTABLE( lbpm_color_simulator ) ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) -<<<<<<< HEAD -ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator ) -ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) -======= ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator ) ->>>>>>> FOM #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) diff --git a/tests/TestMRT.cpp b/tests/TestMRT.cpp index 5b58130e..172f6c55 100644 --- a/tests/TestMRT.cpp +++ b/tests/TestMRT.cpp @@ -804,13 +804,8 @@ int main(int argc, char **argv) } // **************************************************** -<<<<<<< HEAD comm.barrier(); Utilities::shutdown(); -======= - MPI_Barrier(comm); - MPI_Finalize(); ->>>>>>> electrokinetic // **************************************************** return check; From f8881e35ed8eef93772e99a2d22ebd49e5e0d3f1 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 14 Jan 2021 14:54:21 -0500 Subject: [PATCH 130/205] fixing some bugs in analysis --- analysis/ElectroChemistry.cpp | 17 +++++++++++------ 1 file changed, 11 
insertions(+), 6 deletions(-) diff --git a/analysis/ElectroChemistry.cpp b/analysis/ElectroChemistry.cpp index f9e25c35..2a0b7169 100644 --- a/analysis/ElectroChemistry.cpp +++ b/analysis/ElectroChemistry.cpp @@ -48,6 +48,8 @@ void ElectroChemistryAnalyzer::SetParams(){ void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep){ int i,j,k; + double Vin=0.0; + double Vout=0.0; Poisson.getElectricPotential(ElectricalPotential); /* local sub-domain averages */ @@ -78,12 +80,14 @@ void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poiss } } } - rho_avg_global[ion]=Dm->Comm.sumReduce( rho_avg_local[ion]); - rho_mu_avg_global[ion]=Dm->Comm.sumReduce( rho_mu_avg_local[ion]); - rho_psi_avg_global[ion]=Dm->Comm.sumReduce( rho_psi_avg_local[ion]); - - rho_mu_avg_global[ion] /= rho_avg_global[ion]; - rho_psi_avg_global[ion] /= rho_avg_global[ion]; + rho_avg_global[ion]=Dm->Comm.sumReduce( rho_avg_local[ion]) / Volume; + rho_mu_avg_global[ion]=Dm->Comm.sumReduce( rho_mu_avg_local[ion]) / Volume; + rho_psi_avg_global[ion]=Dm->Comm.sumReduce( rho_psi_avg_local[ion]) / Volume; + + if (rho_avg_global[ion] > 0.0){ + rho_mu_avg_global[ion] /= rho_avg_global[ion]; + rho_psi_avg_global[ion] /= rho_avg_global[ion]; + } } for (int ion=0; ion Date: Thu, 14 Jan 2021 14:55:51 -0500 Subject: [PATCH 131/205] move to cuda directory --- {gpu => cuda}/D3Q7BC.cu | 0 {gpu => cuda}/Greyscale.cu | 0 {gpu => cuda}/GreyscaleColor.cu | 0 {gpu => cuda}/Ion.cu | 0 {gpu => cuda}/Poisson.cu | 0 {gpu => cuda}/Stokes.cu | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename {gpu => cuda}/D3Q7BC.cu (100%) rename {gpu => cuda}/Greyscale.cu (100%) rename {gpu => cuda}/GreyscaleColor.cu (100%) rename {gpu => cuda}/Ion.cu (100%) rename {gpu => cuda}/Poisson.cu (100%) rename {gpu => cuda}/Stokes.cu (100%) diff --git a/gpu/D3Q7BC.cu b/cuda/D3Q7BC.cu similarity index 100% rename from gpu/D3Q7BC.cu rename to 
cuda/D3Q7BC.cu diff --git a/gpu/Greyscale.cu b/cuda/Greyscale.cu similarity index 100% rename from gpu/Greyscale.cu rename to cuda/Greyscale.cu diff --git a/gpu/GreyscaleColor.cu b/cuda/GreyscaleColor.cu similarity index 100% rename from gpu/GreyscaleColor.cu rename to cuda/GreyscaleColor.cu diff --git a/gpu/Ion.cu b/cuda/Ion.cu similarity index 100% rename from gpu/Ion.cu rename to cuda/Ion.cu diff --git a/gpu/Poisson.cu b/cuda/Poisson.cu similarity index 100% rename from gpu/Poisson.cu rename to cuda/Poisson.cu diff --git a/gpu/Stokes.cu b/cuda/Stokes.cu similarity index 100% rename from gpu/Stokes.cu rename to cuda/Stokes.cu From 31234ed0032da2b7275be4e564c4ba83dd66e44a Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 15 Jan 2021 14:41:10 -0500 Subject: [PATCH 132/205] fix multiple init --- tests/lbpm_electrokinetic_SingleFluid_simulator.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp index b9f215e7..600d9f2f 100644 --- a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp +++ b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp @@ -23,15 +23,13 @@ using namespace std; int main(int argc, char **argv) { // Initialize MPI and error handlers - Utilities::startup( argc, argv ); - - { // Limit scope so variables that contain communicators will free before MPI_Finialize - // Initialize MPI Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); + { // Limit scope so variables that contain communicators will free before MPI_Finialize + if (rank == 0){ printf("********************************************************\n"); printf("Running LBPM electrokinetic single-fluid solver \n"); From 1d85db88ed2c1c764924f3d6f69996e825d215ea Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Fri, 15 Jan 2021 15:38:15 -0500 Subject: [PATCH 133/205] adding Lee model to FOM 
--- common/WideHalo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/WideHalo.h b/common/WideHalo.h index 601eda13..d5c97c2f 100644 --- a/common/WideHalo.h +++ b/common/WideHalo.h @@ -11,7 +11,8 @@ public: ScaLBLWideHalo_Communicator(std::shared_ptr Dm, int width); ~ScaLBLWideHalo_Communicator(); //...................................................................................... - MPI_Comm MPI_COMM_SCALBL; // MPI Communicator + //MPI_Comm MPI_COMM_SCALBL; // MPI Communicator + Utilities::MPI MPI_COMM_SCALBL; unsigned long int CommunicationCount,SendCount,RecvCount; int Nx,Ny,Nz,N; // original domain structure int Nxh,Nyh,Nzh,Nh; // with wide halo @@ -32,7 +33,6 @@ public: double *recvbuf_xyz, *recvbuf_Xyz, *recvbuf_xYz, *recvbuf_XYz; double *recvbuf_xyZ, *recvbuf_XyZ, *recvbuf_xYZ, *recvbuf_XYZ; //...................................................................................... - int LastExterior(); int FirstInterior(); int LastInterior(); From 9bc8100a1da945e43bee14bb961670df898f3a61 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 15 Jan 2021 16:33:57 -0500 Subject: [PATCH 134/205] debugging wide halo --- common/WideHalo.cpp | 109 ++++++++++++++++++++-------------------- models/FreeLeeModel.cpp | 15 +++--- models/FreeLeeModel.h | 2 +- tests/TestBubbleDFH.cpp | 7 +-- tests/TestColorGrad.cpp | 29 ++++++++--- 5 files changed, 85 insertions(+), 77 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index ecc563f3..ed4a451d 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -9,7 +9,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Comm,&MPI_COMM_SCALBL); + MPI_COMM_SCALBL = Dm->Comm.dup(); //...................................................................................... 
// Copy the domain size and communication information directly from Dm Nx = Dm->Nx; @@ -59,7 +59,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr id[i] = 1; // initialize this way - comm.barrier() + comm.barrier(); Dm->CommInit(); - comm.barrier() + comm.barrier(); // Read domain parameters rank = Dm->rank(); nprocx = Dm->nprocx(); @@ -139,7 +139,7 @@ void ScaLBL_FreeLeeModel::ReadInput(){ ASSERT( (int) size1[0] == size0[0]+2 && (int) size1[1] == size0[1]+2 && (int) size1[2] == size0[2]+2 ); fillHalo fill( MPI_COMM_WORLD, Mask->rank_info, size0, { 1, 1, 1 }, 0, 1 ); Array id_view; - id_view.viewRaw( size1, Mask->id ); + id_view.viewRaw( size1, Mask->id.data() ); fill.copy( input_id, id_view ); fill.fill( id_view ); } @@ -207,7 +207,7 @@ void ScaLBL_FreeLeeModel::Create(){ Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,2); - comm.barrier() + comm.barrier(); //........................................................................... // MAIN VARIABLES ALLOCATED HERE @@ -335,7 +335,7 @@ void ScaLBL_FreeLeeModel::Initialize(){ ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); ScaLBL_DeviceBarrier(); - comm.barrier() + comm.barrier(); } if (rank==0) printf ("Initializing phase field \n"); @@ -371,7 +371,7 @@ void ScaLBL_FreeLeeModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); - comm.barrier() + comm.barrier(); starttime = MPI_Wtime(); //......................................... 
@@ -460,8 +460,7 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); */ - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); //************************************************************************ PROFILE_STOP("Update"); } diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 437f48b3..5aa2d30a 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -10,7 +10,7 @@ Implementation of Lee et al JCP 2016 lattice boltzmann model #include #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "ProfilerApp.h" #include "threadpool/thread_pool.h" #include "common/ScaLBL.h" diff --git a/tests/TestBubbleDFH.cpp b/tests/TestBubbleDFH.cpp index 77a8aadb..fe5c5a3d 100644 --- a/tests/TestBubbleDFH.cpp +++ b/tests/TestBubbleDFH.cpp @@ -247,13 +247,8 @@ int main(int argc, char **argv) if (rank==0) printf ("Set up memory efficient layout Npad=%i \n",Npad); IntArray Map(Nx,Ny,Nz); auto neighborList= new int[18*Npad]; -<<<<<<< HEAD - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id,Np,1); - MPI_Barrier(comm); -======= - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1); comm.barrier(); ->>>>>>> FOM //........................................................................... // MAIN VARIABLES ALLOCATED HERE diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index 4fdedb7b..ac59fc38 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -1,3 +1,4 @@ + //************************************************************************* // Lattice Boltzmann Simulator for Single Phase Flow in Porous Media // James E. 
McCLure @@ -6,7 +7,7 @@ #include #include #include "common/ScaLBL.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -70,7 +71,20 @@ int main(int argc, char **argv) //....................................................................... // Reading the domain information file //....................................................................... - if (nprocs==1){ + ifstream domain("Domain.in"); + if (domain.good()){ + domain >> nprocx; + domain >> nprocy; + domain >> nprocz; + domain >> Nx; + domain >> Ny; + domain >> Nz; + domain >> nspheres; + domain >> Lx; + domain >> Ly; + domain >> Lz; + } + else if (nprocs==1){ nprocx=nprocy=nprocz=1; Nx=Ny=Nz=3; nspheres=0; @@ -136,7 +150,8 @@ int main(int argc, char **argv) double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; - std::shared_ptr Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition)); + Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition); + Nx += 2; Ny += 2; Nz += 2; @@ -150,7 +165,7 @@ int main(int argc, char **argv) for (j=0;jid[n]=1; + Dm.id[n]=1; Np++; // Initialize gradient ColorGrad = (1,2,3) double value=double(3*k+2*j+i); @@ -158,7 +173,7 @@ int main(int argc, char **argv) } } } - Dm->CommInit(); + Dm.CommInit(); MPI_Barrier(comm); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -175,7 +190,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,1); + ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); MPI_Barrier(comm); //......................device distributions................................. 
@@ -229,7 +244,7 @@ int main(int argc, char **argv) for (j=1;jid[n] > 0){ + if (Dm.id[n] > 0){ int idx = Map(i,j,k); CX=COLORGRAD[idx]; CY=COLORGRAD[Np+idx]; From c911a4f3d2240f18e34aa680195fa57bc82a7c43 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 15 Jan 2021 17:12:38 -0500 Subject: [PATCH 135/205] add wide halo to fom --- tests/CMakeLists.txt | 3 +- tests/TestColorGrad.cpp | 109 +++-------------------------- tests/TestWideHalo.cpp | 147 +++++++++++++--------------------------- 3 files changed, 58 insertions(+), 201 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 414a13cf..0a8074a3 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -55,9 +55,8 @@ ADD_LBPM_TEST( TestTopo3D ) ADD_LBPM_TEST( TestFluxBC ) ADD_LBPM_TEST( TestMap ) #ADD_LBPM_TEST( TestMRT ) -ADD_LBPM_TEST( TestColorGrad ) +#ADD_LBPM_TEST( TestColorGrad ) ADD_LBPM_TEST( TestWideHalo ) -#ADD_LBPM_TEST( TestColorGradDFH ) ADD_LBPM_TEST( TestColorGradDFH ) ADD_LBPM_TEST( TestBubbleDFH ../example/Bubble/input.db) #ADD_LBPM_TEST( TestColorMassBounceback ../example/Bubble/input.db) diff --git a/tests/TestColorGrad.cpp b/tests/TestColorGrad.cpp index ac59fc38..9ea243ea 100644 --- a/tests/TestColorGrad.cpp +++ b/tests/TestColorGrad.cpp @@ -3,7 +3,12 @@ // Lattice Boltzmann Simulator for Single Phase Flow in Porous Media // James E. 
McCLure //************************************************************************* -#include +#include // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + int check; #include #include #include "common/ScaLBL.h" @@ -21,8 +26,8 @@ int main(int argc, char **argv) // Initialize MPI Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = comm.getRank(); + int nprocs = comm.getSize(); int check; { // parallel domain size (# of sub-domains) @@ -47,105 +52,13 @@ int main(int argc, char **argv) int Nx,Ny,Nz; int i,j,k,n; int dim = 3; + Nx = Ny = Nz = 32; + Lx = Ly = Lz = 1.0; //if (rank == 0) printf("dim=%d\n",dim); int timestep = 0; int timesteps = 100; int centralNode = 2; - double tauA = 1.0; - double tauB = 1.0; - double rhoA = 1.0; - double rhoB = 1.0; - double alpha = 0.005; - double beta = 0.95; - - double tau = 1.0; - double mu=(tau-0.5)/3.0; - double rlx_setA=1.0/tau; - double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); - - Fx = Fy = 0.f; - Fz = 0.f; - - if (rank==0){ - //....................................................................... - // Reading the domain information file - //....................................................................... 
- ifstream domain("Domain.in"); - if (domain.good()){ - domain >> nprocx; - domain >> nprocy; - domain >> nprocz; - domain >> Nx; - domain >> Ny; - domain >> Nz; - domain >> nspheres; - domain >> Lx; - domain >> Ly; - domain >> Lz; - } - else if (nprocs==1){ - nprocx=nprocy=nprocz=1; - Nx=Ny=Nz=3; - nspheres=0; - Lx=Ly=Lz=1; - } - else if (nprocs==2){ - nprocx=2; nprocy=1; - nprocz=1; - Nx=Ny=Nz=dim; - Nx = dim; Ny = dim; Nz = dim; - nspheres=0; - Lx=Ly=Lz=1; - } - else if (nprocs==4){ - nprocx=nprocy=2; - nprocz=1; - Nx=Ny=Nz=dim; - nspheres=0; - Lx=Ly=Lz=1; - } - else if (nprocs==8){ - nprocx=nprocy=nprocz=2; - Nx=Ny=Nz=dim; - nspheres=0; - Lx=Ly=Lz=1; - } - //....................................................................... - } - // ************************************************************** - // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); - //................................................. - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); - //................................................. 
- MPI_Barrier(comm); - // ************************************************************** - // ************************************************************** - - if (nprocs != nprocx*nprocy*nprocz){ - printf("nprocx = %i \n",nprocx); - printf("nprocy = %i \n",nprocy); - printf("nprocz = %i \n",nprocz); - INSIST(nprocs == nprocx*nprocy*nprocz,"Fatal error in processor count!"); - } - - if (rank==0){ - printf("********************************************************\n"); - printf("Sub-domain size = %i x %i x %i\n",Nx,Ny,Nz); - printf("********************************************************\n"); - } - - MPI_Barrier(comm); double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; int BoundaryCondition=0; @@ -190,7 +103,7 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np); + ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm.id,Np,1); MPI_Barrier(comm); //......................device distributions................................. 
diff --git a/tests/TestWideHalo.cpp b/tests/TestWideHalo.cpp index cc29a15d..767aeaeb 100644 --- a/tests/TestWideHalo.cpp +++ b/tests/TestWideHalo.cpp @@ -8,7 +8,7 @@ #include #include "common/ScaLBL.h" #include "common/WideHalo.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" using namespace std; @@ -20,105 +20,55 @@ int main(int argc, char **argv) // ***** MPI STUFF **************** //***************************************** // Initialize MPI - int rank,nprocs; - MPI_Init(&argc,&argv); - MPI_Comm comm = MPI_COMM_WORLD; - MPI_Comm_rank(comm,&rank); - MPI_Comm_size(comm,&nprocs); - int check; + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + int check=0; { - // parallel domain size (# of sub-domains) - int nprocx,nprocy,nprocz; - int iproc,jproc,kproc; - if (rank == 0){ printf("********************************************************\n"); printf("Running Color Model: TestColor \n"); printf("********************************************************\n"); } - - // BGK Model parameters - string FILENAME; - unsigned int nBlocks, nthreads; - int timestepMax, interval; - double Fx,Fy,Fz,tol; // Domain variables + int nprocx, nprocy, nprocz; double Lx,Ly,Lz; - int nspheres; int Nx,Ny,Nz; int i,j,k,n; - int dim = 3; - //if (rank == 0) printf("dim=%d\n",dim); - int timestep = 0; - int timesteps = 100; - int centralNode = 2; + int dim = 16; + Lx = Ly = Lz = 1.0; + int BoundaryCondition=0; - double tauA = 1.0; - double tauB = 1.0; - double rhoA = 1.0; - double rhoB = 1.0; - double alpha = 0.005; - double beta = 0.95; - - double tau = 1.0; - double mu=(tau-0.5)/3.0; - double rlx_setA=1.0/tau; - double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); - - Fx = Fy = 0.f; - Fz = 0.f; - - if (rank==0){ - //....................................................................... 
- // Reading the domain information file - //....................................................................... - if (nprocs==1){ - nprocx=nprocy=nprocz=1; - Nx=Ny=Nz=3; - nspheres=0; - Lx=Ly=Lz=1; - } - else if (nprocs==2){ - nprocx=2; nprocy=1; - nprocz=1; - Nx=Ny=Nz=dim; - Nx = dim; Ny = dim; Nz = dim; - nspheres=0; - Lx=Ly=Lz=1; - } - else if (nprocs==4){ - nprocx=nprocy=2; - nprocz=1; - Nx=Ny=Nz=dim; - nspheres=0; - Lx=Ly=Lz=1; - } - else if (nprocs==8){ - nprocx=nprocy=nprocz=2; - Nx=Ny=Nz=dim; - nspheres=0; - Lx=Ly=Lz=1; - } - //....................................................................... + //....................................................................... + // Reading the domain information file + //....................................................................... + nprocx=nprocy=nprocz=1; + if (nprocs==1){ + nprocx=nprocy=nprocz=1; + Nx=Ny=Nz=dim; + Lx=Ly=Lz=1; } - // ************************************************************** - // Broadcast simulation parameters from rank 0 to all other procs - MPI_Barrier(comm); - //................................................. - MPI_Bcast(&Nx,1,MPI_INT,0,comm); - MPI_Bcast(&Ny,1,MPI_INT,0,comm); - MPI_Bcast(&Nz,1,MPI_INT,0,comm); - MPI_Bcast(&nprocx,1,MPI_INT,0,comm); - MPI_Bcast(&nprocy,1,MPI_INT,0,comm); - MPI_Bcast(&nprocz,1,MPI_INT,0,comm); - MPI_Bcast(&nspheres,1,MPI_INT,0,comm); - MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm); - MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm); - //................................................. 
- MPI_Barrier(comm); - // ************************************************************** + else if (nprocs==2){ + nprocx=2; nprocy=1; + nprocz=1; + Nx=Ny=Nz=dim; + Nx = dim; Ny = dim; Nz = dim; + Lx=Ly=Lz=1; + } + else if (nprocs==4){ + nprocx=nprocy=2; + nprocz=1; + Nx=Ny=Nz=dim; + Lx=Ly=Lz=1; + } + else if (nprocs==8){ + nprocx=nprocy=nprocz=2; + Nx=Ny=Nz=dim; + Lx=Ly=Lz=1; + } + //....................................................................... // ************************************************************** if (nprocs != nprocx*nprocy*nprocz){ @@ -134,10 +84,7 @@ int main(int argc, char **argv) printf("********************************************************\n"); } - MPI_Barrier(comm); - - double iVol_global = 1.0/Nx/Ny/Nz/nprocx/nprocy/nprocz; - int BoundaryCondition=0; + comm.barrier(); std::shared_ptr Dm = std::shared_ptr(new Domain(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BoundaryCondition)); Nx += 2; @@ -162,7 +109,7 @@ int main(int argc, char **argv) } } Dm->CommInit(); - MPI_Barrier(comm); + comm.barrier(); if (rank == 0) cout << "Domain set." << endl; if (rank==0) printf ("Create ScaLBL_Communicator \n"); @@ -179,12 +126,8 @@ int main(int argc, char **argv) IntArray Map(Nx,Ny,Nz); neighborList= new int[18*Np]; - ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id,Np,2); - MPI_Barrier(comm); - - //......................device distributions................................. 
- int dist_mem_size = Np*sizeof(double); - if (rank==0) printf ("Allocating distributions \n"); + ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np,2); + comm.barrier(); int *NeighborList; int *dvcMap; @@ -241,8 +184,10 @@ int main(int argc, char **argv) CY=COLORGRAD[Np+idx]; CZ=COLORGRAD[2*Np+idx]; double error=sqrt((CX-1.0)*(CX-1.0)+(CY-2.0)*(CY-2.0)+ (CZ-3.0)*(CZ-3.0)); - if (error > 1e-8) + if (error > 1e-8){ + check++; printf("i,j,k=%i,%i,%i: Color gradient=%f,%f,%f \n",i,j,k,CX,CY,CZ); + } } } } @@ -250,8 +195,8 @@ int main(int argc, char **argv) } // **************************************************** - MPI_Barrier(comm); - MPI_Finalize(); + comm.barrier(); + Utilities::shutdown(); // **************************************************** return check; From b85d808c0a98cacf7efe762cc78910a2ecd9bf06 Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 17 Jan 2021 12:36:53 -0500 Subject: [PATCH 136/205] add wetting indices to mutiphase --- analysis/SubPhase.cpp | 126 ++++++++++++++++++++++++++++++++++++++++-- analysis/SubPhase.h | 4 ++ 2 files changed, 124 insertions(+), 6 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 59778177..fcac6573 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -40,7 +40,7 @@ SubPhase::SubPhase(std::shared_ptr dm): { // If timelog is empty, write a short header to list the averages //fprintf(SUBPHASE,"--------------------------------------------------------------------------------------\n"); - fprintf(SUBPHASE,"time rn rw nun nuw Fx Fy Fz iftwn "); + fprintf(SUBPHASE,"time rn rw nun nuw Fx Fy Fz iftwn wet "); fprintf(SUBPHASE,"pwc pwd pnc pnd "); // pressures fprintf(SUBPHASE,"Mwc Mwd Mwi Mnc Mnd Mni "); // mass fprintf(SUBPHASE,"Pwc_x Pwd_x Pwi_x Pnc_x Pnd_x Pni_x "); // momentum @@ -65,7 +65,7 @@ SubPhase::SubPhase(std::shared_ptr dm): sprintf(LocalRankFilename,"%s%s","subphase.csv.",LocalRankString); SUBPHASE = fopen(LocalRankFilename,"a+"); 
//fprintf(SUBPHASE,"--------------------------------------------------------------------------------------\n"); - fprintf(SUBPHASE,"time rn rw nun nuw Fx Fy Fz iftwn "); + fprintf(SUBPHASE,"time rn rw nun nuw Fx Fy Fz iftwn wet "); fprintf(SUBPHASE,"pwc pwd pnc pnd "); // pressures fprintf(SUBPHASE,"Mwc Mwd Mwi Mnc Mnd Mni "); // mass fprintf(SUBPHASE,"Pwc_x Pwd_x Pwi_x Pnc_x Pnd_x Pni_x "); // momentum @@ -93,7 +93,7 @@ SubPhase::SubPhase(std::shared_ptr dm): { // If timelog is empty, write a short header to list the averages //fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); - fprintf(TIMELOG,"sw krw krn vw vn pw pn\n"); + fprintf(TIMELOG,"sw krw krn vw vn pw pn wet\n"); } } } @@ -109,7 +109,7 @@ SubPhase::~SubPhase() void SubPhase::Write(int timestep) { if (Dm->rank()==0){ - fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn); + fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn,total_wetting_value_global); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g ",gwc.p, gwd.p, gnc.p, gnd.p); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",gwc.M, gwd.M, giwn.Mw, gnc.M, gnd.M, giwn.Mn); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",gwc.Px, gwd.Px, giwn.Pwx, gnc.Px, gnd.Px, giwn.Pnx); @@ -125,7 +125,7 @@ void SubPhase::Write(int timestep) fflush(SUBPHASE); } else{ - fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn); + fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn,total_wetting_value); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g ",wc.p, wd.p, nc.p, nd.p); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",wc.M, wd.M, iwn.Mw, nc.M, nd.M, iwn.Mn); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",wc.Px, wd.Px, iwn.Pwx, nc.Px, nd.Px, iwn.Pnx); @@ -172,6 +172,9 @@ void 
SubPhase::Basic(){ double count_w = 0.0; double count_n = 0.0; + total_wetting_interaction = count_wetting_interaction = 0.0; + total_wetting_interaction_global = count_wetting_interaction_global=0.0; + for (k=0; kid[nq] > 0 ) { + local_wetting_interaction += (Phi(nq)-wetval); + local_wetting_weight += 1.0; + } + int nq = (k)*Nx*Ny+(j)*Nx+(i-1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += (Phi(nq)-wetval); + local_wetting_weight += 1.0; + } + int nq = (k)*Nx*Ny+(j+1)*Nx+(i); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += (Phi(nq)-wetval); + local_wetting_weight += 1.0; + } + int nq = (k)*Nx*Ny+(j-1)*Nx+(i); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += (Phi(nq)-wetval); + local_wetting_weight += 1.0; + } + int nq = (k+1)*Nx*Ny+(j)*Nx+(i); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += (Phi(nq)-wetval); + local_wetting_weight += 1.0; + } + int nq = (k-1)*Nx*Ny+(j)*Nx+(i); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += (Phi(nq)-wetval); + local_wetting_weight += 1.0; + } + // x, y interactions + int nq = (k)*Nx*Ny+(j+1)*Nx+(i+1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = (k)*Nx*Ny+(j-1)*Nx+(i-1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = (k)*Nx*Ny+(j-1)*Nx+(i+1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = (k)*Nx*Ny+(j+1)*Nx+(i-1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + // xz interactions + int nq = (k+1)*Nx*Ny+(j)*Nx+(i+1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = (k-1)*Nx*Ny+(j)*Nx+(i-1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = 
(k+1)*Nx*Ny+(j)*Nx+(i-1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = (k-1)*Nx*Ny+(j)*Nx+(i+1); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + // yz interactions + int nq = (k+1)*Nx*Ny+(j+1)*Nx+(i); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = (k-1)*Nx*Ny+(j-1)*Nx+(i); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = (k+1)*Nx*Ny+(j-1)*Nx+(i); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + int nq = (k-1)*Nx*Ny+(j+1)*Nx+(i); + if ( Dm->id[nq] > 0 ) { + local_wetting_interaction += 0.5*(Phi(nq)-wetval); + local_wetting_weight += 0.5; + } + /* interaction due to this solid site*/ + total_wetting_interaction += 0.5*local_wetting_interaction; + if (local_wetting_weight > 0.0) + count_wetting_interaction += local_wetting_weight; + } } } } + total_wetting_interaction_global=Dm->Comm.sumReduce( total_wetting_interaction); + count_wetting_interaction_global=Dm->Comm.sumReduce( count_wetting_interaction); + /* normalize wetting interactions */ + if (count_wetting_interaction > 0.0) + total_wetting_interaction /= count_wetting_interaction; + if (count_wetting_interaction_global > 0.0) + total_wetting_interaction_global /= count_wetting_interaction_global; + gwb.V=Dm->Comm.sumReduce( wb.V); gnb.V=Dm->Comm.sumReduce( nb.V); gwb.M=Dm->Comm.sumReduce( wb.M); @@ -303,7 +417,7 @@ void SubPhase::Basic(){ double krn = h*h*nu_n*not_water_flow_rate / force_mag ; double krw = h*h*nu_w*water_flow_rate / force_mag; //printf(" water saturation = %f, fractional flow =%f \n",saturation,fractional_flow); - fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g %.5g\n",saturation,krw,krn,h*water_flow_rate,h*not_water_flow_rate, gwb.p, 
gnb.p); + fprintf(TIMELOG,"%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",saturation,krw,krn,h*water_flow_rate,h*not_water_flow_rate, gwb.p, gnb.p, total_wetting_interaction_global); fflush(TIMELOG); } if (err==true){ diff --git a/analysis/SubPhase.h b/analysis/SubPhase.h index 691c654f..d2ce6b44 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -74,6 +74,10 @@ public: // global entities phase gwc,gwd,gwb,gnc,gnd,gnb; interface giwn,giwnc; + /* fluid-solid wetting interaction */ + double total_wetting_interaction, count_wetting_interaction; + double total_wetting_interaction_global, count_wetting_interaction_global; + //........................................................................... int Nx,Ny,Nz; IntArray PhaseID; // Phase ID array (solid=0, non-wetting=1, wetting=2) From 9051753f4c69728f7ae3cce976df3160d52762eb Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 17 Jan 2021 12:37:13 -0500 Subject: [PATCH 137/205] wide halo updates --- common/WideHalo.cpp | 3 +-- tests/TestWideHalo.cpp | 8 +++++++- tests/lbpm_BGK_simulator.cpp | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index ed4a451d..b56e8b96 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -58,12 +58,11 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Date: Sun, 17 Jan 2021 12:41:01 -0500 Subject: [PATCH 138/205] add wetting indices to subphase analysis --- analysis/SubPhase.cpp | 44 +++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index fcac6573..4d04d546 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -50,7 +50,7 @@ SubPhase::SubPhase(std::shared_ptr dm): fprintf(SUBPHASE,"Vwc Awc Hwc Xwc "); // wc region fprintf(SUBPHASE,"Vwd Awd Hwd Xwd Nwd "); // wd region fprintf(SUBPHASE,"Vnc Anc Hnc Xnc "); // nc region - fprintf(SUBPHASE,"Vnd And Hnd Xnd Nnd "); // nd 
region + fprintf(SUBPHASE,"Vnd And Hnd Xnd Nnd "); // nd regionin fprintf(SUBPHASE,"Vi Ai Hi Xi "); // interface region fprintf(SUBPHASE,"Vic Aic Hic Xic Nic\n"); // interface region @@ -109,7 +109,7 @@ SubPhase::~SubPhase() void SubPhase::Write(int timestep) { if (Dm->rank()==0){ - fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn,total_wetting_value_global); + fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn,total_wetting_interaction_global); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g ",gwc.p, gwd.p, gnc.p, gnd.p); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",gwc.M, gwd.M, giwn.Mw, gnc.M, gnd.M, giwn.Mn); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",gwc.Px, gwd.Px, giwn.Pwx, gnc.Px, gnd.Px, giwn.Pnx); @@ -125,7 +125,7 @@ void SubPhase::Write(int timestep) fflush(SUBPHASE); } else{ - fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn,total_wetting_value); + fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn,total_wetting_interaction); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g ",wc.p, wd.p, nc.p, nd.p); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",wc.M, wd.M, iwn.Mw, nc.M, nd.M, iwn.Mn); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",wc.Px, wd.Px, iwn.Pwx, nc.Px, nd.Px, iwn.Pnx); @@ -156,7 +156,7 @@ void SubPhase::SetParams(double rhoA, double rhoB, double tauA, double tauB, dou } void SubPhase::Basic(){ - int i,j,k,n,imin,jmin,kmin,kmax; + int i,j,k,n,imin,jmin,kmin,kmax, nq; // If external boundary conditions are set, do not average over the inlet kmin=1; kmax=Nz-1; @@ -234,95 +234,95 @@ void SubPhase::Basic(){ double wetval = Phi(i,j,k); double local_wetting_interaction = 0.0; double local_wetting_weight=0.0; - int nq = (k)*Nx*Ny+(j)*Nx+(i+1); + nq = (k)*Nx*Ny+(j)*Nx+(i+1); if ( Dm->id[nq] > 
0 ) { local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } - int nq = (k)*Nx*Ny+(j)*Nx+(i-1); + nq = (k)*Nx*Ny+(j)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } - int nq = (k)*Nx*Ny+(j+1)*Nx+(i); + nq = (k)*Nx*Ny+(j+1)*Nx+(i); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } - int nq = (k)*Nx*Ny+(j-1)*Nx+(i); + nq = (k)*Nx*Ny+(j-1)*Nx+(i); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } - int nq = (k+1)*Nx*Ny+(j)*Nx+(i); + nq = (k+1)*Nx*Ny+(j)*Nx+(i); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } - int nq = (k-1)*Nx*Ny+(j)*Nx+(i); + nq = (k-1)*Nx*Ny+(j)*Nx+(i); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } // x, y interactions - int nq = (k)*Nx*Ny+(j+1)*Nx+(i+1); + nq = (k)*Nx*Ny+(j+1)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } - int nq = (k)*Nx*Ny+(j-1)*Nx+(i-1); + nq = (k)*Nx*Ny+(j-1)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } - int nq = (k)*Nx*Ny+(j-1)*Nx+(i+1); + nq = (k)*Nx*Ny+(j-1)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } - int nq = (k)*Nx*Ny+(j+1)*Nx+(i-1); + nq = (k)*Nx*Ny+(j+1)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } // xz interactions - int nq = (k+1)*Nx*Ny+(j)*Nx+(i+1); + nq = (k+1)*Nx*Ny+(j)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } - int nq = (k-1)*Nx*Ny+(j)*Nx+(i-1); + nq = (k-1)*Nx*Ny+(j)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); 
local_wetting_weight += 0.5; } - int nq = (k+1)*Nx*Ny+(j)*Nx+(i-1); + nq = (k+1)*Nx*Ny+(j)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } - int nq = (k-1)*Nx*Ny+(j)*Nx+(i+1); + nq = (k-1)*Nx*Ny+(j)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } // yz interactions - int nq = (k+1)*Nx*Ny+(j+1)*Nx+(i); + nq = (k+1)*Nx*Ny+(j+1)*Nx+(i); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } - int nq = (k-1)*Nx*Ny+(j-1)*Nx+(i); + nq = (k-1)*Nx*Ny+(j-1)*Nx+(i); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } - int nq = (k+1)*Nx*Ny+(j-1)*Nx+(i); + nq = (k+1)*Nx*Ny+(j-1)*Nx+(i); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } - int nq = (k-1)*Nx*Ny+(j+1)*Nx+(i); + nq = (k-1)*Nx*Ny+(j+1)*Nx+(i); if ( Dm->id[nq] > 0 ) { local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; From 88cf09e3262c502186bf474de71e58f1443ef716 Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 17 Jan 2021 12:53:09 -0500 Subject: [PATCH 139/205] copy phi in subphase --- analysis/runAnalysis.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index c09b71c2..8bbd5c97 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -940,6 +940,8 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase ******************************************************************/ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) { + int N = d_N[0]*d_N[1]*d_N[2]; + NULL_USE( N ); // Check which analysis steps we need to perform auto color_db = input_db->getDatabase( "Color" ); 
auto vis_db = input_db->getDatabase( "Visualization" ); @@ -973,7 +975,11 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha PROFILE_START("Copy-Wait",1); PROFILE_STOP("Copy-Wait",1); PROFILE_START("Copy-State",1); - // copy other variables + if (d_regular) + d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phi); + else + ScaLBL_CopyToHost(Averages.Phi.data(),Phi,N*sizeof(double)); + // copy other variables d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Pressure); d_ScaLBL_Comm->RegularLayout(d_Map,&Den[0],Averages.Rho_n); d_ScaLBL_Comm->RegularLayout(d_Map,&Den[d_Np],Averages.Rho_w); From 9ee14bce017289817ef09812be6a364ee4d6c28b Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 17 Jan 2021 13:58:51 -0500 Subject: [PATCH 140/205] wetting energy in analysis --- analysis/SubPhase.cpp | 16 +++++++++++++--- analysis/runAnalysis.cpp | 7 ++++--- tests/lbpm_color_simulator.cpp | 4 ++-- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 4d04d546..d6933140 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -187,6 +187,10 @@ void SubPhase::Basic(){ double phi = (nA-nB)/(nA+nB); Phi(n) = phi; } + if (Phi(n) != Phi(n)){ + // check for NaN + Phi(n) = 0.0; + } } } } @@ -328,13 +332,19 @@ void SubPhase::Basic(){ local_wetting_weight += 0.5; } /* interaction due to this solid site*/ - total_wetting_interaction += 0.5*local_wetting_interaction; - if (local_wetting_weight > 0.0) - count_wetting_interaction += local_wetting_weight; + if (local_wetting_interaction == local_wetting_interaction){ + total_wetting_interaction += 0.5*local_wetting_interaction; + if (local_wetting_weight > 0.0) + count_wetting_interaction += local_wetting_weight; + } + else{ + //printf("Check interaction at %i %i %i \n",i,j,k); + } } } } } + printf("wetting interaction = %f, count = %f\n",total_wetting_interaction,count_wetting_interaction); total_wetting_interaction_global=Dm->Comm.sumReduce( 
total_wetting_interaction); count_wetting_interaction_global=Dm->Comm.sumReduce( count_wetting_interaction); /* normalize wetting interactions */ diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 8bbd5c97..f7447188 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -975,10 +975,11 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha PROFILE_START("Copy-Wait",1); PROFILE_STOP("Copy-Wait",1); PROFILE_START("Copy-State",1); - if (d_regular) + /*if (d_regular) d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phi); - else - ScaLBL_CopyToHost(Averages.Phi.data(),Phi,N*sizeof(double)); + else */ + ScaLBL_CopyToHost(Averages.Phi.data(),Phi,N*sizeof(double)); + // copy other variables d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Pressure); d_ScaLBL_Comm->RegularLayout(d_Map,&Den[0],Averages.Rho_n); diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index 1d579486..4b9478be 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -35,8 +35,8 @@ int main(int argc, char **argv) // Initialize MPI and error handlers auto multiple = db->getWithDefault( "MPI_THREAD_MULTIPLE", true ); - Utilities::startup( argc, argv, multiple ); - Utilities::MPI::changeProfileLevel( 1 ); + //Utilities::startup( argc, argv, multiple ); + //Utilities::MPI::changeProfileLevel( 1 ); { // Limit scope so variables that contain communicators will free before MPI_Finialize From dc886091a1a96c5f71efc93e2b4426efaea6c287 Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 17 Jan 2021 15:01:56 -0500 Subject: [PATCH 141/205] looking at bug in writer --- analysis/SubPhase.cpp | 3 ++- analysis/runAnalysis.cpp | 3 +-- example/Plates/input.db | 15 ++++++++------- tests/lbpm_color_simulator.cpp | 2 -- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index d6933140..aa3a7744 100644 --- a/analysis/SubPhase.cpp +++ 
b/analysis/SubPhase.cpp @@ -190,6 +190,7 @@ void SubPhase::Basic(){ if (Phi(n) != Phi(n)){ // check for NaN Phi(n) = 0.0; + //printf("Nan at %i %i %i \n",i,j,k); } } } @@ -344,7 +345,7 @@ void SubPhase::Basic(){ } } } - printf("wetting interaction = %f, count = %f\n",total_wetting_interaction,count_wetting_interaction); + //printf("wetting interaction = %f, count = %f\n",total_wetting_interaction,count_wetting_interaction); total_wetting_interaction_global=Dm->Comm.sumReduce( total_wetting_interaction); count_wetting_interaction_global=Dm->Comm.sumReduce( count_wetting_interaction); /* normalize wetting interactions */ diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index f7447188..e2fca48d 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -978,8 +978,7 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha /*if (d_regular) d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phi); else */ - ScaLBL_CopyToHost(Averages.Phi.data(),Phi,N*sizeof(double)); - + ScaLBL_CopyToHost(Averages.Phi.data(),Phi,N*sizeof(double)); // copy other variables d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Pressure); d_ScaLBL_Comm->RegularLayout(d_Map,&Den[0],Averages.Rho_n); diff --git a/example/Plates/input.db b/example/Plates/input.db index 2e74a43f..2da2ea3e 100644 --- a/example/Plates/input.db +++ b/example/Plates/input.db @@ -7,10 +7,10 @@ Color { beta = 0.95; F = 0, 0, 0 Restart = false - timestepMax = 3000 + timestepMax = 500 flux = 0.0 ComponentLabels = -2, -1 - ComponentAffinity = -1.0, -0.5; + ComponentAffinity = 1.0, 1.0; } Domain { @@ -26,13 +26,14 @@ Domain { } Analysis { - blobid_interval = 1000 // Frequency to perform blob identification - analysis_interval = 1000 // Frequency to perform analysis - restart_interval = 1000 // Frequency to write restart data - visualization_interval = 1000 // Frequency to write visualization data + analysis_interval = 100 // Frequency to perform analysis + visualization_interval = 
500 + subphase_analysis_interval = 100 + restart_interval = 100000 restart_file = "Restart" // Filename to use for restart file (will append rank) N_threads = 4 // Number of threads to use load_balance = "independent" // Load balance method to use: "none", "default", "independent" } - +Visualization { +} \ No newline at end of file diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index 4b9478be..6451d38a 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -79,5 +79,3 @@ int main(int argc, char **argv) Utilities::shutdown(); } - - From 0916cb2d3a03a959d5dafee33738309b86e28366 Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 17 Jan 2021 20:14:39 -0500 Subject: [PATCH 142/205] turn off prints --- analysis/SubPhase.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index aa3a7744..79ef5c93 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -237,98 +237,117 @@ void SubPhase::Basic(){ else { // solid wetting assessment double wetval = Phi(i,j,k); + //if (wetval != wetval) printf("%f at %i %i %i \n",wetval,i,j,k); double local_wetting_interaction = 0.0; double local_wetting_weight=0.0; nq = (k)*Nx*Ny+(j)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 1\n",i,j,k); local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } nq = (k)*Nx*Ny+(j)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 2\n",i,j,k); local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } nq = (k)*Nx*Ny+(j+1)*Nx+(i); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 3\n",i,j,k); local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } nq = (k)*Nx*Ny+(j-1)*Nx+(i); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 4\n",i,j,k); local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 
1.0; } nq = (k+1)*Nx*Ny+(j)*Nx+(i); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 5\n",i,j,k); local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } nq = (k-1)*Nx*Ny+(j)*Nx+(i); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 6\n",i,j,k); local_wetting_interaction += (Phi(nq)-wetval); local_wetting_weight += 1.0; } // x, y interactions nq = (k)*Nx*Ny+(j+1)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 7\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k)*Nx*Ny+(j-1)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 8\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k)*Nx*Ny+(j-1)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 9\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k)*Nx*Ny+(j+1)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 10\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } // xz interactions nq = (k+1)*Nx*Ny+(j)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 11\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k-1)*Nx*Ny+(j)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 12\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k+1)*Nx*Ny+(j)*Nx+(i-1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 13\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k-1)*Nx*Ny+(j)*Nx+(i+1); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 14\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } // yz 
interactions nq = (k+1)*Nx*Ny+(j+1)*Nx+(i); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 15\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k-1)*Nx*Ny+(j-1)*Nx+(i); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 16\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k+1)*Nx*Ny+(j-1)*Nx+(i); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 17\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } nq = (k-1)*Nx*Ny+(j+1)*Nx+(i); if ( Dm->id[nq] > 0 ) { + //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 18\n",i,j,k); local_wetting_interaction += 0.5*(Phi(nq)-wetval); local_wetting_weight += 0.5; } From 90c4122cc65f3985d63d0c998d1319d310463b8f Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 17 Jan 2021 20:28:00 -0500 Subject: [PATCH 143/205] fix piston db --- example/Piston/input.db | 2 ++ 1 file changed, 2 insertions(+) diff --git a/example/Piston/input.db b/example/Piston/input.db index fab67cc5..5a9ba030 100644 --- a/example/Piston/input.db +++ b/example/Piston/input.db @@ -35,4 +35,6 @@ Analysis { load_balance = "independent" // Load balance method to use: "none", "default", "independent" } +Visualization { +} \ No newline at end of file From 4690adb104e6b0adbf3683f46686abf9da70d98e Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 18 Jan 2021 21:30:27 -0500 Subject: [PATCH 144/205] save the work --- models/FreeLeeModel.cpp | 312 +++++++++++++++++++++++++++++++++++----- 1 file changed, 276 insertions(+), 36 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 547885b8..755347f3 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -32,8 +32,8 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ tauA = tauB = 1.0; rhoA = rhoB = 1.0; Fx = Fy = Fz = 0.0; - gamma=1e-3; - W=5; + gamma=1e-3;//surface tension + 
W=5.0;//interfacial thickness Restart=false; din=dout=1.0; flux=0.0; @@ -220,7 +220,7 @@ void ScaLBL_FreeLeeModel::Create(){ //........................................................................... ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); - ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &hq, 7*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &mu_phi, dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &Den, dist_mem_size); @@ -239,10 +239,11 @@ void ScaLBL_FreeLeeModel::Create(){ for (int i=1; iMap(i,j,k); } } } + //TODO The following check needs update! // check that TmpMap is valid for (int idx=0; idxLastExterior(); idx++){ auto n = TmpMap[idx]; @@ -264,21 +265,255 @@ void ScaLBL_FreeLeeModel::Create(){ // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - // initialize phi based on PhaseLabel (include solid component labels) } -/******************************************************** - * AssignComponentLabels * - ********************************************************/ +void ScaLBL_FreeLeeModel::AssignComponentLabels() +{ + double *phase; + phase = new double[Nh]; + + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = greyscaleColor_db->getVector( "ComponentLabels" ); + auto AffinityList = greyscaleColor_db->getVector( "ComponentAffinity" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! 
\n"); + } + + double label_count[NLABELS]; + double label_count_global[NLABELS]; + + // Assign the labels + for (size_t idx=0; idxid[n] = 0; // set mask to zero since this is an immobile component + } + } + // fluid labels are reserved + if (VALUE == 1) AFFINITY=1.0; + else if (VALUE == 2) AFFINITY=-1.0; + phase[n] = AFFINITY; + } + } + } + + // Set Dm to match Mask + for (int i=0; iid[i] = Mask->id[i]; + + for (size_t idx=0; idxComm, label_count[idx]); + + if (rank==0){ + printf("Number of component labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxMPI_COMM_SCALBL); + delete [] phase; +} + +void ScaLBL_FreeLeeModel::AssignChemPotential_ColorGrad() +{ + double *SolidPotential_host = new double [Nx*Ny*Nz]; + double *GreySolidGrad_host = new double [3*Np]; + + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = greyscaleColor_db->getVector( "GreySolidLabels" ); + auto AffinityList = greyscaleColor_db->getVector( "GreySolidAffinity" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: GreySolidLabels and GreySolidAffinity must be the same length! 
\n"); + } + + for (int k=0;kid[n] = 0; // set mask to zero since this is an immobile component + } + } + SolidPotential_host[n] = AFFINITY; + } + } + } + + // Calculate grey-solid color-gradient + double *Dst; + Dst = new double [3*3*3]; + for (int kk=0; kk<3; kk++){ + for (int jj=0; jj<3; jj++){ + for (int ii=0; ii<3; ii++){ + int index = kk*9+jj*3+ii; + Dst[index] = sqrt(double(ii-1)*double(ii-1) + double(jj-1)*double(jj-1)+ double(kk-1)*double(kk-1)); + } + } + } + double w_face = 1.f; + double w_edge = 0.5; + double w_corner = 0.f; + //local + Dst[13] = 0.f; + //faces + Dst[4] = w_face; + Dst[10] = w_face; + Dst[12] = w_face; + Dst[14] = w_face; + Dst[16] = w_face; + Dst[22] = w_face; + // corners + Dst[0] = w_corner; + Dst[2] = w_corner; + Dst[6] = w_corner; + Dst[8] = w_corner; + Dst[18] = w_corner; + Dst[20] = w_corner; + Dst[24] = w_corner; + Dst[26] = w_corner; + // edges + Dst[1] = w_edge; + Dst[3] = w_edge; + Dst[5] = w_edge; + Dst[7] = w_edge; + Dst[9] = w_edge; + Dst[11] = w_edge; + Dst[15] = w_edge; + Dst[17] = w_edge; + Dst[19] = w_edge; + Dst[21] = w_edge; + Dst[23] = w_edge; + Dst[25] = w_edge; + + for (int k=1; kSDs(i,j,k)<2.0){ + GreySolidGrad_host[idx+0*Np] = phi_x; + GreySolidGrad_host[idx+1*Np] = phi_y; + GreySolidGrad_host[idx+2*Np] = phi_z; + } + else{ + GreySolidGrad_host[idx+0*Np] = 0.0; + GreySolidGrad_host[idx+1*Np] = 0.0; + GreySolidGrad_host[idx+2*Np] = 0.0; + } + } + } + } + } + + + if (rank==0){ + printf("Number of Grey-solid labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxLastExterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (Restart == true){ + //TODO need to revise this function if (rank==0){ printf("Reading restart file! 
\n"); } @@ -292,7 +527,7 @@ void ScaLBL_FreeLeeModel::Initialize(){ cDen = new double[2*Np]; cDist = new double[19*Np]; ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); - ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); + //ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); ifstream File(LocalRestartFile,ios::binary); int idx; @@ -336,11 +571,11 @@ void ScaLBL_FreeLeeModel::Initialize(){ ScaLBL_DeviceBarrier(); MPI_Barrier(comm); - } - if (rank==0) printf ("Initializing phase field \n"); - //ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); - //ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (rank==0) printf ("Initializing phase and density fields on device from Restart\n"); + ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } // establish reservoirs for external bC if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ @@ -382,27 +617,30 @@ void ScaLBL_FreeLeeModel::Run(){ PROFILE_START("Update"); // *************ODD TIMESTEP************* timestep++; - /* // Compute the Phase indicator field + //------------------------------------------------------------------------------------------------------------------- + // Compute the Phase indicator field // Read for hq, Bq happens in this routine (requires communication) - ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE + //ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL + ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Den, Phi, 
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL if (BoundaryCondition > 0 && BoundaryCondition < 5){ + //TODO to be revised ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } // Halo exchange for phase field - ScaLBL_Comm_Regular->SendHalo(Phi); + ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Phi); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm_WideHalo->Recv(Phi); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set BCs if (BoundaryCondition == 3){ @@ -417,7 +655,7 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); @@ -425,24 +663,24 @@ void ScaLBL_FreeLeeModel::Run(){ // *************EVEN TIMESTEP************* timestep++; // Compute the Phase indicator 
field - ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE + ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL // Halo exchange for phase field if (BoundaryCondition > 0 && BoundaryCondition < 5){ ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } - ScaLBL_Comm_Regular->SendHalo(Phi); + ScaLBL_Comm_WideHalo->Send(Phi); ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Phi); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm_WideHalo->Recv(Phi); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); // Set boundary conditions if (BoundaryCondition == 3){ @@ -459,7 +697,9 @@ void ScaLBL_FreeLeeModel::Run(){ } ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - */ + + + //---------------------------------------------------------------------------------------------- ScaLBL_DeviceBarrier(); MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); 
//************************************************************************ From 4085deb5e35e354ad756fb7b63dacac1ac91b0ec Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 18 Jan 2021 23:37:08 -0500 Subject: [PATCH 145/205] save the work --- models/FreeLeeModel.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index e9f1f5b5..4bdff0d0 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -243,20 +243,19 @@ void ScaLBL_FreeLeeModel::Create(){ } } } - //TODO The following check needs update! // check that TmpMap is valid for (int idx=0; idxLastExterior(); idx++){ auto n = TmpMap[idx]; - if (n > Nx*Ny*Nz){ + if (n > Nxh*Nyh*Nzh){ printf("Bad value! idx=%i \n", n); - TmpMap[idx] = Nx*Ny*Nz-1; + TmpMap[idx] = Nxh*Nyh*Nzh-1; } } for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ auto n = TmpMap[idx]; - if ( n > Nx*Ny*Nz ){ + if ( n > Nxh*Nyh*Nzh ){ printf("Bad value! idx=%i \n",n); - TmpMap[idx] = Nx*Ny*Nz-1; + TmpMap[idx] = Nxh*Nyh*Nzh-1; } } ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); From ff946a91d8778d1d9254113479c3204b82a85204 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 19 Jan 2021 10:30:39 -0500 Subject: [PATCH 146/205] wetting fluctuation from laplacian --- analysis/SubPhase.cpp | 161 ++++++++------------------------------- analysis/SubPhase.h | 5 +- analysis/runAnalysis.cpp | 7 +- models/ColorModel.cpp | 7 +- 4 files changed, 46 insertions(+), 134 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 79ef5c93..f17c7b83 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -20,6 +20,7 @@ SubPhase::SubPhase(std::shared_ptr dm): Pressure.resize(Nx,Ny,Nz); Pressure.fill(0); Phi.resize(Nx,Ny,Nz); Phi.fill(0); DelPhi.resize(Nx,Ny,Nz); DelPhi.fill(0); + Laplacian.resize(Nx,Ny,Nz); Laplacian.fill(0); Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Gradient of the phase indicator field 
Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0); Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0); @@ -172,8 +173,21 @@ void SubPhase::Basic(){ double count_w = 0.0; double count_n = 0.0; - total_wetting_interaction = count_wetting_interaction = 0.0; - total_wetting_interaction_global = count_wetting_interaction_global=0.0; + /* compute the laplacian */ + Dm->CommunicateMeshHalo(Phi); + for (int k=1; kComm.barrier(); + Dm->CommunicateMeshHalo(Laplacian); for (k=0; kid[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 1\n",i,j,k); - local_wetting_interaction += (Phi(nq)-wetval); - local_wetting_weight += 1.0; - } - nq = (k)*Nx*Ny+(j)*Nx+(i-1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 2\n",i,j,k); - local_wetting_interaction += (Phi(nq)-wetval); - local_wetting_weight += 1.0; - } - nq = (k)*Nx*Ny+(j+1)*Nx+(i); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 3\n",i,j,k); - local_wetting_interaction += (Phi(nq)-wetval); - local_wetting_weight += 1.0; - } - nq = (k)*Nx*Ny+(j-1)*Nx+(i); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 4\n",i,j,k); - local_wetting_interaction += (Phi(nq)-wetval); - local_wetting_weight += 1.0; - } - nq = (k+1)*Nx*Ny+(j)*Nx+(i); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 5\n",i,j,k); - local_wetting_interaction += (Phi(nq)-wetval); - local_wetting_weight += 1.0; - } - nq = (k-1)*Nx*Ny+(j)*Nx+(i); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 6\n",i,j,k); - local_wetting_interaction += (Phi(nq)-wetval); - local_wetting_weight += 1.0; - } - // x, y interactions - nq = (k)*Nx*Ny+(j+1)*Nx+(i+1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 7\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - nq = (k)*Nx*Ny+(j-1)*Nx+(i-1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 8\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); 
- local_wetting_weight += 0.5; - } - nq = (k)*Nx*Ny+(j-1)*Nx+(i+1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 9\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - nq = (k)*Nx*Ny+(j+1)*Nx+(i-1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 10\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - // xz interactions - nq = (k+1)*Nx*Ny+(j)*Nx+(i+1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 11\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - nq = (k-1)*Nx*Ny+(j)*Nx+(i-1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 12\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - nq = (k+1)*Nx*Ny+(j)*Nx+(i-1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 13\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - nq = (k-1)*Nx*Ny+(j)*Nx+(i+1); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 14\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - // yz interactions - nq = (k+1)*Nx*Ny+(j+1)*Nx+(i); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 15\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - nq = (k-1)*Nx*Ny+(j-1)*Nx+(i); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 16\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - nq = (k+1)*Nx*Ny+(j-1)*Nx+(i); - if ( Dm->id[nq] > 0 ) { - //if (Phi(nq)!=Phi(nq)) printf("%i %i %i : 17\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - nq = (k-1)*Nx*Ny+(j+1)*Nx+(i); - if ( Dm->id[nq] > 0 ) { - //if 
(Phi(nq)!=Phi(nq)) printf("%i %i %i : 18\n",i,j,k); - local_wetting_interaction += 0.5*(Phi(nq)-wetval); - local_wetting_weight += 0.5; - } - /* interaction due to this solid site*/ - if (local_wetting_interaction == local_wetting_interaction){ - total_wetting_interaction += 0.5*local_wetting_interaction; - if (local_wetting_weight > 0.0) - count_wetting_interaction += local_wetting_weight; - } - else{ - //printf("Check interaction at %i %i %i \n",i,j,k); - } + } + } + } + + total_wetting_interaction = count_wetting_interaction = 0.0; + total_wetting_interaction_global = count_wetting_interaction_global=0.0; + for (k=kmin; kid[n] > 0 && SDs(i,j,k) < 2.0 ){ + count_wetting_interaction += 1.0; + total_wetting_interaction += Laplacian(i,j,k); } } } @@ -367,11 +269,12 @@ void SubPhase::Basic(){ //printf("wetting interaction = %f, count = %f\n",total_wetting_interaction,count_wetting_interaction); total_wetting_interaction_global=Dm->Comm.sumReduce( total_wetting_interaction); count_wetting_interaction_global=Dm->Comm.sumReduce( count_wetting_interaction); - /* normalize wetting interactions */ + /* normalize wetting interactions <-- Don't do this if normalizing laplacian (use solid surface area) if (count_wetting_interaction > 0.0) total_wetting_interaction /= count_wetting_interaction; if (count_wetting_interaction_global > 0.0) total_wetting_interaction_global /= count_wetting_interaction_global; + */ gwb.V=Dm->Comm.sumReduce( wb.V); gnb.V=Dm->Comm.sumReduce( nb.V); diff --git a/analysis/SubPhase.h b/analysis/SubPhase.h index d2ce6b44..f3462133 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -68,11 +68,11 @@ public: * b - bulk (total) */ // local entities - phase wc,wd,wb,nc,nd,nb; + phase wc,wd,wb,nc,nd,nb,solid; interface iwn,iwnc; // global entities - phase gwc,gwd,gwb,gnc,gnd,gnb; + phase gwc,gwd,gwb,gnc,gnd,gnb,gsolid; interface giwn,giwnc; /* fluid-solid wetting interaction */ double total_wetting_interaction, count_wetting_interaction; @@ -88,6 
+88,7 @@ public: DoubleArray Rho_w; // density field DoubleArray Phi; // phase indicator field DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field + DoubleArray Laplacian; // laplacian of phase indicator field DoubleArray Pressure; // pressure field DoubleArray Vel_x; // velocity field DoubleArray Vel_y; diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index e2fca48d..0f9f1c74 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -940,7 +940,10 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase ******************************************************************/ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) { - int N = d_N[0]*d_N[1]*d_N[2]; + int Nx = d_N[0]; + int Ny = d_N[1]; + int Nz = d_N[2]; + int N = Nx*Ny*Nz; NULL_USE( N ); // Check which analysis steps we need to perform auto color_db = input_db->getDatabase( "Color" ); @@ -1036,7 +1039,7 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha if (timestep%d_visualization_interval==0){ // Write the vis files commWrapper comm = getComm(); - fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); + fillHalo fillData( comm.comm, d_rank_info, {Nx-2,Ny-2,Nz-2}, {1,1,1}, 0, 1 ); auto work = new IOWorkItem( timestep, input_db, d_meshData, Averages, fillData, std::move( comm ) ); work->add_dependency(d_wait_analysis); work->add_dependency(d_wait_subphase); diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 5f0e6f0e..ea1ec97f 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -238,9 +238,14 @@ void ScaLBL_ColorModel::ReadInput(){ } } // MeanFilter(Averages->SDs); + Minkowski Solid(Dm); if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(Averages->SDs,id_solid,*Mask); - + Solid.ComputeScalar(Averages->SDs,0.0); + if 
(rank == 0) { + printf("Vs As Hs Xs\n"); + printf("%.8g %.8g %.8g %.8g\n",Solid.Vi_global,Solid.Ai_global,Solid.Ji_global,Solid.Xi_global); + } if (rank == 0) cout << "Domain set." << endl; Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); From d442f890f66a46077c3b7059ef73255393d901a9 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 19 Jan 2021 10:50:12 -0500 Subject: [PATCH 147/205] write solid info to file in color --- models/ColorModel.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index ea1ec97f..0547814b 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -242,9 +242,21 @@ void ScaLBL_ColorModel::ReadInput(){ if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(Averages->SDs,id_solid,*Mask); Solid.ComputeScalar(Averages->SDs,0.0); + /* save averages */ + Averages->solid.V = Solid.Vi; + Averages->solid.A = Solid.Ai; + Averages->solid.H = Solid.Ji; + Averages->solid.X = Solid.Xi; + Averages->gsolid.V = Solid.Vi_global; + Averages->gsolid.A = Solid.Ai_global; + Averages->gsolid.H = Solid.Ji_global; + Averages->gsolid.X = Solid.Xi_global; + /* write to file */ if (rank == 0) { - printf("Vs As Hs Xs\n"); - printf("%.8g %.8g %.8g %.8g\n",Solid.Vi_global,Solid.Ai_global,Solid.Ji_global,Solid.Xi_global); + FILE *SOLID = fopen("solid.csv","w"); + fprintf(SOLID,"Vs As Hs Xs\n"); + fprintf(SOLID,"%.8g %.8g %.8g %.8g\n",Solid.Vi_global,Solid.Ai_global,Solid.Ji_global,Solid.Xi_global); + fclose(SOLID); } if (rank == 0) cout << "Domain set." 
<< endl; From e21774d021d17210d906b05d36a9780a83e96780 Mon Sep 17 00:00:00 2001 From: James McClure Date: Tue, 19 Jan 2021 11:38:23 -0500 Subject: [PATCH 148/205] fix laplacian --- analysis/SubPhase.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index f17c7b83..794baeea 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -157,7 +157,7 @@ void SubPhase::SetParams(double rhoA, double rhoB, double tauA, double tauB, dou } void SubPhase::Basic(){ - int i,j,k,n,imin,jmin,kmin,kmax, nq; + int i,j,k,n,imin,jmin,kmin,kmax; // If external boundary conditions are set, do not average over the inlet kmin=1; kmax=Nz-1; @@ -182,7 +182,7 @@ void SubPhase::Basic(){ double fx = (Phi(i+1,j,k) - 2.0*Phi(i,j,k) + Phi(i-1,j,k)); double fy = (Phi(i,j+1,k) - 2.0*Phi(i,j,k) + Phi(i,j-1,k)); double fz = (Phi(i,j,k+1) - 2.0*Phi(i,j,k) + Phi(i,j,k-1)); - Laplacian(i,j,k) = sqrt(fx*fx+fy*fy+fz*fz); + Laplacian(i,j,k) = 0.5*(fx + fy + fz); } } } From 791526bf035dee3c2e3fc5e22fcb7bc163337a23 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 20 Jan 2021 16:50:07 -0500 Subject: [PATCH 149/205] surface wetting energy in subphase --- analysis/SubPhase.cpp | 14 ++++++-------- analysis/SubPhase.h | 1 - 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 794baeea..d0657391 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -20,7 +20,6 @@ SubPhase::SubPhase(std::shared_ptr dm): Pressure.resize(Nx,Ny,Nz); Pressure.fill(0); Phi.resize(Nx,Ny,Nz); Phi.fill(0); DelPhi.resize(Nx,Ny,Nz); DelPhi.fill(0); - Laplacian.resize(Nx,Ny,Nz); Laplacian.fill(0); Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Gradient of the phase indicator field Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0); Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0); @@ -179,15 +178,14 @@ void SubPhase::Basic(){ for (int j=1; jComm.barrier(); - Dm->CommunicateMeshHalo(Laplacian); + 
Dm->CommunicateMeshHalo(DelPhi); for (k=0; kid[n] > 0 && SDs(i,j,k) < 2.0 ){ count_wetting_interaction += 1.0; - total_wetting_interaction += Laplacian(i,j,k); + total_wetting_interaction += DelPhi(i,j,k); } } } diff --git a/analysis/SubPhase.h b/analysis/SubPhase.h index f3462133..a6d35edd 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -88,7 +88,6 @@ public: DoubleArray Rho_w; // density field DoubleArray Phi; // phase indicator field DoubleArray DelPhi; // Magnitude of Gradient of the phase indicator field - DoubleArray Laplacian; // laplacian of phase indicator field DoubleArray Pressure; // pressure field DoubleArray Vel_x; // velocity field DoubleArray Vel_y; From e63b471260ea4c5ea8b74be25d5b629fc5c5e0ef Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 20 Jan 2021 16:50:25 -0500 Subject: [PATCH 150/205] added performance estimator to scalbl --- common/ScaLBL.cpp | 29 +++++++++++++++++++++++++++++ common/ScaLBL.h | 1 + 2 files changed, 30 insertions(+) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index c3edc44f..67cc1f73 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -310,6 +310,35 @@ ScaLBL_Communicator::~ScaLBL_Communicator(){ // destrutor does nothing (bad idea) // -- note that there needs to be a way to free memory allocated on the device!!! } +double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np){ + /* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/ + /* use MRT kernels to check performance without communication / synchronization */ + int TIMESTEPS=500; + double RLX_SETA=1.0; + double RLX_SETB = 8.f*(2.f-RLX_SETA)/(8.f-RLX_SETA); + double FX = 0.0; + double FY = 0.0; + double FZ = 0.0; + //.......create and start timer............ + double starttime,stoptime,cputime; + Barrier(); + starttime = MPI_Wtime(); + //......................................... 
+ for (int t=0; t Date: Wed, 20 Jan 2021 18:25:29 -0500 Subject: [PATCH 151/205] demo for performance in MRT model --- common/ScaLBL.cpp | 3 ++- models/MRTModel.cpp | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 67cc1f73..76a60fd3 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -319,6 +319,7 @@ double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np double FX = 0.0; double FY = 0.0; double FZ = 0.0; + ScaLBL_D3Q19_Init(fq, Np); //.......create and start timer............ double starttime,stoptime,cputime; Barrier(); @@ -333,7 +334,7 @@ double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np stoptime = MPI_Wtime(); Barrier(); // Compute the walltime per timestep - cputime = (stoptime - starttime)/TIMESTEPS; + cputime = 0.5*(stoptime - starttime)/TIMESTEPS; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; return MLUPS; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 3e19b717..01d13762 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -26,6 +26,8 @@ void ScaLBL_MRTModel::ReadParams(string filename){ tolerance = 1.0e-8; Fx = Fy = 0.0; Fz = 1.0e-5; + dout = 1.0; + din = 1.0; // Color Model parameters if (mrt_db->keyExists( "timestepMax" )){ @@ -194,7 +196,8 @@ void ScaLBL_MRTModel::Create(){ // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); comm.barrier(); - + double MLUPS = ScaLBL_Comm->GetPerformance(NeighborList,fq,Np); + printf(" MLPUS=%f from rank %i\n",MLUPS,rank); } void ScaLBL_MRTModel::Initialize(){ From 90ba7ed65af31e87cba7458c233c48d3269b6227 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Fri, 22 Jan 2021 10:05:16 -0500 Subject: [PATCH 152/205] Fixing bug with analysis --- .clang-format | 108 +++ IO/silo.hpp | 375 +++++----- analysis/morphology.cpp | 1 - analysis/runAnalysis.cpp | 1175 +++++++++++++++++--------------- 
analysis/runAnalysis.h | 65 +- common/Communication.h | 7 +- common/Domain.cpp | 26 +- common/MPI.cpp | 109 ++- common/MPI.h | 20 +- common/ScaLBL.cpp | 98 ++- models/ColorModel.cpp | 31 +- tests/TestNetcdf.cpp | 9 +- tests/lbpm_color_simulator.cpp | 102 ++- tests/lbpm_morph_pp.cpp | 1 - tests/lbpm_uCT_pp.cpp | 28 +- 15 files changed, 1229 insertions(+), 926 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..b930c24c --- /dev/null +++ b/.clang-format @@ -0,0 +1,108 @@ +# To run clang tools: +# cd to root directory +# To update format only: +# find . -name "*.cpp" -or -name "*.cc" -or -name "*.h" -or -name "*.hpp" -or -name "*.I" | xargs -I{} clang-format -i {} +# git status -s . | sed s/^...// | grep -E "(\.cpp|\.h|\.cc|\.hpp|\.I)" | xargs -I{} clang-format -i {} + +# To run modernize +# export CLANG_PATH=/packages/llvm/build/llvm-60 +# export PATH=${CLANG_PATH}/bin:${CLANG_PATH}/share/clang:$PATH +# find src -name "*.cpp" -or -name "*.cc" | xargs -I{} clang-tidy -checks=modernize* -p=/projects/AtomicModel/build/debug -fix {} +# find src -name "*.cpp" -or -name "*.cc" -or -name "*.h" -or -name "*.hpp" -or -name "*.I" | xargs -I{} clang-format -i {} + + + + + +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -4 +AlignAfterOpenBracket: DontAlign +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: true + AfterControlStatement: false + 
AfterEnum: false + AfterFunction: true + AfterNamespace: false + AfterObjCDeclaration: true + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +#BreakBeforeBraces: Stroustrup +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: false +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 100 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: true +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: true +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 4 +UseTab: Never +... 
+ diff --git a/IO/silo.hpp b/IO/silo.hpp index 35852004..1e17aa5c 100644 --- a/IO/silo.hpp +++ b/IO/silo.hpp @@ -2,8 +2,8 @@ #define SILO_INTERFACE_HPP #include "IO/silo.h" -#include "common/Utilities.h" #include "common/MPI.h" +#include "common/Utilities.h" #include "ProfilerApp.h" @@ -13,52 +13,77 @@ #include - namespace silo { /**************************************************** -* Helper functions * -****************************************************/ -template static constexpr int getType(); -template<> constexpr int getType() { return DB_DOUBLE; } -template<> constexpr int getType() { return DB_FLOAT; } -template<> constexpr int getType() { return DB_INT; } + * Helper functions * + ****************************************************/ template -inline void copyData( Array& data, int type, const void *src ) +static constexpr int getType(); +template<> +constexpr int getType() +{ + return DB_DOUBLE; +} +template<> +constexpr int getType() +{ + return DB_FLOAT; +} +template<> +constexpr int getType() +{ + return DB_INT; +} +template +inline void copyData( Array &data, int type, const void *src ) { if ( type == getType() ) - memcpy( data.data(), src, data.length()*sizeof(TYPE) ); + memcpy( data.data(), src, data.length() * sizeof( TYPE ) ); else if ( type == DB_DOUBLE ) - data.copy( static_cast(src) ); + data.copy( static_cast( src ) ); else if ( type == DB_FLOAT ) - data.copy( static_cast(src) ); + data.copy( static_cast( src ) ); else if ( type == DB_INT ) - data.copy( static_cast(src) ); + data.copy( static_cast( src ) ); else - ERROR("Unknown type"); + ERROR( "Unknown type" ); } /**************************************************** -* Write/read an arbitrary vector * -****************************************************/ -template constexpr int getSiloType(); -template<> constexpr int getSiloType() { return DB_INT; } -template<> constexpr int getSiloType() { return DB_FLOAT; } -template<> constexpr int getSiloType() { return DB_DOUBLE; } + * Write/read an 
arbitrary vector * + ****************************************************/ template -void write( DBfile* fid, const std::string& varname, const std::vector& data ) +constexpr int getSiloType(); +template<> +constexpr int getSiloType() +{ + return DB_INT; +} +template<> +constexpr int getSiloType() +{ + return DB_FLOAT; +} +template<> +constexpr int getSiloType() +{ + return DB_DOUBLE; +} +template +void write( DBfile *fid, const std::string &varname, const std::vector &data ) { int dims = data.size(); - int err = DBWrite( fid, varname.c_str(), (void*) data.data(), &dims, 1, getSiloType() ); + int err = DBWrite( fid, varname.c_str(), (void *) data.data(), &dims, 1, getSiloType() ); ASSERT( err == 0 ); } template -std::vector read( DBfile* fid, const std::string& varname ) +std::vector read( DBfile *fid, const std::string &varname ) { int N = DBGetVarLength( fid, varname.c_str() ); - std::vector data(N); + std::vector data( N ); int err = DBReadVar( fid, varname.c_str(), data.data() ); ASSERT( err == 0 ); return data; @@ -66,31 +91,31 @@ std::vector read( DBfile* fid, const std::string& varname ) /**************************************************** -* Helper function to get variable suffixes * -****************************************************/ + * Helper function to get variable suffixes * + ****************************************************/ inline std::vector getVarSuffix( int ndim, int nvars ) { - std::vector suffix(nvars); + std::vector suffix( nvars ); if ( nvars == 1 ) { suffix[0] = ""; } else if ( nvars == ndim ) { - if ( ndim==2 ) { + if ( ndim == 2 ) { suffix[0] = "_x"; suffix[1] = "_y"; - } else if ( ndim==3 ) { + } else if ( ndim == 3 ) { suffix[0] = "_x"; suffix[1] = "_y"; suffix[2] = "_z"; } else { - ERROR("Not finished"); + ERROR( "Not finished" ); } - } else if ( nvars == ndim*ndim ) { - if ( ndim==2 ) { + } else if ( nvars == ndim * ndim ) { + if ( ndim == 2 ) { suffix[0] = "_xx"; suffix[1] = "_xy"; suffix[2] = "_yx"; suffix[3] = "_yy"; - } 
else if ( ndim==3 ) { + } else if ( ndim == 3 ) { suffix[0] = "_xx"; suffix[1] = "_xy"; suffix[2] = "_xz"; @@ -101,122 +126,127 @@ inline std::vector getVarSuffix( int ndim, int nvars ) suffix[7] = "_zy"; suffix[8] = "_zz"; } else { - ERROR("Not finished"); + ERROR( "Not finished" ); } } else { - for (int i=0; i -void writeUniformMesh( DBfile* fid, const std::string& meshname, - const std::array& range, const std::array& N ) +void writeUniformMesh( DBfile *fid, const std::string &meshname, + const std::array &range, const std::array &N ) { - PROFILE_START("writeUniformMesh",2); + PROFILE_START( "writeUniformMesh", 2 ); int dims[NDIM]; - for (size_t d=0; d= 1 ) { x = new float[dims[0]]; - for (int i=0; i= 2 ) { y = new float[dims[1]]; - for (int i=0; i= 3 ) { z = new float[dims[2]]; - for (int i=0; i -void writeUniformMeshVariable( DBfile* fid, const std::string& meshname, const std::array& N, - const std::string& varname, const Array& data, VariableType type ) + * Write a vector/tensor quad variable * + ****************************************************/ +template +void writeUniformMeshVariable( DBfile *fid, const std::string &meshname, + const std::array &N, const std::string &varname, const Array &data, + VariableType type ) { - PROFILE_START("writeUniformMeshVariable",2); - int nvars=1, dims[NDIM]={1}; + PROFILE_START( "writeUniformMeshVariable", 2 ); + int nvars = 1, dims[NDIM] = { 1 }; const TYPE *vars[NDIM] = { nullptr }; - int vartype = 0; + int vartype = 0; if ( type == VariableType::NodeVariable ) { - ASSERT( data.ndim()==NDIM || data.ndim()==NDIM+1 ); - for (int d=0; d var_names(nvars); - for (int i=0; i var_names( nvars ); + for ( int i = 0; i < nvars; i++ ) var_names[i] = varname + suffix[i]; - std::vector varnames(nvars,nullptr); - for (int i=0; i(var_names[i].c_str()); - int err = DBPutQuadvar( fid, varname.c_str(), meshname.c_str(), nvars, - varnames.data(), vars, dims, NDIM, nullptr, 0, getType(), vartype, nullptr ); + std::vector varnames( nvars, 
nullptr ); + for ( int i = 0; i < nvars; i++ ) + varnames[i] = const_cast( var_names[i].c_str() ); + int err = DBPutQuadvar( fid, varname.c_str(), meshname.c_str(), nvars, varnames.data(), vars, + dims, NDIM, nullptr, 0, getType(), vartype, nullptr ); ASSERT( err == 0 ); - PROFILE_STOP("writeUniformMeshVariable",2); + PROFILE_STOP( "writeUniformMeshVariable", 2 ); } -template -Array readUniformMeshVariable( DBfile* fid, const std::string& varname ) +template +Array readUniformMeshVariable( DBfile *fid, const std::string &varname ) { auto var = DBGetQuadvar( fid, varname.c_str() ); ASSERT( var != nullptr ); Array data( var->nels, var->nvals ); int type = var->datatype; - for (int i=0; invals; i++) { + for ( int i = 0; i < var->nvals; i++ ) { Array data2( var->nels ); copyData( data2, type, var->vals[i] ); - memcpy( &data(0,i), data2.data(), var->nels*sizeof(TYPE) ); + memcpy( &data( 0, i ), data2.data(), var->nels * sizeof( TYPE ) ); } DBFreeQuadvar( var ); - std::vector dims( var->ndims+1, var->nvals ); - for (int d=0; dndims; d++) + std::vector dims( var->ndims + 1, var->nvals ); + for ( int d = 0; d < var->ndims; d++ ) dims[d] = var->dims[d]; data.reshape( dims ); return data; @@ -224,54 +254,55 @@ Array readUniformMeshVariable( DBfile* fid, const std::string& varname ) /**************************************************** -* Read/write a point mesh/variable to silo * -****************************************************/ + * Read/write a point mesh/variable to silo * + ****************************************************/ template -void writePointMesh( DBfile* fid, const std::string& meshname, - int ndim, int N, const TYPE *coords[] ) +void writePointMesh( + DBfile *fid, const std::string &meshname, int ndim, int N, const TYPE *coords[] ) { int err = DBPutPointmesh( fid, meshname.c_str(), ndim, coords, N, getType(), nullptr ); ASSERT( err == 0 ); } -template -Array readPointMesh( DBfile* fid, const std::string& meshname ) +template +Array readPointMesh( DBfile 
*fid, const std::string &meshname ) { auto mesh = DBGetPointmesh( fid, meshname.c_str() ); - int N = mesh->nels; - int ndim = mesh->ndims; - Array coords(N,ndim); + int N = mesh->nels; + int ndim = mesh->ndims; + Array coords( N, ndim ); int type = mesh->datatype; - for (int d=0; d data2( N ); copyData( data2, type, mesh->coords[d] ); - memcpy( &coords(0,d), data2.data(), N*sizeof(TYPE) ); + memcpy( &coords( 0, d ), data2.data(), N * sizeof( TYPE ) ); } DBFreePointmesh( mesh ); return coords; } template -void writePointMeshVariable( DBfile* fid, const std::string& meshname, - const std::string& varname, const Array& data ) +void writePointMeshVariable( + DBfile *fid, const std::string &meshname, const std::string &varname, const Array &data ) { - int N = data.size(0); - int nvars = data.size(1); - std::vector vars(nvars); - for (int i=0; i(), nullptr ); + int N = data.size( 0 ); + int nvars = data.size( 1 ); + std::vector vars( nvars ); + for ( int i = 0; i < nvars; i++ ) + vars[i] = &data( 0, i ); + int err = DBPutPointvar( + fid, varname.c_str(), meshname.c_str(), nvars, vars.data(), N, getType(), nullptr ); ASSERT( err == 0 ); } -template -Array readPointMeshVariable( DBfile* fid, const std::string& varname ) +template +Array readPointMeshVariable( DBfile *fid, const std::string &varname ) { auto var = DBGetPointvar( fid, varname.c_str() ); ASSERT( var != nullptr ); Array data( var->nels, var->nvals ); int type = var->datatype; - for (int i=0; invals; i++) { + for ( int i = 0; i < var->nvals; i++ ) { Array data2( var->nels ); copyData( data2, type, var->vals[i] ); - memcpy( &data(0,i), data2.data(), var->nels*sizeof(TYPE) ); + memcpy( &data( 0, i ), data2.data(), var->nels * sizeof( TYPE ) ); } DBFreeMeshvar( var ); return data; @@ -279,110 +310,110 @@ Array readPointMeshVariable( DBfile* fid, const std::string& varname ) /**************************************************** -* Read/write a triangle mesh * -****************************************************/ + 
* Read/write a triangle mesh * + ****************************************************/ template -void writeTriMesh( DBfile* fid, const std::string& meshName, - int ndim, int ndim_tri, int N, const TYPE *coords[], int N_tri, const int *tri[] ) +void writeTriMesh( DBfile *fid, const std::string &meshName, int ndim, int ndim_tri, int N, + const TYPE *coords[], int N_tri, const int *tri[] ) { auto zoneName = meshName + "_zones"; - std::vector nodelist( (ndim_tri+1)*N_tri ); - for (int i=0, j=0; i nodelist( ( ndim_tri + 1 ) * N_tri ); + for ( int i = 0, j = 0; i < N_tri; i++ ) { + for ( int d = 0; d < ndim_tri + 1; d++, j++ ) nodelist[j] = tri[d][i]; } int shapetype = 0; - if ( ndim_tri==1 ) + if ( ndim_tri == 1 ) shapetype = DB_ZONETYPE_BEAM; - else if ( ndim_tri==2 ) + else if ( ndim_tri == 2 ) shapetype = DB_ZONETYPE_TRIANGLE; - else if ( ndim_tri==3 ) + else if ( ndim_tri == 3 ) shapetype = DB_ZONETYPE_PYRAMID; else - ERROR("Unknown shapetype"); - int shapesize = ndim_tri+1; - int shapecnt = N_tri; - DBPutZonelist2( fid, zoneName.c_str(), N_tri, ndim_tri, nodelist.data(), - nodelist.size(), 0, 0, 0, &shapetype, &shapesize, &shapecnt, 1, nullptr ); - DBPutUcdmesh( fid, meshName.c_str(), ndim, nullptr, coords, N, - nodelist.size(), zoneName.c_str(), nullptr, getType(), nullptr ); + ERROR( "Unknown shapetype" ); + int shapesize = ndim_tri + 1; + int shapecnt = N_tri; + DBPutZonelist2( fid, zoneName.c_str(), N_tri, ndim_tri, nodelist.data(), nodelist.size(), 0, 0, + 0, &shapetype, &shapesize, &shapecnt, 1, nullptr ); + DBPutUcdmesh( fid, meshName.c_str(), ndim, nullptr, coords, N, nodelist.size(), + zoneName.c_str(), nullptr, getType(), nullptr ); } template -void readTriMesh( DBfile* fid, const std::string& meshname, Array& coords, Array& tri ) +void readTriMesh( DBfile *fid, const std::string &meshname, Array &coords, Array &tri ) { - auto mesh = DBGetUcdmesh( fid, meshname.c_str() ); - int ndim = mesh->ndims; + auto mesh = DBGetUcdmesh( fid, meshname.c_str() ); + int 
ndim = mesh->ndims; int N_nodes = mesh->nnodes; - coords.resize(N_nodes,ndim); + coords.resize( N_nodes, ndim ); int mesh_type = mesh->datatype; - for (int d=0; d data2( N_nodes ); copyData( data2, mesh_type, mesh->coords[d] ); - memcpy( &coords(0,d), data2.data(), N_nodes*sizeof(TYPE) ); + memcpy( &coords( 0, d ), data2.data(), N_nodes * sizeof( TYPE ) ); } - auto zones = mesh->zones; + auto zones = mesh->zones; int N_zones = zones->nzones; - ASSERT( zones->nshapes==1 ); + ASSERT( zones->nshapes == 1 ); int shapesize = zones->shapesize[0]; - tri.resize(N_zones,shapesize); - for (int i=0; inodelist[i*shapesize+j]; + tri.resize( N_zones, shapesize ); + for ( int i = 0; i < N_zones; i++ ) { + for ( int j = 0; j < shapesize; j++ ) + tri( i, j ) = zones->nodelist[i * shapesize + j]; } DBFreeUcdmesh( mesh ); } template -void writeTriMeshVariable( DBfile* fid, int ndim, const std::string& meshname, - const std::string& varname, const Array& data, VariableType type ) +void writeTriMeshVariable( DBfile *fid, int ndim, const std::string &meshname, + const std::string &varname, const Array &data, VariableType type ) { - int nvars = 0; - int vartype = 0; + int nvars = 0; + int vartype = 0; const TYPE *vars[10] = { nullptr }; if ( type == VariableType::NodeVariable ) { vartype = DB_NODECENT; - nvars = data.size(1); - for (int i=0; i var_names(nvars); - for (int i=0; i var_names( nvars ); + for ( int i = 0; i < nvars; i++ ) var_names[i] = varname + suffix[i]; - std::vector varnames(nvars,nullptr); - for (int i=0; i(var_names[i].c_str()); - DBPutUcdvar( fid, varname.c_str(), meshname.c_str(), nvars, - varnames.data(), vars, data.size(0), nullptr, 0, getType(), vartype, nullptr ); + std::vector varnames( nvars, nullptr ); + for ( int i = 0; i < nvars; i++ ) + varnames[i] = const_cast( var_names[i].c_str() ); + DBPutUcdvar( fid, varname.c_str(), meshname.c_str(), nvars, varnames.data(), vars, + data.size( 0 ), nullptr, 0, getType(), vartype, nullptr ); } template -Array 
readTriMeshVariable( DBfile* fid, const std::string& varname ) +Array readTriMeshVariable( DBfile *fid, const std::string &varname ) { auto var = DBGetUcdvar( fid, varname.c_str() ); ASSERT( var != nullptr ); Array data( var->nels, var->nvals ); int type = var->datatype; - for (int i=0; invals; i++) { + for ( int i = 0; i < var->nvals; i++ ) { Array data2( var->nels ); copyData( data2, type, var->vals[i] ); - memcpy( &data(0,i), data2.data(), var->nels*sizeof(TYPE) ); + memcpy( &data( 0, i ), data2.data(), var->nels * sizeof( TYPE ) ); } DBFreeUcdvar( var ); return data; } -}; // silo namespace +}; // namespace silo #endif diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index ad231f3f..37f58d0c 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -542,7 +542,6 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrrank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); Dm->Comm.barrier(); diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index 0f9f1c74..f43a26ff 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -13,140 +13,170 @@ #include "ProfilerApp.h" -AnalysisType& operator |=(AnalysisType &lhs, AnalysisType rhs) +AnalysisType &operator|=( AnalysisType &lhs, AnalysisType rhs ) { - lhs = static_cast( - static_cast::type>(lhs) | - static_cast::type>(rhs) - ); + lhs = static_cast( static_cast::type>( lhs ) | + static_cast::type>( rhs ) ); return lhs; } bool matches( AnalysisType x, AnalysisType y ) { - return ( static_cast::type>(x) & - static_cast::type>(y) ) != 0; + return ( static_cast::type>( x ) & + static_cast::type>( y ) ) != 0; } +// Create a shared_ptr to an array of values template -void DeleteArray( const TYPE *p ) +static inline std::shared_ptr make_shared_array( size_t N ) { - delete [] p; + return std::shared_ptr( new TYPE[N], []( const TYPE *p ) { delete[] p; } ); } // Helper class to write the restart file from a seperate thread -class WriteRestartWorkItem: 
public ThreadPool::WorkItemRet +class WriteRestartWorkItem : public ThreadPool::WorkItemRet { public: - WriteRestartWorkItem( const char* filename_, std::shared_ptr cDen_, std::shared_ptr cfq_, int N_ ): - filename(filename_), cfq(cfq_), cDen(cDen_), N(N_) {} - virtual void run() { - PROFILE_START("Save Checkpoint",1); + WriteRestartWorkItem( const std::string &filename_, std::shared_ptr cDen_, + std::shared_ptr cfq_, int N_ ) + : filename( filename_ ), cfq( cfq_ ), cDen( cDen_ ), N( N_ ) + { + } + virtual void run() + { + PROFILE_START( "Save Checkpoint", 1 ); double value; - ofstream File(filename,ios::binary); - for (int n=0; n cfq,cDen; - // const DoubleArray& phase; - //const DoubleArray& dist; + const std::string filename; + std::shared_ptr cfq, cDen; const int N; }; // Helper class to compute the blob ids +typedef std::shared_ptr> BlobIDstruct; +typedef std::shared_ptr> BlobIDList; static const std::string id_map_filename = "lbpm_id_map.txt"; -class BlobIdentificationWorkItem1: public ThreadPool::WorkItemRet +class BlobIdentificationWorkItem1 : public ThreadPool::WorkItemRet { public: - BlobIdentificationWorkItem1( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_, - std::shared_ptr phase_, const DoubleArray& dist_, - BlobIDstruct last_id_, BlobIDstruct new_index_, BlobIDstruct new_id_, BlobIDList new_list_, runAnalysis::commWrapper&& comm_ ): - timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), - phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_), comm(std::move(comm_)) -{ -} - ~BlobIdentificationWorkItem1() { } - virtual void run() { - // Compute the global blob id and compare to the previous version - PROFILE_START("Identify blobs",1); - double vF = 0.0; - double vS = -1.0; // one voxel buffer region around solid - IntArray& ids = new_index->second; - new_index->first = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,*phase,dist,vF,vS,ids,comm.comm); - 
PROFILE_STOP("Identify blobs",1); + BlobIdentificationWorkItem1( int timestep_, int Nx_, int Ny_, int Nz_, + const RankInfoStruct &rank_info_, std::shared_ptr phase_, + const DoubleArray &dist_, BlobIDstruct last_id_, BlobIDstruct new_index_, + BlobIDstruct new_id_, BlobIDList new_list_, runAnalysis::commWrapper &&comm_ ) + : timestep( timestep_ ), + Nx( Nx_ ), + Ny( Ny_ ), + Nz( Nz_ ), + rank_info( rank_info_ ), + phase( phase_ ), + dist( dist_ ), + last_id( last_id_ ), + new_index( new_index_ ), + new_id( new_id_ ), + new_list( new_list_ ), + comm( std::move( comm_ ) ) + { } + ~BlobIdentificationWorkItem1() {} + virtual void run() + { + // Compute the global blob id and compare to the previous version + PROFILE_START( "Identify blobs", 1 ); + double vF = 0.0; + double vS = -1.0; // one voxel buffer region around solid + IntArray &ids = new_index->second; + new_index->first = ComputeGlobalBlobIDs( + Nx - 2, Ny - 2, Nz - 2, rank_info, *phase, dist, vF, vS, ids, comm.comm ); + PROFILE_STOP( "Identify blobs", 1 ); + } + private: BlobIdentificationWorkItem1(); int timestep; int Nx, Ny, Nz; - const RankInfoStruct& rank_info; + const RankInfoStruct rank_info; std::shared_ptr phase; - const DoubleArray& dist; + const DoubleArray &dist; BlobIDstruct last_id, new_index, new_id; BlobIDList new_list; runAnalysis::commWrapper comm; }; -class BlobIdentificationWorkItem2: public ThreadPool::WorkItemRet +class BlobIdentificationWorkItem2 : public ThreadPool::WorkItemRet { public: - BlobIdentificationWorkItem2( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_, - std::shared_ptr phase_, const DoubleArray& dist_, - BlobIDstruct last_id_, BlobIDstruct new_index_, BlobIDstruct new_id_, BlobIDList new_list_ , runAnalysis::commWrapper&& comm_ ): - timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), - phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_), comm(std::move(comm_)) -{ -} - 
~BlobIdentificationWorkItem2() { } - virtual void run() { + BlobIdentificationWorkItem2( int timestep_, int Nx_, int Ny_, int Nz_, + const RankInfoStruct &rank_info_, std::shared_ptr phase_, + const DoubleArray &dist_, BlobIDstruct last_id_, BlobIDstruct new_index_, + BlobIDstruct new_id_, BlobIDList new_list_, runAnalysis::commWrapper &&comm_ ) + : timestep( timestep_ ), + Nx( Nx_ ), + Ny( Ny_ ), + Nz( Nz_ ), + rank_info( rank_info_ ), + phase( phase_ ), + dist( dist_ ), + last_id( last_id_ ), + new_index( new_index_ ), + new_id( new_id_ ), + new_list( new_list_ ), + comm( std::move( comm_ ) ) + { + } + ~BlobIdentificationWorkItem2() {} + virtual void run() + { // Compute the global blob id and compare to the previous version - PROFILE_START("Identify blobs maps",1); - const IntArray& ids = new_index->second; - static int max_id = -1; - new_id->first = new_index->first; - new_id->second = new_index->second; - if ( last_id.get()!=NULL ) { + PROFILE_START( "Identify blobs maps", 1 ); + const IntArray &ids = new_index->second; + static int max_id = -1; + new_id->first = new_index->first; + new_id->second = new_index->second; + if ( last_id.get() != NULL ) { // Compute the timestep-timestep map - const IntArray& old_ids = last_id->second; - ID_map_struct map = computeIDMap(Nx,Ny,Nz,old_ids,ids,comm.comm); + const IntArray &old_ids = last_id->second; + ID_map_struct map = computeIDMap( Nx, Ny, Nz, old_ids, ids, comm.comm ); // Renumber the current timestep's ids - getNewIDs(map,max_id,*new_list); - renumberIDs(*new_list,new_id->second); - writeIDMap(map,timestep,id_map_filename); + getNewIDs( map, max_id, *new_list ); + renumberIDs( *new_list, new_id->second ); + writeIDMap( map, timestep, id_map_filename ); } else { max_id = -1; - ID_map_struct map(new_id->first); - getNewIDs(map,max_id,*new_list); - writeIDMap(map,timestep,id_map_filename); + ID_map_struct map( new_id->first ); + getNewIDs( map, max_id, *new_list ); + writeIDMap( map, timestep, id_map_filename ); } - 
PROFILE_STOP("Identify blobs maps",1); + PROFILE_STOP( "Identify blobs maps", 1 ); } + private: BlobIdentificationWorkItem2(); int timestep; int Nx, Ny, Nz; - const RankInfoStruct& rank_info; + const RankInfoStruct rank_info; std::shared_ptr phase; - const DoubleArray& dist; + const DoubleArray &dist; BlobIDstruct last_id, new_index, new_id; BlobIDList new_list; runAnalysis::commWrapper comm; @@ -154,324 +184,375 @@ private: // Helper class to write the vis file from a thread -class WriteVisWorkItem: public ThreadPool::WorkItemRet +class WriteVisWorkItem : public ThreadPool::WorkItemRet { public: - WriteVisWorkItem( int timestep_, std::vector& visData_, - TwoPhase& Avgerages_, fillHalo& fillData_, runAnalysis::commWrapper&& comm_ ): - timestep(timestep_), visData(visData_), Averages(Avgerages_), fillData(fillData_), comm(std::move(comm_)) - { - } - ~WriteVisWorkItem() { } - virtual void run() { - PROFILE_START("Save Vis",1); - - ASSERT(visData[0].vars[0]->name=="phase"); - Array& PhaseData = visData[0].vars[0]->data; - fillData.copy(Averages.SDn,PhaseData); + WriteVisWorkItem( int timestep_, std::vector &visData_, + TwoPhase &Avgerages_, std::array n_, RankInfoStruct rank_info_, + runAnalysis::commWrapper &&comm_ ) + : timestep( timestep_ ), + visData( visData_ ), + Averages( Avgerages_ ), + n( std::move( n_ ) ), + rank_info( std::move( rank_info_ ) ), + comm( std::move( comm_ ) ) + { + } + ~WriteVisWorkItem() {} + virtual void run() + { + PROFILE_START( "Save Vis", 1 ); - ASSERT(visData[0].vars[5]->name=="SignDist"); - Array& SignData = visData[0].vars[5]->data; - fillData.copy(Averages.SDs,SignData); + fillHalo fillData( comm.comm, rank_info, n, { 1, 1, 1 }, 0, 1 ); - ASSERT(visData[0].vars[1]->name=="Pressure"); - Array& PressData = visData[0].vars[1]->data; - fillData.copy(Averages.Press,PressData); + ASSERT( visData[0].vars[0]->name == "phase" ); + Array &PhaseData = visData[0].vars[0]->data; + fillData.copy( Averages.SDn, PhaseData ); - 
ASSERT(visData[0].vars[2]->name=="Velocity_x"); - ASSERT(visData[0].vars[3]->name=="Velocity_y"); - ASSERT(visData[0].vars[4]->name=="Velocity_z"); - Array& VelxData = visData[0].vars[2]->data; - Array& VelyData = visData[0].vars[3]->data; - Array& VelzData = visData[0].vars[4]->data; - fillData.copy(Averages.Vel_x,VelxData); - fillData.copy(Averages.Vel_y,VelyData); - fillData.copy(Averages.Vel_z,VelzData); - - ASSERT(visData[0].vars[6]->name=="BlobID"); - Array& BlobData = visData[0].vars[6]->data; - fillData.copy(Averages.Label_NWP,BlobData); + ASSERT( visData[0].vars[5]->name == "SignDist" ); + Array &SignData = visData[0].vars[5]->data; + fillData.copy( Averages.SDs, SignData ); + + ASSERT( visData[0].vars[1]->name == "Pressure" ); + Array &PressData = visData[0].vars[1]->data; + fillData.copy( Averages.Press, PressData ); + + ASSERT( visData[0].vars[2]->name == "Velocity_x" ); + ASSERT( visData[0].vars[3]->name == "Velocity_y" ); + ASSERT( visData[0].vars[4]->name == "Velocity_z" ); + Array &VelxData = visData[0].vars[2]->data; + Array &VelyData = visData[0].vars[3]->data; + Array &VelzData = visData[0].vars[4]->data; + fillData.copy( Averages.Vel_x, VelxData ); + fillData.copy( Averages.Vel_y, VelyData ); + fillData.copy( Averages.Vel_z, VelzData ); + + ASSERT( visData[0].vars[6]->name == "BlobID" ); + Array &BlobData = visData[0].vars[6]->data; + fillData.copy( Averages.Label_NWP, BlobData ); IO::writeData( timestep, visData, comm.comm ); - - PROFILE_STOP("Save Vis",1); + + PROFILE_STOP( "Save Vis", 1 ); }; + private: WriteVisWorkItem(); int timestep; - std::vector& visData; - TwoPhase& Averages; - fillHalo& fillData; + std::array n; + RankInfoStruct rank_info; + std::vector &visData; + TwoPhase &Averages; runAnalysis::commWrapper comm; }; // Helper class to write the vis file from a thread -class IOWorkItem: public ThreadPool::WorkItemRet +class IOWorkItem : public ThreadPool::WorkItemRet { public: - IOWorkItem(int timestep_, std::shared_ptr input_db_, 
std::vector& visData_, - SubPhase& Averages_, fillHalo& fillData_, runAnalysis::commWrapper&& comm_ ): - timestep(timestep_), input_db(input_db_), visData(visData_), Averages(Averages_), fillData(fillData_), comm(std::move(comm_)) - { - } - ~IOWorkItem() { } - virtual void run() { - auto color_db = input_db->getDatabase( "Color" ); - auto vis_db = input_db->getDatabase( "Visualization" ); - // int timestep = color_db->getWithDefault( "timestep", 0 ); + IOWorkItem( int timestep_, std::shared_ptr input_db_, + std::vector &visData_, SubPhase &Averages_, std::array n_, + RankInfoStruct rank_info_, runAnalysis::commWrapper &&comm_ ) + : timestep( timestep_ ), + input_db( input_db_ ), + visData( visData_ ), + Averages( Averages_ ), + n( std::move( n_ ) ), + rank_info( std::move( rank_info_ ) ), + comm( std::move( comm_ ) ) + { + } + ~IOWorkItem() {} + virtual void run() + { + PROFILE_START( "Save Vis", 1 ); - PROFILE_START("Save Vis",1); + auto color_db = input_db->getDatabase( "Color" ); + auto vis_db = input_db->getDatabase( "Visualization" ); + // int timestep = color_db->getWithDefault( "timestep", 0 ); - if (vis_db->getWithDefault( "save_phase_field", true )){ - ASSERT(visData[0].vars[0]->name=="phase"); - Array& PhaseData = visData[0].vars[0]->data; - fillData.copy(Averages.Phi,PhaseData); + fillHalo fillData( comm.comm, rank_info, n, { 1, 1, 1 }, 0, 1 ); + + if ( vis_db->getWithDefault( "save_phase_field", true ) ) { + ASSERT( visData[0].vars[0]->name == "phase" ); + Array &PhaseData = visData[0].vars[0]->data; + fillData.copy( Averages.Phi, PhaseData ); } - if (vis_db->getWithDefault( "save_pressure", false )){ - ASSERT(visData[0].vars[1]->name=="Pressure"); - Array& PressData = visData[0].vars[1]->data; - fillData.copy(Averages.Pressure,PressData); + if ( vis_db->getWithDefault( "save_pressure", false ) ) { + ASSERT( visData[0].vars[1]->name == "Pressure" ); + Array &PressData = visData[0].vars[1]->data; + fillData.copy( Averages.Pressure, PressData ); } - if 
(vis_db->getWithDefault( "save_velocity", false )){ - ASSERT(visData[0].vars[2]->name=="Velocity_x"); - ASSERT(visData[0].vars[3]->name=="Velocity_y"); - ASSERT(visData[0].vars[4]->name=="Velocity_z"); - Array& VelxData = visData[0].vars[2]->data; - Array& VelyData = visData[0].vars[3]->data; - Array& VelzData = visData[0].vars[4]->data; - fillData.copy(Averages.Vel_x,VelxData); - fillData.copy(Averages.Vel_y,VelyData); - fillData.copy(Averages.Vel_z,VelzData); + if ( vis_db->getWithDefault( "save_velocity", false ) ) { + ASSERT( visData[0].vars[2]->name == "Velocity_x" ); + ASSERT( visData[0].vars[3]->name == "Velocity_y" ); + ASSERT( visData[0].vars[4]->name == "Velocity_z" ); + Array &VelxData = visData[0].vars[2]->data; + Array &VelyData = visData[0].vars[3]->data; + Array &VelzData = visData[0].vars[4]->data; + fillData.copy( Averages.Vel_x, VelxData ); + fillData.copy( Averages.Vel_y, VelyData ); + fillData.copy( Averages.Vel_z, VelzData ); } - if (vis_db->getWithDefault( "save_distance", false )){ - ASSERT(visData[0].vars[5]->name=="SignDist"); - Array& SignData = visData[0].vars[5]->data; - fillData.copy(Averages.SDs,SignData); + if ( vis_db->getWithDefault( "save_distance", false ) ) { + ASSERT( visData[0].vars[5]->name == "SignDist" ); + Array &SignData = visData[0].vars[5]->data; + fillData.copy( Averages.SDs, SignData ); } - if (vis_db->getWithDefault( "save_connected_components", false )){ - ASSERT(visData[0].vars[6]->name=="BlobID"); - Array& BlobData = visData[0].vars[6]->data; - fillData.copy(Averages.morph_n->label,BlobData); - } - - if (vis_db->getWithDefault( "write_silo", true )) - IO::writeData( timestep, visData, comm.comm ); - - if (vis_db->getWithDefault( "save_8bit_raw", true )){ - char CurrentIDFilename[40]; - sprintf(CurrentIDFilename,"id_t%d.raw",timestep); - Averages.AggregateLabels(CurrentIDFilename); + if ( vis_db->getWithDefault( "save_connected_components", false ) ) { + ASSERT( visData[0].vars[6]->name == "BlobID" ); + Array 
&BlobData = visData[0].vars[6]->data; + fillData.copy( Averages.morph_n->label, BlobData ); } - PROFILE_STOP("Save Vis",1); + if ( vis_db->getWithDefault( "write_silo", true ) ) + IO::writeData( timestep, visData, comm.comm ); + + if ( vis_db->getWithDefault( "save_8bit_raw", true ) ) { + char CurrentIDFilename[40]; + sprintf( CurrentIDFilename, "id_t%d.raw", timestep ); + Averages.AggregateLabels( CurrentIDFilename ); + } + + PROFILE_STOP( "Save Vis", 1 ); }; + private: IOWorkItem(); int timestep; + std::array n; + RankInfoStruct rank_info; std::shared_ptr input_db; - std::vector& visData; - SubPhase& Averages; - fillHalo& fillData; + std::vector &visData; + SubPhase &Averages; runAnalysis::commWrapper comm; }; // Helper class to run the analysis from within a thread // Note: Averages will be modified after the constructor is called -class AnalysisWorkItem: public ThreadPool::WorkItemRet +class AnalysisWorkItem : public ThreadPool::WorkItemRet { public: - AnalysisWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_, - BlobIDstruct ids, BlobIDList id_list_, double beta_ ): - type(type_), timestep(timestep_), Averages(Averages_), - blob_ids(ids), id_list(id_list_), beta(beta_) { } - ~AnalysisWorkItem() { } - virtual void run() { + AnalysisWorkItem( AnalysisType type_, int timestep_, TwoPhase &Averages_, BlobIDstruct ids, + BlobIDList id_list_, double beta_ ) + : type( type_ ), + timestep( timestep_ ), + Averages( Averages_ ), + blob_ids( ids ), + id_list( id_list_ ), + beta( beta_ ) + { + } + ~AnalysisWorkItem() {} + virtual void run() + { Averages.NumberComponents_NWP = blob_ids->first; - Averages.Label_NWP = blob_ids->second; - Averages.Label_NWP_map = *id_list; - Averages.NumberComponents_WP = 1; - Averages.Label_WP.fill(0.0); - if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + Averages.Label_NWP = blob_ids->second; + Averages.Label_NWP_map = *id_list; + Averages.NumberComponents_WP = 1; + Averages.Label_WP.fill( 0.0 ); + if ( matches( type, 
AnalysisType::CopyPhaseIndicator ) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( matches(type,AnalysisType::ComputeAverages) ) { - PROFILE_START("Compute dist",1); + if ( matches( type, AnalysisType::ComputeAverages ) ) { + PROFILE_START( "Compute dist", 1 ); Averages.Initialize(); Averages.ComputeDelPhi(); - Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.SDn); - Averages.ColorToSignedDistance(beta,Averages.Phase_tminus,Averages.Phase_tminus); - Averages.ColorToSignedDistance(beta,Averages.Phase_tplus,Averages.Phase_tplus); + Averages.ColorToSignedDistance( beta, Averages.Phase, Averages.SDn ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tminus, Averages.Phase_tminus ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tplus, Averages.Phase_tplus ); Averages.UpdateMeshValues(); Averages.ComputeLocal(); Averages.Reduce(); - Averages.PrintAll(timestep); + Averages.PrintAll( timestep ); Averages.Initialize(); Averages.ComponentAverages(); Averages.SortBlobs(); - Averages.PrintComponents(timestep); - PROFILE_STOP("Compute dist",1); + Averages.PrintComponents( timestep ); + PROFILE_STOP( "Compute dist", 1 ); } } + private: AnalysisWorkItem(); AnalysisType type; int timestep; - TwoPhase& Averages; + TwoPhase &Averages; BlobIDstruct blob_ids; BlobIDList id_list; double beta; }; -class TCATWorkItem: public ThreadPool::WorkItemRet +class TCATWorkItem : public ThreadPool::WorkItemRet { public: - TCATWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_, - BlobIDstruct ids, BlobIDList id_list_, double beta_ ): - type(type_), timestep(timestep_), Averages(Averages_), - blob_ids(ids), id_list(id_list_), beta(beta_) { } - ~TCATWorkItem() { } - virtual void run() { + TCATWorkItem( AnalysisType type_, int timestep_, TwoPhase &Averages_, BlobIDstruct ids, + BlobIDList id_list_, double beta_ ) + : type( type_ ), + timestep( timestep_ ), + Averages( Averages_ ), + blob_ids( ids ), + id_list( id_list_ 
), + beta( beta_ ) + { + } + ~TCATWorkItem() {} + virtual void run() + { Averages.NumberComponents_NWP = blob_ids->first; - Averages.Label_NWP = blob_ids->second; - Averages.Label_NWP_map = *id_list; - Averages.NumberComponents_WP = 1; - Averages.Label_WP.fill(0.0); - if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + Averages.Label_NWP = blob_ids->second; + Averages.Label_NWP_map = *id_list; + Averages.NumberComponents_WP = 1; + Averages.Label_WP.fill( 0.0 ); + if ( matches( type, AnalysisType::CopyPhaseIndicator ) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( matches(type,AnalysisType::ComputeAverages) ) { - PROFILE_START("Compute TCAT",1); + if ( matches( type, AnalysisType::ComputeAverages ) ) { + PROFILE_START( "Compute TCAT", 1 ); Averages.Initialize(); Averages.ComputeDelPhi(); - Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.SDn); - Averages.ColorToSignedDistance(beta,Averages.Phase_tminus,Averages.Phase_tminus); - Averages.ColorToSignedDistance(beta,Averages.Phase_tplus,Averages.Phase_tplus); + Averages.ColorToSignedDistance( beta, Averages.Phase, Averages.SDn ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tminus, Averages.Phase_tminus ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tplus, Averages.Phase_tplus ); Averages.UpdateMeshValues(); Averages.ComputeLocal(); Averages.Reduce(); - Averages.PrintAll(timestep); - PROFILE_STOP("Compute TCAT",1); + Averages.PrintAll( timestep ); + PROFILE_STOP( "Compute TCAT", 1 ); } } + private: TCATWorkItem(); AnalysisType type; int timestep; - TwoPhase& Averages; + TwoPhase &Averages; BlobIDstruct blob_ids; BlobIDList id_list; double beta; }; -class GanglionTrackingWorkItem: public ThreadPool::WorkItemRet +class GanglionTrackingWorkItem : public ThreadPool::WorkItemRet { public: - GanglionTrackingWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_, - BlobIDstruct ids, BlobIDList id_list_, double beta_ ): - type(type_), 
timestep(timestep_), Averages(Averages_), - blob_ids(ids), id_list(id_list_), beta(beta_) { } - ~GanglionTrackingWorkItem() { } - virtual void run() { + GanglionTrackingWorkItem( AnalysisType type_, int timestep_, TwoPhase &Averages_, + BlobIDstruct ids, BlobIDList id_list_, double beta_ ) + : type( type_ ), + timestep( timestep_ ), + Averages( Averages_ ), + blob_ids( ids ), + id_list( id_list_ ), + beta( beta_ ) + { + } + ~GanglionTrackingWorkItem() {} + virtual void run() + { Averages.NumberComponents_NWP = blob_ids->first; - Averages.Label_NWP = blob_ids->second; - Averages.Label_NWP_map = *id_list; - Averages.NumberComponents_WP = 1; - Averages.Label_WP.fill(0.0); - if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + Averages.Label_NWP = blob_ids->second; + Averages.Label_NWP_map = *id_list; + Averages.NumberComponents_WP = 1; + Averages.Label_WP.fill( 0.0 ); + if ( matches( type, AnalysisType::CopyPhaseIndicator ) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( matches(type,AnalysisType::ComputeAverages) ) { - PROFILE_START("Compute ganglion",1); + if ( matches( type, AnalysisType::ComputeAverages ) ) { + PROFILE_START( "Compute ganglion", 1 ); Averages.Initialize(); Averages.ComputeDelPhi(); - Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.SDn); - Averages.ColorToSignedDistance(beta,Averages.Phase_tminus,Averages.Phase_tminus); - Averages.ColorToSignedDistance(beta,Averages.Phase_tplus,Averages.Phase_tplus); + Averages.ColorToSignedDistance( beta, Averages.Phase, Averages.SDn ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tminus, Averages.Phase_tminus ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tplus, Averages.Phase_tplus ); Averages.UpdateMeshValues(); Averages.ComponentAverages(); Averages.SortBlobs(); - Averages.PrintComponents(timestep); - PROFILE_STOP("Compute ganglion",1); + Averages.PrintComponents( timestep ); + PROFILE_STOP( "Compute ganglion", 1 ); } } + private: 
GanglionTrackingWorkItem(); AnalysisType type; int timestep; - TwoPhase& Averages; + TwoPhase &Averages; BlobIDstruct blob_ids; BlobIDList id_list; double beta; }; -class BasicWorkItem: public ThreadPool::WorkItemRet +class BasicWorkItem : public ThreadPool::WorkItemRet { public: - BasicWorkItem( AnalysisType type_, int timestep_, SubPhase& Averages_ ): - type(type_), timestep(timestep_), Averages(Averages_){ } - ~BasicWorkItem() { } - virtual void run() { + BasicWorkItem( AnalysisType type_, int timestep_, SubPhase &Averages_ ) + : type( type_ ), timestep( timestep_ ), Averages( Averages_ ) + { + } + ~BasicWorkItem() {} + virtual void run() + { - if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + if ( matches( type, AnalysisType::CopyPhaseIndicator ) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( matches(type,AnalysisType::ComputeAverages) ) { - PROFILE_START("Compute basic averages",1); + if ( matches( type, AnalysisType::ComputeAverages ) ) { + PROFILE_START( "Compute basic averages", 1 ); Averages.Basic(); - PROFILE_STOP("Compute basic averages",1); + PROFILE_STOP( "Compute basic averages", 1 ); } } + private: BasicWorkItem(); AnalysisType type; int timestep; - SubPhase& Averages; + SubPhase &Averages; double beta; }; -class SubphaseWorkItem: public ThreadPool::WorkItemRet +class SubphaseWorkItem : public ThreadPool::WorkItemRet { public: - SubphaseWorkItem( AnalysisType type_, int timestep_, SubPhase& Averages_ ): - type(type_), timestep(timestep_), Averages(Averages_){ } - ~SubphaseWorkItem() { } - virtual void run() { - - PROFILE_START("Compute subphase",1); - Averages.Full(); - Averages.Write(timestep); - PROFILE_STOP("Compute subphase",1); + SubphaseWorkItem( AnalysisType type_, int timestep_, SubPhase &Averages_ ) + : type( type_ ), timestep( timestep_ ), Averages( Averages_ ) + { } + ~SubphaseWorkItem() {} + virtual void run() + { + + PROFILE_START( "Compute subphase", 1 ); + Averages.Full(); + 
Averages.Write( timestep ); + PROFILE_STOP( "Compute subphase", 1 ); + } + private: SubphaseWorkItem(); AnalysisType type; int timestep; - SubPhase& Averages; + SubPhase &Averages; double beta; }; - /****************************************************************** * MPI comm wrapper for use with analysis * ******************************************************************/ -runAnalysis::commWrapper::commWrapper( int tag_, const Utilities::MPI& comm_, runAnalysis* analysis_ ): - comm(comm_), - tag(tag_), - analysis(analysis_) +runAnalysis::commWrapper::commWrapper( + int tag_, const Utilities::MPI &comm_, runAnalysis *analysis_ ) + : comm( comm_ ), tag( tag_ ), analysis( analysis_ ) { } -runAnalysis::commWrapper::commWrapper( commWrapper &&rhs ): - comm(rhs.comm), - tag(rhs.tag), - analysis(rhs.analysis) +runAnalysis::commWrapper::commWrapper( commWrapper &&rhs ) + : comm( rhs.comm ), tag( rhs.tag ), analysis( rhs.analysis ) { rhs.tag = -1; } @@ -482,48 +563,44 @@ runAnalysis::commWrapper::~commWrapper() comm.barrier(); analysis->d_comm_used[tag] = false; } -runAnalysis::commWrapper runAnalysis::getComm( ) +runAnalysis::commWrapper runAnalysis::getComm() { // Get a tag from root int tag = -1; if ( d_rank == 0 ) { - for (int i=0; i<1024; i++) { + for ( int i = 0; i < 1024; i++ ) { if ( !d_comm_used[i] ) { tag = i; break; } } if ( tag == -1 ) - ERROR("Unable to get comm"); + ERROR( "Unable to get comm" ); } - tag = d_comm.bcast( tag, 0 ); + tag = d_comm.bcast( tag, 0 ); d_comm_used[tag] = true; if ( d_comms[tag].isNull() ) d_comms[tag] = d_comm.dup(); - return commWrapper(tag,d_comms[tag],this); + return commWrapper( tag, d_comms[tag], this ); } /****************************************************************** * Constructor/Destructors * ******************************************************************/ -runAnalysis::runAnalysis( std::shared_ptr input_db, - const RankInfoStruct& rank_info, - std::shared_ptr ScaLBL_Comm, - std::shared_ptr Dm, - int Np, - bool 
Regular, - IntArray Map ): - d_Np( Np ), - d_regular ( Regular), - d_rank_info( rank_info ), - d_Map( Map ), - d_comm( Dm->Comm.dup() ), - d_ScaLBL_Comm( ScaLBL_Comm) +runAnalysis::runAnalysis( std::shared_ptr input_db, const RankInfoStruct &rank_info, + std::shared_ptr ScaLBL_Comm, std::shared_ptr Dm, int Np, + bool Regular, IntArray Map ) + : d_Np( Np ), + d_regular( Regular ), + d_rank_info( rank_info ), + d_Map( Map ), + d_comm( Dm->Comm.dup() ), + d_ScaLBL_Comm( ScaLBL_Comm ) { - auto db = input_db->getDatabase( "Analysis" ); - auto vis_db = input_db->getDatabase( "Visualization" ); + auto db = input_db->getDatabase( "Analysis" ); + auto vis_db = input_db->getDatabase( "Visualization" ); // Ids of work items to use for dependencies ThreadPool::thread_id_t d_wait_blobID; @@ -533,117 +610,118 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, ThreadPool::thread_id_t d_wait_subphase; char rankString[20]; - sprintf(rankString,"%05d",Dm->rank()); - d_n[0] = Dm->Nx-2; - d_n[1] = Dm->Ny-2; - d_n[2] = Dm->Nz-2; + sprintf( rankString, "%05d", Dm->rank() ); + d_n[0] = Dm->Nx - 2; + d_n[1] = Dm->Ny - 2; + d_n[2] = Dm->Nz - 2; d_N[0] = Dm->Nx; d_N[1] = Dm->Ny; d_N[2] = Dm->Nz; - - d_restart_interval = db->getScalar( "restart_interval" ); - d_analysis_interval = db->getScalar( "analysis_interval" ); + + d_restart_interval = db->getScalar( "restart_interval" ); + d_analysis_interval = db->getScalar( "analysis_interval" ); d_subphase_analysis_interval = INT_MAX; - d_visualization_interval = INT_MAX; - d_blobid_interval = INT_MAX; - if (db->keyExists( "blobid_interval" )){ - d_blobid_interval = db->getScalar( "blobid_interval" ); - } - if (db->keyExists( "visualization_interval" )){ - d_visualization_interval = db->getScalar( "visualization_interval" ); - } - if (db->keyExists( "subphase_analysis_interval" )){ - d_subphase_analysis_interval = db->getScalar( "subphase_analysis_interval" ); - } - + d_visualization_interval = INT_MAX; + d_blobid_interval = INT_MAX; + if ( 
db->keyExists( "blobid_interval" ) ) { + d_blobid_interval = db->getScalar( "blobid_interval" ); + } + if ( db->keyExists( "visualization_interval" ) ) { + d_visualization_interval = db->getScalar( "visualization_interval" ); + } + if ( db->keyExists( "subphase_analysis_interval" ) ) { + d_subphase_analysis_interval = db->getScalar( "subphase_analysis_interval" ); + } + auto restart_file = db->getScalar( "restart_file" ); - d_restartFile = restart_file + "." + rankString; - - + d_restartFile = restart_file + "." + rankString; + + d_rank = d_comm.getRank(); - writeIDMap(ID_map_struct(),0,id_map_filename); + writeIDMap( ID_map_struct(), 0, id_map_filename ); // Initialize IO for silo - IO::initialize("","silo","false"); - // Create the MeshDataStruct - d_meshData.resize(1); + IO::initialize( "", "silo", "false" ); + // Create the MeshDataStruct + d_meshData.resize( 1 ); d_meshData[0].meshName = "domain"; - d_meshData[0].mesh = std::make_shared( d_rank_info,d_n[0],d_n[1],d_n[2],Dm->Lx,Dm->Ly,Dm->Lz ); - auto PhaseVar = std::make_shared(); - auto PressVar = std::make_shared(); - auto VxVar = std::make_shared(); - auto VyVar = std::make_shared(); - auto VzVar = std::make_shared(); + d_meshData[0].mesh = std::make_shared( + d_rank_info, d_n[0], d_n[1], d_n[2], Dm->Lx, Dm->Ly, Dm->Lz ); + auto PhaseVar = std::make_shared(); + auto PressVar = std::make_shared(); + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); auto SignDistVar = std::make_shared(); - auto BlobIDVar = std::make_shared(); - - if (vis_db->getWithDefault( "save_phase_field", true )){ + auto BlobIDVar = std::make_shared(); + + if ( vis_db->getWithDefault( "save_phase_field", true ) ) { PhaseVar->name = "phase"; PhaseVar->type = IO::VariableType::VolumeVariable; - PhaseVar->dim = 1; - PhaseVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(PhaseVar); + PhaseVar->dim = 1; + PhaseVar->data.resize( d_n[0], d_n[1], d_n[2] ); + 
d_meshData[0].vars.push_back( PhaseVar ); } - if (vis_db->getWithDefault( "save_pressure", false )){ + if ( vis_db->getWithDefault( "save_pressure", false ) ) { PressVar->name = "Pressure"; PressVar->type = IO::VariableType::VolumeVariable; - PressVar->dim = 1; - PressVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(PressVar); + PressVar->dim = 1; + PressVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( PressVar ); } - if (vis_db->getWithDefault( "save_velocity", false )){ + if ( vis_db->getWithDefault( "save_velocity", false ) ) { VxVar->name = "Velocity_x"; VxVar->type = IO::VariableType::VolumeVariable; - VxVar->dim = 1; - VxVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(VxVar); + VxVar->dim = 1; + VxVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VxVar ); VyVar->name = "Velocity_y"; VyVar->type = IO::VariableType::VolumeVariable; - VyVar->dim = 1; - VyVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(VyVar); + VyVar->dim = 1; + VyVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VyVar ); VzVar->name = "Velocity_z"; VzVar->type = IO::VariableType::VolumeVariable; - VzVar->dim = 1; - VzVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(VzVar); + VzVar->dim = 1; + VzVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VzVar ); } - if (vis_db->getWithDefault( "save_distance", false )){ + if ( vis_db->getWithDefault( "save_distance", false ) ) { SignDistVar->name = "SignDist"; SignDistVar->type = IO::VariableType::VolumeVariable; - SignDistVar->dim = 1; - SignDistVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(SignDistVar); + SignDistVar->dim = 1; + SignDistVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( SignDistVar ); } - if (vis_db->getWithDefault( "save_connected_components", false )){ + if ( vis_db->getWithDefault( 
"save_connected_components", false ) ) { BlobIDVar->name = "BlobID"; BlobIDVar->type = IO::VariableType::VolumeVariable; - BlobIDVar->dim = 1; - BlobIDVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(BlobIDVar); + BlobIDVar->dim = 1; + BlobIDVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( BlobIDVar ); } - + // Initialize the comms - for (int i=0; i<1024; i++) + for ( int i = 0; i < 1024; i++ ) d_comm_used[i] = false; // Initialize the threads int N_threads = db->getWithDefault( "N_threads", 4 ); - auto method = db->getWithDefault( "load_balance", "default" ); + auto method = db->getWithDefault( "load_balance", "default" ); createThreads( method, N_threads ); } -runAnalysis::~runAnalysis( ) +runAnalysis::~runAnalysis() { // Finish processing analysis finish(); } -void runAnalysis::finish( ) +void runAnalysis::finish() { - PROFILE_START("finish"); + PROFILE_START( "finish" ); // Wait for the work items to finish d_tpool.wait_pool_finished(); // Clear the wait ids @@ -654,23 +732,23 @@ void runAnalysis::finish( ) d_wait_restart.reset(); // Syncronize d_comm.barrier(); - PROFILE_STOP("finish"); + PROFILE_STOP( "finish" ); } /****************************************************************** * Set the thread affinities * ******************************************************************/ -void print( const std::vector& ids ) +void print( const std::vector &ids ) { if ( ids.empty() ) return; - printf("%i",ids[0]); - for (size_t i=1; i 0 ) - std::cerr << "Warning: Failed to start MPI with necessary thread support, errors may occur\n"; + std::cerr + << "Warning: Failed to start MPI with necessary thread support, errors may occur\n"; // Create the threads const auto cores = d_tpool.getProcessAffinity(); if ( N_threads == 0 ) { @@ -694,17 +773,17 @@ void runAnalysis::createThreads( const std::string& method, int N_threads ) int N = cores.size() - 1; d_tpool.setNumThreads( N ); d_tpool.setThreadAffinity( { cores[0] } ); - for 
( int i=0; i input_db, TwoPhase& Averages, const double *Phi, - double *Pressure, double *Velocity, double *fq, double *Den) +void runAnalysis::run( int timestep, std::shared_ptr input_db, TwoPhase &Averages, + const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ) { - int N = d_N[0]*d_N[1]*d_N[2]; + int N = d_N[0] * d_N[1] * d_N[2]; NULL_USE( N ); NULL_USE( Phi ); - - auto db = input_db->getDatabase( "Analysis" ); - //int timestep = db->getWithDefault( "timestep", 0 ); + + auto db = input_db->getDatabase( "Analysis" ); + // int timestep = db->getWithDefault( "timestep", 0 ); // Check which analysis steps we need to perform auto type = computeAnalysisType( timestep ); @@ -783,18 +861,18 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase finish(); } - PROFILE_START("run"); + PROFILE_START( "run" ); // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); - PROFILE_START("Copy data to host",1); + PROFILE_START( "Copy data to host", 1 ); std::shared_ptr phase; /* if ( matches(type,AnalysisType::CopyPhaseIndicator) || matches(type,AnalysisType::ComputeAverages) || - matches(type,AnalysisType::CopySimState) || + matches(type,AnalysisType::CopySimState) || matches(type,AnalysisType::IdentifyBlobs) ) { - phase = std::shared_ptr(new DoubleArray(d_N[0],d_N[1],d_N[2])); + phase = std::make_shared(d_N[0],d_N[1],d_N[2]); //ScaLBL_CopyToHost(phase->data(),Phi,N*sizeof(double)); // try 2 d_ScaLBL_Comm.RegulLayout(d_Map,Phi,Averages.Phase); // memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double)); @@ -820,136 +898,137 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase delete [] TmpDat; } */ - //if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { - if ( timestep%d_analysis_interval + 8 == d_analysis_interval ) { - if (d_regular) - d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phase_tplus); - else - 
ScaLBL_CopyToHost(Averages.Phase_tplus.data(),Phi,N*sizeof(double)); - //memcpy(Averages.Phase_tplus.data(),phase->data(),N*sizeof(double)); - } - if ( timestep%d_analysis_interval == 0 ) { - if (d_regular) - d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phase_tminus); - else - ScaLBL_CopyToHost(Averages.Phase_tminus.data(),Phi,N*sizeof(double)); - //memcpy(Averages.Phase_tminus.data(),phase->data(),N*sizeof(double)); - } - //if ( matches(type,AnalysisType::CopySimState) ) { - if ( timestep%d_analysis_interval + 4 == d_analysis_interval ) { - // Copy the members of Averages to the cpu (phase was copied above) - PROFILE_START("Copy-Pressure",1); - ScaLBL_D3Q19_Pressure(fq,Pressure,d_Np); - //ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); - ScaLBL_DeviceBarrier(); - PROFILE_STOP("Copy-Pressure",1); - PROFILE_START("Copy-Wait",1); - PROFILE_STOP("Copy-Wait",1); - PROFILE_START("Copy-State",1); - //memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double)); - if (d_regular) - d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phase); + // if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + if ( timestep % d_analysis_interval + 8 == d_analysis_interval ) { + if ( d_regular ) + d_ScaLBL_Comm->RegularLayout( d_Map, Phi, Averages.Phase_tplus ); else - ScaLBL_CopyToHost(Averages.Phase.data(),Phi,N*sizeof(double)); + ScaLBL_CopyToHost( Averages.Phase_tplus.data(), Phi, N * sizeof( double ) ); + // memcpy(Averages.Phase_tplus.data(),phase->data(),N*sizeof(double)); + } + if ( timestep % d_analysis_interval == 0 ) { + if ( d_regular ) + d_ScaLBL_Comm->RegularLayout( d_Map, Phi, Averages.Phase_tminus ); + else + ScaLBL_CopyToHost( Averages.Phase_tminus.data(), Phi, N * sizeof( double ) ); + // memcpy(Averages.Phase_tminus.data(),phase->data(),N*sizeof(double)); + } + // if ( matches(type,AnalysisType::CopySimState) ) { + if ( timestep % d_analysis_interval + 4 == d_analysis_interval ) { + // Copy the members of Averages to the cpu (phase was copied above) + PROFILE_START( 
"Copy-Pressure", 1 ); + ScaLBL_D3Q19_Pressure( fq, Pressure, d_Np ); + // ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); + ScaLBL_DeviceBarrier(); + PROFILE_STOP( "Copy-Pressure", 1 ); + PROFILE_START( "Copy-Wait", 1 ); + PROFILE_STOP( "Copy-Wait", 1 ); + PROFILE_START( "Copy-State", 1 ); + // memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double)); + if ( d_regular ) + d_ScaLBL_Comm->RegularLayout( d_Map, Phi, Averages.Phase ); + else + ScaLBL_CopyToHost( Averages.Phase.data(), Phi, N * sizeof( double ) ); // copy other variables - d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Press); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[0],Averages.Vel_x); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[d_Np],Averages.Vel_y); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[2*d_Np],Averages.Vel_z); - PROFILE_STOP("Copy-State",1); + d_ScaLBL_Comm->RegularLayout( d_Map, Pressure, Averages.Press ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[0], Averages.Vel_x ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[d_Np], Averages.Vel_y ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[2 * d_Np], Averages.Vel_z ); + PROFILE_STOP( "Copy-State", 1 ); } - std::shared_ptr cfq,cDen; - //if ( matches(type,AnalysisType::CreateRestart) ) { - if (timestep%d_restart_interval==0){ + std::shared_ptr cfq, cDen; + // if ( matches(type,AnalysisType::CreateRestart) ) { + if ( timestep % d_restart_interval == 0 ) { // Copy restart data to the CPU - cDen = std::shared_ptr(new double[2*d_Np],DeleteArray); - cfq = std::shared_ptr(new double[19*d_Np],DeleteArray); - ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); - ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); + cDen = make_shared_array( 2 * d_Np ); + cfq = make_shared_array( 19 * d_Np ); + ScaLBL_CopyToHost( cfq.get(), fq, 19 * d_Np * sizeof( double ) ); + ScaLBL_CopyToHost( cDen.get(), Den, 2 * d_Np * sizeof( double ) ); } - PROFILE_STOP("Copy data to host",1); + PROFILE_STOP( "Copy data to host", 1 ); // Spawn 
threads to do blob identification work - if ( matches(type,AnalysisType::IdentifyBlobs) ) { - phase = std::shared_ptr(new DoubleArray(d_N[0],d_N[1],d_N[2])); - if (d_regular) - d_ScaLBL_Comm->RegularLayout(d_Map,Phi,*phase); + if ( matches( type, AnalysisType::IdentifyBlobs ) ) { + phase = std::make_shared( d_N[0], d_N[1], d_N[2] ); + if ( d_regular ) + d_ScaLBL_Comm->RegularLayout( d_Map, Phi, *phase ); else - ScaLBL_CopyToHost(phase->data(),Phi,N*sizeof(double)); + ScaLBL_CopyToHost( phase->data(), Phi, N * sizeof( double ) ); - BlobIDstruct new_index(new std::pair(0,IntArray())); - BlobIDstruct new_ids(new std::pair(0,IntArray())); - BlobIDList new_list(new std::vector()); - auto work1 = new BlobIdentificationWorkItem1(timestep,d_N[0],d_N[1],d_N[2],d_rank_info, - phase,Averages.SDs,d_last_ids,new_index,new_ids,new_list,getComm()); - auto work2 = new BlobIdentificationWorkItem2(timestep,d_N[0],d_N[1],d_N[2],d_rank_info, - phase,Averages.SDs,d_last_ids,new_index,new_ids,new_list,getComm()); - work1->add_dependency(d_wait_blobID); - work2->add_dependency(d_tpool.add_work(work1)); - d_wait_blobID = d_tpool.add_work(work2); - d_last_index = new_index; - d_last_ids = new_ids; + auto new_index = std::make_shared>( 0, IntArray() ); + auto new_ids = std::make_shared>( 0, IntArray() ); + auto new_list = std::make_shared>(); + auto work1 = new BlobIdentificationWorkItem1( timestep, d_N[0], d_N[1], d_N[2], d_rank_info, + phase, Averages.SDs, d_last_ids, new_index, new_ids, new_list, getComm() ); + auto work2 = new BlobIdentificationWorkItem2( timestep, d_N[0], d_N[1], d_N[2], d_rank_info, + phase, Averages.SDs, d_last_ids, new_index, new_ids, new_list, getComm() ); + work1->add_dependency( d_wait_blobID ); + work2->add_dependency( d_tpool.add_work( work1 ) ); + d_wait_blobID = d_tpool.add_work( work2 ); + d_last_index = new_index; + d_last_ids = new_ids; d_last_id_map = new_list; } // Spawn threads to do the analysis work - //if (timestep%d_restart_interval==0){ + // if 
(timestep%d_restart_interval==0){ // if ( matches(type,AnalysisType::ComputeAverages) ) { - if ( timestep%d_analysis_interval == 0 ) { - auto work = new AnalysisWorkItem(type,timestep,Averages,d_last_index,d_last_id_map,d_beta); - work->add_dependency(d_wait_blobID); - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_vis); // Make sure we are done using analysis before modifying - d_wait_analysis = d_tpool.add_work(work); + if ( timestep % d_analysis_interval == 0 ) { + auto work = + new AnalysisWorkItem( type, timestep, Averages, d_last_index, d_last_id_map, d_beta ); + work->add_dependency( d_wait_blobID ); + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_vis ); // Make sure we are done using analysis before modifying + d_wait_analysis = d_tpool.add_work( work ); } // Spawn a thread to write the restart file // if ( matches(type,AnalysisType::CreateRestart) ) { - if (timestep%d_restart_interval==0){ + if ( timestep % d_restart_interval == 0 ) { - if (d_rank==0) { - input_db->putScalar( "Restart", true ); - std::ofstream OutStream("Restart.db"); - input_db->print(OutStream, ""); - OutStream.close(); - } - // Write the restart file (using a seperate thread) - auto work = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); - work->add_dependency(d_wait_restart); - d_wait_restart = d_tpool.add_work(work); + if ( d_rank == 0 ) { + input_db->putScalar( "Restart", true ); + std::ofstream OutStream( "Restart.db" ); + input_db->print( OutStream, "" ); + OutStream.close(); + } + // Write the restart file (using a seperate thread) + auto work = new WriteRestartWorkItem( d_restartFile.c_str(), cDen, cfq, d_Np ); + work->add_dependency( d_wait_restart ); + d_wait_restart = d_tpool.add_work( work ); } // Save the results for visualization // if ( matches(type,AnalysisType::CreateRestart) ) { - if (timestep%d_restart_interval==0){ + if ( timestep % d_restart_interval == 0 ) { // Write the vis files - commWrapper comm = 
getComm(); - fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); - auto work = new WriteVisWorkItem( timestep, d_meshData, Averages, fillData, std::move( comm ) ); - work->add_dependency(d_wait_blobID); - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_vis); - d_wait_vis = d_tpool.add_work(work); + auto work = + new WriteVisWorkItem( timestep, d_meshData, Averages, d_n, d_rank_info, getComm() ); + work->add_dependency( d_wait_blobID ); + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_vis ); + d_wait_vis = d_tpool.add_work( work ); } - PROFILE_STOP("run"); + PROFILE_STOP( "run" ); } /****************************************************************** * Run the analysis * ******************************************************************/ -void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) +void runAnalysis::basic( int timestep, std::shared_ptr input_db, SubPhase &Averages, + const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ) { int Nx = d_N[0]; int Ny = d_N[1]; int Nz = d_N[2]; - int N = Nx*Ny*Nz; + int N = Nx * Ny * Nz; NULL_USE( N ); // Check which analysis steps we need to perform - auto color_db = input_db->getDatabase( "Color" ); - auto vis_db = input_db->getDatabase( "Visualization" ); + auto color_db = input_db->getDatabase( "Color" ); + auto vis_db = input_db->getDatabase( "Visualization" ); - //int timestep = color_db->getWithDefault( "timestep", 0 ); + // int timestep = color_db->getWithDefault( "timestep", 0 ); auto type = computeAnalysisType( timestep ); if ( type == AnalysisType::AnalyzeNone ) return; @@ -960,101 +1039,102 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha finish(); } - PROFILE_START("basic"); + PROFILE_START( "basic" ); // Copy the appropriate variables to the host (so we can spawn new threads) 
ScaLBL_DeviceBarrier(); - PROFILE_START("Copy data to host",1); + PROFILE_START( "Copy data to host", 1 ); - //if ( matches(type,AnalysisType::CopySimState) ) { - if ( timestep%d_analysis_interval == 0 ) { + // if ( matches(type,AnalysisType::CopySimState) ) { + if ( timestep % d_analysis_interval == 0 ) { finish(); // can't copy if threads are still working on data // Copy the members of Averages to the cpu (phase was copied above) - PROFILE_START("Copy-Pressure",1); - ScaLBL_D3Q19_Pressure(fq,Pressure,d_Np); - //ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); + PROFILE_START( "Copy-Pressure", 1 ); + ScaLBL_D3Q19_Pressure( fq, Pressure, d_Np ); + // ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); ScaLBL_DeviceBarrier(); - PROFILE_STOP("Copy-Pressure",1); - PROFILE_START("Copy-Wait",1); - PROFILE_STOP("Copy-Wait",1); - PROFILE_START("Copy-State",1); + PROFILE_STOP( "Copy-Pressure", 1 ); + PROFILE_START( "Copy-Wait", 1 ); + PROFILE_STOP( "Copy-Wait", 1 ); + PROFILE_START( "Copy-State", 1 ); /*if (d_regular) d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phi); else */ - ScaLBL_CopyToHost(Averages.Phi.data(),Phi,N*sizeof(double)); - // copy other variables - d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Pressure); - d_ScaLBL_Comm->RegularLayout(d_Map,&Den[0],Averages.Rho_n); - d_ScaLBL_Comm->RegularLayout(d_Map,&Den[d_Np],Averages.Rho_w); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[0],Averages.Vel_x); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[d_Np],Averages.Vel_y); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[2*d_Np],Averages.Vel_z); - PROFILE_STOP("Copy-State",1); + ScaLBL_CopyToHost( Averages.Phi.data(), Phi, N * sizeof( double ) ); + // copy other variables + d_ScaLBL_Comm->RegularLayout( d_Map, Pressure, Averages.Pressure ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Den[0], Averages.Rho_n ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Den[d_Np], Averages.Rho_w ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[0], Averages.Vel_x ); + d_ScaLBL_Comm->RegularLayout( 
d_Map, &Velocity[d_Np], Averages.Vel_y ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[2 * d_Np], Averages.Vel_z ); + PROFILE_STOP( "Copy-State", 1 ); } - PROFILE_STOP("Copy data to host"); + PROFILE_STOP( "Copy data to host" ); // Spawn threads to do the analysis work - //if (timestep%d_restart_interval==0){ + // if (timestep%d_restart_interval==0){ // if ( matches(type,AnalysisType::ComputeAverages) ) { - if ( timestep%d_analysis_interval == 0 ) { - auto work = new BasicWorkItem(type,timestep,Averages); - work->add_dependency(d_wait_subphase); // Make sure we are done using analysis before modifying - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_vis); - d_wait_analysis = d_tpool.add_work(work); - } - - if ( timestep%d_subphase_analysis_interval == 0 ) { - auto work = new SubphaseWorkItem(type,timestep,Averages); - work->add_dependency(d_wait_subphase); // Make sure we are done using analysis before modifying - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_vis); - d_wait_subphase = d_tpool.add_work(work); + if ( timestep % d_analysis_interval == 0 ) { + auto work = new BasicWorkItem( type, timestep, Averages ); + work->add_dependency( + d_wait_subphase ); // Make sure we are done using analysis before modifying + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_vis ); + d_wait_analysis = d_tpool.add_work( work ); } - if (timestep%d_restart_interval==0){ - std::shared_ptr cfq,cDen; - // Copy restart data to the CPU - cDen = std::shared_ptr(new double[2*d_Np],DeleteArray); - cfq = std::shared_ptr(new double[19*d_Np],DeleteArray); - ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); - ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); - - if (d_rank==0) { - color_db->putScalar("timestep",timestep); - color_db->putScalar( "Restart", true ); - input_db->putDatabase("Color", color_db); - std::ofstream OutStream("Restart.db"); - input_db->print(OutStream, ""); - OutStream.close(); - - } 
- // Write the restart file (using a seperate thread) - auto work1 = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); - work1->add_dependency(d_wait_restart); - d_wait_restart = d_tpool.add_work(work1); - + if ( timestep % d_subphase_analysis_interval == 0 ) { + auto work = new SubphaseWorkItem( type, timestep, Averages ); + work->add_dependency( + d_wait_subphase ); // Make sure we are done using analysis before modifying + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_vis ); + d_wait_subphase = d_tpool.add_work( work ); } - - if (timestep%d_visualization_interval==0){ + + if ( timestep % d_restart_interval == 0 ) { + std::shared_ptr cfq, cDen; + // Copy restart data to the CPU + cDen = make_shared_array( 2 * d_Np ); + cfq = make_shared_array( 19 * d_Np ); + ScaLBL_CopyToHost( cfq.get(), fq, 19 * d_Np * sizeof( double ) ); + ScaLBL_CopyToHost( cDen.get(), Den, 2 * d_Np * sizeof( double ) ); + + if ( d_rank == 0 ) { + color_db->putScalar( "timestep", timestep ); + color_db->putScalar( "Restart", true ); + input_db->putDatabase( "Color", color_db ); + std::ofstream OutStream( "Restart.db" ); + input_db->print( OutStream, "" ); + OutStream.close(); + } + // Write the restart file (using a seperate thread) + auto work1 = new WriteRestartWorkItem( d_restartFile.c_str(), cDen, cfq, d_Np ); + work1->add_dependency( d_wait_restart ); + d_wait_restart = d_tpool.add_work( work1 ); + } + + if ( timestep % d_visualization_interval == 0 ) { // Write the vis files - commWrapper comm = getComm(); - fillHalo fillData( comm.comm, d_rank_info, {Nx-2,Ny-2,Nz-2}, {1,1,1}, 0, 1 ); - auto work = new IOWorkItem( timestep, input_db, d_meshData, Averages, fillData, std::move( comm ) ); - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_subphase); - work->add_dependency(d_wait_vis); - d_wait_vis = d_tpool.add_work(work); + auto work = + new IOWorkItem( timestep, input_db, d_meshData, Averages, d_n, d_rank_info, getComm() ); + 
work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_subphase ); + work->add_dependency( d_wait_vis ); + d_wait_vis = d_tpool.add_work( work ); } - PROFILE_STOP("basic"); + PROFILE_STOP( "basic" ); } -void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) +void runAnalysis::WriteVisData( int timestep, std::shared_ptr input_db, + SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, + double *Den ) { - auto color_db = input_db->getDatabase( "Color" ); - auto vis_db = input_db->getDatabase( "Visualization" ); - //int timestep = color_db->getWithDefault( "timestep", 0 ); + auto color_db = input_db->getDatabase( "Color" ); + auto vis_db = input_db->getDatabase( "Visualization" ); + // int timestep = color_db->getWithDefault( "timestep", 0 ); // Check which analysis steps we need to perform auto type = computeAnalysisType( timestep ); @@ -1070,16 +1150,15 @@ void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); - PROFILE_START("write vis",1); + PROFILE_START( "write vis", 1 ); // if (Averages.WriteVis == true){ - commWrapper comm = getComm(); - fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); - auto work2 = new IOWorkItem(timestep, input_db, d_meshData, Averages, fillData, std::move( comm ) ); - work2->add_dependency(d_wait_vis); - d_wait_vis = d_tpool.add_work(work2); + auto work2 = + new IOWorkItem( timestep, input_db, d_meshData, Averages, d_n, d_rank_info, getComm() ); + work2->add_dependency( d_wait_vis ); + d_wait_vis = d_tpool.add_work( work2 ); - //Averages.WriteVis = false; - - PROFILE_STOP("write vis"); + // Averages.WriteVis = false; + + PROFILE_STOP( "write vis" ); } diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h index 33adbcb0..a82c4ba0 100644 --- 
a/analysis/runAnalysis.h +++ b/analysis/runAnalysis.h @@ -1,41 +1,48 @@ #ifndef RunAnalysis_H_INC #define RunAnalysis_H_INC -#include "analysis/analysis.h" -#include "analysis/TwoPhase.h" #include "analysis/SubPhase.h" +#include "analysis/TwoPhase.h" +#include "analysis/analysis.h" #include "common/Communication.h" #include "common/ScaLBL.h" #include "threadpool/thread_pool.h" #include -typedef std::shared_ptr> BlobIDstruct; -typedef std::shared_ptr> BlobIDList; - // Types of analysis -enum class AnalysisType : uint64_t { AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02, - CopySimState=0x04, ComputeAverages=0x08, CreateRestart=0x10, WriteVis=0x20, ComputeSubphase=0x40 }; +enum class AnalysisType : uint64_t { + AnalyzeNone = 0, + IdentifyBlobs = 0x01, + CopyPhaseIndicator = 0x02, + CopySimState = 0x04, + ComputeAverages = 0x08, + CreateRestart = 0x10, + WriteVis = 0x20, + ComputeSubphase = 0x40 +}; //! Class to run the analysis in multiple threads class runAnalysis { public: - //! Constructor - runAnalysis(std::shared_ptr db, const RankInfoStruct& rank_info, - std::shared_ptr ScaLBL_Comm, std::shared_ptr dm, int Np, bool Regular, IntArray Map ); + runAnalysis( std::shared_ptr db, const RankInfoStruct &rank_info, + std::shared_ptr ScaLBL_Comm, std::shared_ptr dm, int Np, + bool Regular, IntArray Map ); //! Destructor ~runAnalysis(); //! 
Run the next analysis - void run(int timestep, std::shared_ptr db, TwoPhase &Averages, const double *Phi, + void run( int timestep, std::shared_ptr db, TwoPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ); - - void basic( int timestep, std::shared_ptr db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ); - void WriteVisData(int timestep, std::shared_ptr vis_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den); + + void basic( int timestep, std::shared_ptr db, SubPhase &Averages, const double *Phi, + double *Pressure, double *Velocity, double *fq, double *Den ); + void WriteVisData( int timestep, std::shared_ptr vis_db, SubPhase &Averages, + const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ); //! Finish all active analysis void finish(); @@ -44,7 +51,8 @@ public: * \brief Set the affinities * \details This function will create the analysis threads and set the affinity * of this thread and all analysis threads. If MPI_THREAD_MULTIPLE is not - * enabled, the analysis threads will be disabled and the analysis will run in the current thread. + * enabled, the analysis threads will be disabled and the analysis will run in the current + * thread. 
* @param[in] method Method used to control the affinities: * none - Don't use threads (runs all analysis in the current thread) * default - Create the specified number of threads, but don't load balance @@ -53,39 +61,36 @@ public: * that all threads run on independent cores * @param[in] N_threads Number of threads, only used by some of the methods */ - void createThreads( const std::string& method = "default", int N_threads = 4 ); + void createThreads( const std::string &method = "default", int N_threads = 4 ); private: - runAnalysis(); // Determine the analysis to perform AnalysisType computeAnalysisType( int timestep ); public: - class commWrapper { - public: + public: Utilities::MPI comm; int tag; runAnalysis *analysis; - commWrapper( int tag, const Utilities::MPI& comm, runAnalysis *analysis ); - commWrapper( ) = delete; + commWrapper( int tag, const Utilities::MPI &comm, runAnalysis *analysis ); + commWrapper() = delete; commWrapper( const commWrapper &rhs ) = delete; - commWrapper& operator=( const commWrapper &rhs ) = delete; + commWrapper &operator=( const commWrapper &rhs ) = delete; commWrapper( commWrapper &&rhs ); ~commWrapper(); }; // Get a comm (not thread safe) - commWrapper getComm( ); + commWrapper getComm(); private: - - std::array d_n; // Number of local cells - std::array d_N; // NNumber of local cells with ghosts + std::array d_n; // Number of local cells + std::array d_N; // Number of local cells with ghosts int d_Np; int d_rank; int d_restart_interval, d_analysis_interval, d_blobid_interval, d_visualization_interval; @@ -95,9 +100,9 @@ private: ThreadPool d_tpool; RankInfoStruct d_rank_info; IntArray d_Map; - BlobIDstruct d_last_ids; - BlobIDstruct d_last_index; - BlobIDList d_last_id_map; + std::shared_ptr> d_last_ids; + std::shared_ptr> d_last_index; + std::shared_ptr> d_last_id_map; std::vector d_meshData; std::string d_restartFile; Utilities::MPI d_comm; @@ -114,8 +119,6 @@ private: // Friends friend commWrapper::~commWrapper(); - }; 
#endif - diff --git a/common/Communication.h b/common/Communication.h index 4cd9ad70..5baaa962 100644 --- a/common/Communication.h +++ b/common/Communication.h @@ -67,6 +67,10 @@ public: //! Destructor ~fillHalo( ); + fillHalo() = delete; + fillHalo(const fillHalo&) = delete; + fillHalo& operator=(const fillHalo&) = delete; + /*! * @brief Communicate the halos * @param[in] array The array on which we fill the halos @@ -93,9 +97,6 @@ private: TYPE *mem; TYPE *send[3][3][3], *recv[3][3][3]; MPI_Request send_req[3][3][3], recv_req[3][3][3]; - fillHalo(); // Private empty constructor - fillHalo(const fillHalo&); // Private copy constructor - fillHalo& operator=(const fillHalo&); // Private assignment operator void pack( const Array& array, int i, int j, int k, TYPE *buffer ); void unpack( Array& array, int i, int j, int k, const TYPE *buffer ); }; diff --git a/common/Domain.cpp b/common/Domain.cpp index d552fb8a..ec24365d 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -558,17 +558,13 @@ void Domain::Decomp( const std::string& Filename ) int64_t z_transition_size = (nprocz*nz - (global_Nz - zStart))/2; if (z_transition_size < 0) z_transition_size=0; - char LocalRankFilename[40]; - char *loc_id; - loc_id = new char [(nx+2)*(ny+2)*(nz+2)]; - // Set up the sub-domains if (RANK==0){ printf("Distributing subdomains across %i processors \n",nprocs); printf("Process grid: %i x %i x %i \n",nprocx,nprocy,nprocz); printf("Subdomain size: %i x %i x %i \n",nx,ny,nz); printf("Size of transition region: %ld \n", z_transition_size); - + auto loc_id = new char [(nx+2)*(ny+2)*(nz+2)]; for (int kp=0; kpcommunicator = comm.communicator; this->comm_rank = comm.comm_rank; this->comm_size = comm.comm_size; - this->d_ranks = comm.d_ranks; this->d_isNull = comm.d_isNull; this->d_manage = comm.d_manage; this->d_maxTag = comm.d_maxTag; @@ -537,7 +530,6 @@ MPI_CLASS &MPI_CLASS::operator=( MPI_CLASS &&rhs ) std::swap( profile_level, rhs.profile_level ); std::swap( comm_rank, 
rhs.comm_rank ); std::swap( comm_size, rhs.comm_size ); - std::swap( d_ranks, rhs.d_ranks ); std::swap( d_maxTag, rhs.d_maxTag ); std::swap( d_currentTag, rhs.d_currentTag ); std::swap( d_count, rhs.d_count ); @@ -560,7 +552,6 @@ std::atomic_int d_global_count_self = { 1 }; MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) { d_count = nullptr; - d_ranks = nullptr; d_manage = false; tmp_alignment = -1; // Check if we are using our version of comm_world @@ -623,11 +614,7 @@ MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) } if ( d_manage ) ++N_MPI_Comm_created; - // Create d_ranks - if ( comm_size > 1 ) { - d_ranks = new int[comm_size]; - d_ranks[0] = -1; - } + #else // We are not using MPI, intialize based on the communicator NULL_USE( manage ); @@ -636,7 +623,7 @@ MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) d_maxTag = mpi_max_tag; d_isNull = communicator == MPI_COMM_NULL; if ( d_isNull ) - comm_size = 0; + comm_size = 0; #endif if ( communicator == MPI_CLASS_COMM_WORLD ) { d_currentTag = d_global_currentTag_world1; @@ -663,34 +650,32 @@ MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) ************************************************************************/ std::vector MPI_CLASS::globalRanks() const { - // Get my global rank if it has not been set - static int myGlobalRank = -1; - if ( myGlobalRank == -1 ) { -#ifdef USE_MPI - if ( MPI_active() ) - MPI_Comm_rank( MPI_CLASS_COMM_WORLD, &myGlobalRank ); -#else - myGlobalRank = 0; -#endif - } - // Check if we are dealing with a serial or null communicator - if ( comm_size == 1 ) - return std::vector( 1, myGlobalRank ); - if ( d_ranks == nullptr || communicator == MPI_COMM_NULL ) + if ( d_isNull ) return std::vector(); - // Fill d_ranks if necessary - if ( d_ranks[0] == -1 ) { - if ( communicator == MPI_CLASS_COMM_WORLD ) { - for ( int i = 0; i < comm_size; i++ ) - d_ranks[i] = i; - } else { - - MPI_ASSERT( myGlobalRank != -1 ); - this->allGather( myGlobalRank, d_ranks ); - } +#ifdef USE_MPI + // Get my 
global rank and size if it has not been set + static int globalRank = -1; + static int globalSize = -1; + if ( globalRank == -1 && MPI_active() ) { + MPI_Comm_rank( MPI_CLASS_COMM_WORLD, &globalRank ); + MPI_Comm_size( MPI_CLASS_COMM_WORLD, &globalSize ); } - // Return d_ranks - return std::vector( d_ranks, d_ranks + comm_size ); + // Check if we are dealing with a serial or global communicator + if ( comm_size == 1 ) + return std::vector( 1, globalRank ); + if ( comm_size == globalSize ) { + std::vector ranks( globalSize ); + for ( int i = 0; i < globalSize; i++ ) + ranks[i] = i; + return ranks; + } + // Get the global rank from each rank in the communicator + auto ranks = allGather( globalRank ); + std::sort( ranks.begin(), ranks.end() ); + return ranks; +#else + return std::vector( 1, 1 ); +#endif } @@ -2806,49 +2791,44 @@ MPI_Request MPI_CLASS::IrecvBytes( } - /************************************************************************ * sendrecv * ************************************************************************/ #if defined( USE_MPI ) template<> -void MPI_CLASS::sendrecv( const char* sendbuf, int sendcount, int dest, int sendtag, - char* recvbuf, int recvcount, int source, int recvtag ) const +void MPI_CLASS::sendrecv( const char *sendbuf, int sendcount, int dest, int sendtag, + char *recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_CHAR, dest, sendtag, - recvbuf, recvcount, MPI_CHAR, source, recvtag, - communicator, MPI_STATUS_IGNORE ); + MPI_Sendrecv( sendbuf, sendcount, MPI_CHAR, dest, sendtag, recvbuf, recvcount, MPI_CHAR, source, + recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> -void MPI_CLASS::sendrecv( const int* sendbuf, int sendcount, int dest, int sendtag, - int* recvbuf, int recvcount, int source, int recvtag ) const +void MPI_CLASS::sendrecv( const int *sendbuf, int sendcount, int dest, int sendtag, 
+ int *recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_INT, dest, sendtag, - recvbuf, recvcount, MPI_INT, source, recvtag, - communicator, MPI_STATUS_IGNORE ); + MPI_Sendrecv( sendbuf, sendcount, MPI_INT, dest, sendtag, recvbuf, recvcount, MPI_INT, source, + recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> -void MPI_CLASS::sendrecv( const float* sendbuf, int sendcount, int dest, int sendtag, - float* recvbuf, int recvcount, int source, int recvtag ) const +void MPI_CLASS::sendrecv( const float *sendbuf, int sendcount, int dest, int sendtag, + float *recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_FLOAT, dest, sendtag, - recvbuf, recvcount, MPI_FLOAT, source, recvtag, - communicator, MPI_STATUS_IGNORE ); + MPI_Sendrecv( sendbuf, sendcount, MPI_FLOAT, dest, sendtag, recvbuf, recvcount, MPI_FLOAT, + source, recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> -void MPI_CLASS::sendrecv( const double* sendbuf, int sendcount, int dest, int sendtag, - double* recvbuf, int recvcount, int source, int recvtag ) const +void MPI_CLASS::sendrecv( const double *sendbuf, int sendcount, int dest, int sendtag, + double *recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_DOUBLE, dest, sendtag, - recvbuf, recvcount, MPI_DOUBLE, source, recvtag, - communicator, MPI_STATUS_IGNORE ); + MPI_Sendrecv( sendbuf, sendcount, MPI_DOUBLE, dest, sendtag, recvbuf, recvcount, MPI_DOUBLE, + source, recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } #endif @@ -3815,17 +3795,16 @@ MPI MPI::loadBalance( double local, std::vector work ) MPI_ASSERT( (int) work.size() == getSize() ); auto perf = 
allGather( local ); std::vector I( work.size() ); - for ( size_t i=0; i key( work.size() ); - for ( size_t i=0; i globalRanks() const; @@ -796,7 +802,8 @@ public: // Member functions * @brief This function sends and recieves data using a blocking call */ template - void sendrecv( const type *sendbuf, int sendcount, int dest, int sendtag, type *recvbuf, int recvcount, int source, int recvtag ) const; + void sendrecv( const type *sendbuf, int sendcount, int dest, int sendtag, type *recvbuf, + int recvcount, int source, int recvtag ) const; /*! @@ -1126,9 +1133,6 @@ private: // data members // The rank and size of the communicator int comm_rank, comm_size; - // The ranks of the comm in the global comm - mutable int *volatile d_ranks; - // Some attributes int d_maxTag; int *volatile d_currentTag; diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 76a60fd3..182004ff 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -306,9 +306,99 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ } -ScaLBL_Communicator::~ScaLBL_Communicator(){ - // destrutor does nothing (bad idea) - // -- note that there needs to be a way to free memory allocated on the device!!! 
+ScaLBL_Communicator::~ScaLBL_Communicator() +{ + + ScaLBL_FreeDeviceMemory( sendbuf_x ); + ScaLBL_FreeDeviceMemory( sendbuf_X ); + ScaLBL_FreeDeviceMemory( sendbuf_y ); + ScaLBL_FreeDeviceMemory( sendbuf_Y ); + ScaLBL_FreeDeviceMemory( sendbuf_z ); + ScaLBL_FreeDeviceMemory( sendbuf_Z ); + ScaLBL_FreeDeviceMemory( sendbuf_xy ); + ScaLBL_FreeDeviceMemory( sendbuf_xY ); + ScaLBL_FreeDeviceMemory( sendbuf_Xy ); + ScaLBL_FreeDeviceMemory( sendbuf_XY ); + ScaLBL_FreeDeviceMemory( sendbuf_xz ); + ScaLBL_FreeDeviceMemory( sendbuf_xZ ); + ScaLBL_FreeDeviceMemory( sendbuf_Xz ); + ScaLBL_FreeDeviceMemory( sendbuf_XZ ); + ScaLBL_FreeDeviceMemory( sendbuf_yz ); + ScaLBL_FreeDeviceMemory( sendbuf_yZ ); + ScaLBL_FreeDeviceMemory( sendbuf_Yz ); + ScaLBL_FreeDeviceMemory( sendbuf_YZ ); + ScaLBL_FreeDeviceMemory( recvbuf_x ); + ScaLBL_FreeDeviceMemory( recvbuf_X ); + ScaLBL_FreeDeviceMemory( recvbuf_y ); + ScaLBL_FreeDeviceMemory( recvbuf_Y ); + ScaLBL_FreeDeviceMemory( recvbuf_z ); + ScaLBL_FreeDeviceMemory( recvbuf_Z ); + ScaLBL_FreeDeviceMemory( recvbuf_xy ); + ScaLBL_FreeDeviceMemory( recvbuf_xY ); + ScaLBL_FreeDeviceMemory( recvbuf_Xy ); + ScaLBL_FreeDeviceMemory( recvbuf_XY ); + ScaLBL_FreeDeviceMemory( recvbuf_xz ); + ScaLBL_FreeDeviceMemory( recvbuf_xZ ); + ScaLBL_FreeDeviceMemory( recvbuf_Xz ); + ScaLBL_FreeDeviceMemory( recvbuf_XZ ); + ScaLBL_FreeDeviceMemory( recvbuf_yz ); + ScaLBL_FreeDeviceMemory( recvbuf_yZ ); + ScaLBL_FreeDeviceMemory( recvbuf_Yz ); + ScaLBL_FreeDeviceMemory( recvbuf_YZ ); + ScaLBL_FreeDeviceMemory( dvcSendList_x ); + ScaLBL_FreeDeviceMemory( dvcSendList_X ); + ScaLBL_FreeDeviceMemory( dvcSendList_y ); + ScaLBL_FreeDeviceMemory( dvcSendList_Y ); + ScaLBL_FreeDeviceMemory( dvcSendList_z ); + ScaLBL_FreeDeviceMemory( dvcSendList_Z ); + ScaLBL_FreeDeviceMemory( dvcSendList_xy ); + ScaLBL_FreeDeviceMemory( dvcSendList_xY ); + ScaLBL_FreeDeviceMemory( dvcSendList_Xy ); + ScaLBL_FreeDeviceMemory( dvcSendList_XY ); + ScaLBL_FreeDeviceMemory( dvcSendList_xz 
); + ScaLBL_FreeDeviceMemory( dvcSendList_xZ ); + ScaLBL_FreeDeviceMemory( dvcSendList_Xz ); + ScaLBL_FreeDeviceMemory( dvcSendList_XZ ); + ScaLBL_FreeDeviceMemory( dvcSendList_yz ); + ScaLBL_FreeDeviceMemory( dvcSendList_yZ ); + ScaLBL_FreeDeviceMemory( dvcSendList_Yz ); + ScaLBL_FreeDeviceMemory( dvcSendList_YZ ); + ScaLBL_FreeDeviceMemory( dvcRecvList_x ); + ScaLBL_FreeDeviceMemory( dvcRecvList_X ); + ScaLBL_FreeDeviceMemory( dvcRecvList_y ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Y ); + ScaLBL_FreeDeviceMemory( dvcRecvList_z ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Z ); + ScaLBL_FreeDeviceMemory( dvcRecvList_xy ); + ScaLBL_FreeDeviceMemory( dvcRecvList_xY ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Xy ); + ScaLBL_FreeDeviceMemory( dvcRecvList_XY ); + ScaLBL_FreeDeviceMemory( dvcRecvList_xz ); + ScaLBL_FreeDeviceMemory( dvcRecvList_xZ ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Xz ); + ScaLBL_FreeDeviceMemory( dvcRecvList_XZ ); + ScaLBL_FreeDeviceMemory( dvcRecvList_yz ); + ScaLBL_FreeDeviceMemory( dvcRecvList_yZ ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Yz ); + ScaLBL_FreeDeviceMemory( dvcRecvList_YZ ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_x ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_X ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_y ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Y ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_z ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Z ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_xy ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_xY ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Xy ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_XY ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_xz ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_xZ ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Xz ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_XZ ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_yz ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_yZ ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Yz ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_YZ ); } double ScaLBL_Communicator::GetPerformance(int *NeighborList, double 
*fq, int Np){ /* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/ @@ -394,7 +484,7 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis int idx,i,j,k,n; // Check that Map has size matching sub-domain - if (Map.size(0) != Nx) + if ( (int) Map.size(0) != Nx) ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Map array dimensions do not match! \n"); // Initialize Map diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 0547814b..3b8edd6c 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -10,14 +10,31 @@ color lattice boltzmann model #include ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), -Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), -Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) + rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0), + tauA(0), tauB(0), rhoA(0), rhoB(0), alpha(0), beta(0), + Fx(0), Fy(0), Fz(0), flux(0), din(0), dout(0), + inletA(0), inletB(0), outletA(0), outletB(0), + Nx(0), Ny(0), Nz(0), N(0), Np(0), nprocx(0), nprocy(0), nprocz(0), + BoundaryCondition(0), Lx(0), Ly(0), Lz(0), id(nullptr), + NeighborList(nullptr), dvcMap(nullptr), fq(nullptr), Aq(nullptr), Bq(nullptr), + Den(nullptr), Phi(nullptr), ColorGrad(nullptr), Velocity(nullptr), Pressure(nullptr), + comm(COMM) { REVERSE_FLOW_DIRECTION = false; } -ScaLBL_ColorModel::~ScaLBL_ColorModel(){ - +ScaLBL_ColorModel::~ScaLBL_ColorModel() +{ + delete [] id; + ScaLBL_FreeDeviceMemory( NeighborList ); + ScaLBL_FreeDeviceMemory( dvcMap ); + ScaLBL_FreeDeviceMemory( fq ); + ScaLBL_FreeDeviceMemory( Aq ); + ScaLBL_FreeDeviceMemory( Bq ); + ScaLBL_FreeDeviceMemory( Den ); + ScaLBL_FreeDeviceMemory( Phi ); + ScaLBL_FreeDeviceMemory( Pressure ); + ScaLBL_FreeDeviceMemory( Velocity ); + 
ScaLBL_FreeDeviceMemory( ColorGrad ); } /*void ScaLBL_ColorModel::WriteCheckpoint(const char *FILENAME, const double *cPhi, const double *cfq, int Np) @@ -408,11 +425,13 @@ void ScaLBL_ColorModel::Create(){ // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + delete [] neighborList; // initialize phi based on PhaseLabel (include solid component labels) double *PhaseLabel; PhaseLabel = new double[N]; AssignComponentLabels(PhaseLabel); ScaLBL_CopyToDevice(Phi, PhaseLabel, N*sizeof(double)); + delete [] PhaseLabel; } /******************************************************** @@ -1097,7 +1116,6 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ ScaLBL_CopyToHost(phase.data(), Phi, N*sizeof(double)); // Extract only the connected part of NWP - BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm); comm.barrier(); @@ -1334,7 +1352,6 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta double volume_connected = 0.0; double second_biggest = 0.0; if (USE_CONNECTED_NWP){ - BlobIDstruct new_index; ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); comm.barrier(); diff --git a/tests/TestNetcdf.cpp b/tests/TestNetcdf.cpp index 38fe08b3..6d43a04d 100644 --- a/tests/TestNetcdf.cpp +++ b/tests/TestNetcdf.cpp @@ -1,7 +1,7 @@ // Test reading/writing netcdf files #include "IO/netcdf.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "common/UnitTest.h" @@ -13,7 +13,8 @@ void load( const std::string& ); void test_NETCDF( UnitTest& ut ) { - const int rank = comm_rank( MPI_COMM_WORLD ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); int nprocx = 2; int nprocy = 2; int nprocz = 2; @@ -26,11 +27,11 @@ void test_NETCDF( UnitTest& ut ) size_t z = info.kz*data.size(2); const char* 
filename = "test.nc"; std::vector dim = { (int) data.size(0)*nprocx, (int) data.size(1)*nprocy, (int) data.size(2)*nprocz }; - int fid = netcdf::open( filename, netcdf::CREATE, MPI_COMM_WORLD ); + int fid = netcdf::open( filename, netcdf::CREATE, comm ); auto dims = netcdf::defDim( fid, {"X", "Y", "Z"}, dim ); netcdf::write( fid, "tmp", dims, data, info ); netcdf::close( fid ); - MPI_Barrier( MPI_COMM_WORLD ); + comm.barrier(); // Read the contents of the file we created fid = netcdf::open( filename, netcdf::READ ); Array tmp = netcdf::getVar( fid, "tmp" ); diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index 6451d38a..590d5b8e 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -1,13 +1,13 @@ +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include "models/ColorModel.h" #include "common/Utilities.h" +#include "models/ColorModel.h" //#define WRE_SURFACES @@ -21,61 +21,59 @@ // Implementation of Two-Phase Immiscible LBM using CUDA //************************************************************************* -int main(int argc, char **argv) +int main( int argc, char **argv ) { - - // Initialize MPI - Utilities::startup( argc, argv ); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); - - // Load the input database - auto db = std::make_shared( argv[1] ); - // Initialize MPI and error handlers - auto multiple = db->getWithDefault( "MPI_THREAD_MULTIPLE", true ); - //Utilities::startup( argc, argv, multiple ); - //Utilities::MPI::changeProfileLevel( 1 ); + // Initialize + Utilities::startup( argc, argv ); - { // Limit scope so variables that contain communicators will free before MPI_Finialize + // Load the input database + auto db = std::make_shared( argv[1] ); - if (rank == 0){ - printf("********************************************************\n"); - printf("Running Color LBM \n"); - 
printf("********************************************************\n"); - } - // Initialize compute device - int device=ScaLBL_SetDevice(rank); - NULL_USE( device ); - ScaLBL_DeviceBarrier(); - comm.barrier(); + { // Limit scope so variables that contain communicators will free before MPI_Finialize - PROFILE_ENABLE(1); - //PROFILE_ENABLE_TRACE(); - //PROFILE_ENABLE_MEMORY(); - PROFILE_SYNCHRONIZE(); - PROFILE_START("Main"); - Utilities::setErrorHandlers(); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); - auto filename = argv[1]; - ScaLBL_ColorModel ColorModel(rank,nprocs,comm); - ColorModel.ReadParams(filename); - ColorModel.SetDomain(); - ColorModel.ReadInput(); - ColorModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables - ColorModel.Initialize(); // initializing the model will set initial conditions for variables - ColorModel.Run(); - //ColorModel.WriteDebug(); + if ( rank == 0 ) { + printf( "********************************************************\n" ); + printf( "Running Color LBM \n" ); + printf( "********************************************************\n" ); + } + // Initialize compute device + int device = ScaLBL_SetDevice( rank ); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); - PROFILE_STOP("Main"); - auto file = db->getWithDefault( "TimerFile", "lbpm_color_simulator" ); - auto level = db->getWithDefault( "TimerLevel", 1 ); - PROFILE_SAVE(file,level); - // **************************************************** + PROFILE_ENABLE( 1 ); + // PROFILE_ENABLE_TRACE(); + // PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START( "Main" ); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_ColorModel ColorModel( rank, nprocs, comm ); + ColorModel.ReadParams( filename ); + ColorModel.SetDomain(); + ColorModel.ReadInput(); + ColorModel.Create(); // creating the model will create data structure to match the 
pore + // structure and allocate variables + ColorModel.Initialize(); // initializing the model will set initial conditions for variables + ColorModel.Run(); + // ColorModel.WriteDebug(); + + PROFILE_STOP( "Main" ); + auto file = db->getWithDefault( "TimerFile", "lbpm_color_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file, level ); + // **************************************************** - } // Limit scope so variables that contain communicators will free before MPI_Finialize + } // Limit scope so variables that contain communicators will free before MPI_Finialize - Utilities::shutdown(); + Utilities::shutdown(); + return 0; } diff --git a/tests/lbpm_morph_pp.cpp b/tests/lbpm_morph_pp.cpp index 12f6f319..e40dd6e0 100644 --- a/tests/lbpm_morph_pp.cpp +++ b/tests/lbpm_morph_pp.cpp @@ -128,7 +128,6 @@ int main(int argc, char **argv) comm.barrier(); // Extract only the connected part of NWP - BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); Dm->Comm.barrier(); diff --git a/tests/lbpm_uCT_pp.cpp b/tests/lbpm_uCT_pp.cpp index dbf9684b..b5d42e82 100644 --- a/tests/lbpm_uCT_pp.cpp +++ b/tests/lbpm_uCT_pp.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -192,7 +192,7 @@ int main(int argc, char **argv) fillFloat[0]->fill( LOCVOL[0] ); } netcdf::close( fid ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("ReadVolume"); if (rank==0) printf("Read complete\n"); @@ -255,15 +255,15 @@ int main(int argc, char **argv) } } } - count_plus=sumReduce( Dm[0]->Comm, count_plus); - count_minus=sumReduce( Dm[0]->Comm, count_minus); + count_plus = Dm[0]->Comm.sumReduce( count_plus); + count_minus = Dm[0]->Comm.sumReduce( count_minus); if (rank==0) 
printf("minimum value=%f, max value=%f \n",min_value,max_value); if (rank==0) printf("plus=%i, minus=%i \n",count_plus,count_minus); ASSERT( count_plus > 0 && count_minus > 0 ); - MPI_Barrier(comm); - mean_plus = sumReduce( Dm[0]->Comm, mean_plus ) / count_plus; - mean_minus = sumReduce( Dm[0]->Comm, mean_minus ) / count_minus; - MPI_Barrier(comm); + comm.barrier(); + mean_plus = Dm[0]->Comm.sumReduce( mean_plus ) / count_plus; + mean_minus = Dm[0]->Comm.sumReduce( mean_minus ) / count_minus; + comm.barrier(); if (rank==0) printf(" Region 1 mean (+): %f, Region 2 mean (-): %f \n",mean_plus, mean_minus); //if (rank==0) printf("Scale the input data (size = %i) \n",LOCVOL[0].length()); @@ -284,7 +284,7 @@ int main(int argc, char **argv) // Fill the source data for the coarse meshes if (rank==0) printf("Coarsen the mesh for N_levels=%i \n",N_levels); - MPI_Barrier(comm); + comm.barrier(); PROFILE_START("CoarsenMesh"); for (int i=1; i filter(ratio[0],ratio[1],ratio[2]); @@ -300,7 +300,7 @@ int main(int argc, char **argv) printf(" filter_x=%i, filter_y=%i, filter_z=%i \n",int(filter.size(0)),int(filter.size(1)),int(filter.size(2)) ); printf(" ratio= %i,%i,%i \n",int(ratio[0]),int(ratio[1]),int(ratio[2]) ); } - MPI_Barrier(comm); + comm.barrier(); } PROFILE_STOP("CoarsenMesh"); @@ -312,7 +312,7 @@ int main(int argc, char **argv) NonLocalMean.back(), *fillFloat.back(), *Dm.back(), nprocx, rough_cutoff, lamda, nlm_sigsq, nlm_depth); PROFILE_STOP("Solve coarse mesh"); - MPI_Barrier(comm); + comm.barrier(); // Refine the solution PROFILE_START("Refine distance"); @@ -326,7 +326,7 @@ int main(int argc, char **argv) rough_cutoff, lamda, nlm_sigsq, nlm_depth); } PROFILE_STOP("Refine distance"); - MPI_Barrier(comm); + comm.barrier(); // Perform a final filter PROFILE_START("Filtering final domains"); @@ -424,14 +424,14 @@ int main(int argc, char **argv) meshData[0].vars.push_back(filter_Dist2_var); fillDouble[0]->copy( filter_Dist2, filter_Dist2_var->data ); #endif - 
MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf("Writing output \n"); // Write visulization data IO::writeData( 0, meshData, comm ); if (rank==0) printf("Finished. \n"); // Compute the Minkowski functionals - MPI_Barrier(comm); + comm.barrier(); auto Averages = std::make_shared(Dm[0]); Array phase_label(Nx[0]+2,Ny[0]+2,Nz[0]+2); From 10c571134648177c5c90ac76ecec39cb26a93c8a Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 22 Jan 2021 19:52:18 -0500 Subject: [PATCH 153/205] add mixed gradient to build on gpu --- cuda/MixedGradient.cu | 73 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 cuda/MixedGradient.cu diff --git a/cuda/MixedGradient.cu b/cuda/MixedGradient.cu new file mode 100644 index 00000000..16b7807c --- /dev/null +++ b/cuda/MixedGradient.cu @@ -0,0 +1,73 @@ +/* Implement Mixed Gradient (Lee et al. JCP 2016)*/ +#include + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) +{ + static int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, + {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0}, + {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, + {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; + + int i,j,k,n,N; + int np,np2,nm; // neighbors + double v,vp,vp2,vm; // values at neighbors + double grad; + + int S = N/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_MixedGradient: %s \n",cudaGetErrorString(err)); + } + cudaProfilerStop(); +} + From 90afe56ead6454817e70b6dfa2858dffebc8c5df Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 22 Jan 2021 20:08:34 -0500 Subject: [PATCH 154/205] add mixed gradient to cuda --- cuda/MixedGradient.cu | 7 +++++-- sample_scripts/configure_huckleberry | 2 +- 2 files changed, 6 insertions(+), 3 
deletions(-) diff --git a/cuda/MixedGradient.cu b/cuda/MixedGradient.cu index 16b7807c..556e34ef 100644 --- a/cuda/MixedGradient.cu +++ b/cuda/MixedGradient.cu @@ -1,5 +1,7 @@ /* Implement Mixed Gradient (Lee et al. JCP 2016)*/ #include +#include +#include #define NBLOCKS 560 #define NTHREADS 128 @@ -11,12 +13,13 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gr {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; - int i,j,k,n,N; + int i,j,k,n,N,idx; int np,np2,nm; // neighbors double v,vp,vp2,vm; // values at neighbors double grad; + N = Nx*Ny*Nz; - int S = N/NBLOCKS/NTHREADS + 1; + int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s Date: Sun, 31 Jan 2021 19:06:07 -0500 Subject: [PATCH 155/205] save the work;to be built and tested --- cpu/FreeLee.cpp | 2046 +++++++++++++++++++++++++-------------- models/FreeLeeModel.cpp | 237 ++--- models/FreeLeeModel.h | 3 +- 3 files changed, 1408 insertions(+), 878 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 35cbd5fd..40a99dd3 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -2,6 +2,1311 @@ #define STOKES +extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + for (n=0; n 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + 
hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); + + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np){ + + int idx,n,nread; + double fq,phi; + + for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + 
+0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(-2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + 0.3333333333333333* + (-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(0.2222222222222222 + ux*ux - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + (mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*ux*ux + 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(-2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*ux*ux + 0.3333333333333333*((-rhoA + rhoB)*ux*ux + + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(0.2222222222222222 + uy*uy - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*uy*uy + 0.3333333333333333*((-rhoA + rhoB)*uy*uy + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(-2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + 0.3333333333333333*(ux*ux + 2*uy 
+ uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*uy*uy + 0.3333333333333333*((-rhoA + rhoB)*uy*uy + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(0.2222222222222222 + uz*uz - + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + (mgx*ux + mgy*uy + mgz*(-1 + uz))* + (-2*chem*uz*uz + 0.3333333333333333*((-rhoA + rhoB)*uz*uz + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(-2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*uz*uz + 0.3333333333333333*((-rhoA + rhoB)*uz*uz + + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)*(0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - 
(m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)*(0.2222222222222222 + (ux - uy)*(ux - uy) - + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*(ux - uy)*(ux - uy) + 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)*(0.2222222222222222 + (ux - uy)*(ux - uy) - + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*(ux - uy)*(ux - uy) + 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))*(0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)*(0.2222222222222222 + (ux 
- uz)*(ux - uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*(ux - uz)*(ux - uz) + 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))*(0.2222222222222222 + (ux - uz)*(ux - uz) - + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*(ux - uz)*(ux - uz) + 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))*(0.2222222222222222 + (uy + uz)(uy + uz) - + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(0.2222222222222222 + (uy + uz)*(uy + uz) - + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)*(0.2222222222222222 + (uy - uz)*(uy - uz) - + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + (mgz + mgx*ux + 
mgy*(-1 + uy) + mgz*uz)* + (-2*chem*(uy - uz)*(uy - uz) + 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))*(0.2222222222222222 + (uy - uz)*(uy - uz) - + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*(uy - uz)*(uy - uz) + 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + // ----------------------------- compute phase field evolution ---------------------------------------- + //Normalize the Color Gradient + C = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag = C; + if (C==0.0) ColorMag=1.0; + nx = nx/ColorMag; + ny = ny/ColorMag; + nz = nz/ColorMag; + //compute surface tension-related parameter + theta = M*4.5*(1-4.0*phi*phi)/W; + + //load distributions of phase field + //q=0 + h0 = hq[n]; + //q=1 + h1 = hq[nr1]; + + //q=2 + h2 = hq[nr2]; + + //q=3 + h3 = hq[nr3]; + + //q=4 + h4 = hq[nr4]; + + //q=5 + h5 = hq[nr5]; + + //q=6 + h6 = hq[nr6]; + + //-------------------------------- BGK collison for phase field ---------------------------------// + // q = 0 + hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; + + // q = 1 + hq[nr2] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + + // q = 2 + hq[nr1] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + + // q = 3 + hq[nr4] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 4 + hq[nr3] = h4 - (h4 - 
phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 5 + hq[nr6] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + + // q = 6 + hq[nr5] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + //........................................................................ + + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,nn2x,ijk; + //int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + double C,theta; + double M 
= 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + for (int n=start; n even part of dist) - //fq = dist[nread]; // reading the f2 data into register fq - nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) - fq = dist[nr2]; // reading the f2 data into register fq - rho += fq; - m1 -= 11.0*(fq); - m2 -= 4.0*(fq); - jx -= fq; - m4 += 4.0*(fq); - m9 += 2.0*(fq); - m10 -= 4.0*(fq); - - // q=3 - //nread = neighborList[n+2*Np]; // neighbor 4 - //fq = dist[nread]; - nr3 = neighborList[n+2*Np]; // neighbor 4 - fq = dist[nr3]; - rho += fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jy = fq; - m6 = -4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 = fq; - m12 = -2.0*fq; - - // q = 4 - //nread = neighborList[n+3*Np]; // neighbor 3 - //fq = dist[nread]; - nr4 = neighborList[n+3*Np]; // neighbor 3 - fq = dist[nr4]; - rho+= fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jy -= fq; - m6 += 4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 += fq; - m12 -= 2.0*fq; - - // q=5 - //nread = neighborList[n+4*Np]; - //fq = dist[nread]; - nr5 = neighborList[n+4*Np]; - fq = dist[nr5]; - rho += fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jz = fq; - m8 = -4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 -= fq; - m12 += 2.0*fq; - - - // q = 6 - //nread = neighborList[n+5*Np]; - //fq = dist[nread]; - nr6 = neighborList[n+5*Np]; - fq = dist[nr6]; - rho+= fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jz -= fq; - m8 += 4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 -= fq; - m12 += 2.0*fq; - - // q=7 - //nread = neighborList[n+6*Np]; - //fq = dist[nread]; - nr7 = neighborList[n+6*Np]; - fq = dist[nr7]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jy += fq; - m6 += fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 = fq; - m16 = fq; - m17 = -fq; - - // q = 8 - //nread = neighborList[n+7*Np]; - //fq = dist[nread]; - nr8 = neighborList[n+7*Np]; - fq = dist[nr8]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jy -= fq; - m6 -= fq; - m9 += fq; - m10 += fq; 
- m11 += fq; - m12 += fq; - m13 += fq; - m16 -= fq; - m17 += fq; - - // q=9 - //nread = neighborList[n+8*Np]; - //fq = dist[nread]; - nr9 = neighborList[n+8*Np]; - fq = dist[nr9]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jy -= fq; - m6 -= fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 -= fq; - m16 += fq; - m17 += fq; - - // q = 10 - //nread = neighborList[n+9*Np]; - //fq = dist[nread]; - nr10 = neighborList[n+9*Np]; - fq = dist[nr10]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jy += fq; - m6 += fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 -= fq; - m16 -= fq; - m17 -= fq; - - // q=11 - //nread = neighborList[n+10*Np]; - //fq = dist[nread]; - nr11 = neighborList[n+10*Np]; - fq = dist[nr11]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jz += fq; - m8 += fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 = fq; - m16 -= fq; - m18 = fq; - - // q=12 - //nread = neighborList[n+11*Np]; - //fq = dist[nread]; - nr12 = neighborList[n+11*Np]; - fq = dist[nr12]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jz -= fq; - m8 -= fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 += fq; - m16 += fq; - m18 -= fq; - - // q=13 - //nread = neighborList[n+12*Np]; - //fq = dist[nread]; - nr13 = neighborList[n+12*Np]; - fq = dist[nr13]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jz -= fq; - m8 -= fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 -= fq; - m16 -= fq; - m18 -= fq; - - // q=14 - //nread = neighborList[n+13*Np]; - //fq = dist[nread]; - nr14 = neighborList[n+13*Np]; - fq = dist[nr14]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jz += fq; - m8 += fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 -= fq; - m16 += fq; - m18 += fq; - - // q=15 - nread = neighborList[n+14*Np]; - fq = dist[nread]; - //fq = dist[17*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy += fq; - m6 
+= fq; - jz += fq; - m8 += fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 = fq; - m17 += fq; - m18 -= fq; - - // q=16 - nread = neighborList[n+15*Np]; - fq = dist[nread]; - //fq = dist[8*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy -= fq; - m6 -= fq; - jz -= fq; - m8 -= fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 += fq; - m17 -= fq; - m18 += fq; - - // q=17 - //fq = dist[18*Np+n]; - nread = neighborList[n+16*Np]; - fq = dist[nread]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy += fq; - m6 += fq; - jz -= fq; - m8 -= fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 -= fq; - m17 += fq; - m18 += fq; - - // q=18 - nread = neighborList[n+17*Np]; - fq = dist[nread]; - //fq = dist[9*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy -= fq; - m6 -= fq; - jz += fq; - m8 += fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 -= fq; - m17 -= fq; - m18 -= fq; - - //........................................................................ - //..............carry out relaxation process.............................. - //..........Toelke, Fruediger et. al. 2006................................ 
- if (C == 0.0) nx = ny = nz = 0.0; - m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1); - m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); - //.................inverse transformation...................................................... - - // q=0 - fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; - dist[n] = fq; - - // q = 1 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; - //nread = neighborList[n+Np]; - dist[nr2] = fq; - - // q=2 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; - //nread = neighborList[n]; - dist[nr1] = fq; - - // q = 3 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; - //nread = neighborList[n+3*Np]; - dist[nr4] = fq; - - // q = 4 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; - //nread = neighborList[n+2*Np]; - dist[nr3] = fq; - - // q = 5 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; - //nread = neighborList[n+5*Np]; - dist[nr6] = fq; - - // q = 6 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; - //nread = neighborList[n+4*Np]; - dist[nr5] = fq; - - // q = 
7 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); - //nread = neighborList[n+7*Np]; - dist[nr8] = fq; - - // q = 8 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 - +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); - //nread = neighborList[n+6*Np]; - dist[nr7] = fq; - - // q = 9 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); - //nread = neighborList[n+9*Np]; - dist[nr10] = fq; - - // q = 10 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); - //nread = neighborList[n+8*Np]; - dist[nr9] = fq; - - // q = 11 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); - //nread = neighborList[n+11*Np]; - dist[nr12] = fq; - - // q = 12 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ - mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); - //nread = neighborList[n+10*Np]; - dist[nr11]= fq; - - // q = 13 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); - //nread = neighborList[n+13*Np]; - dist[nr14] = fq; - - // q= 14 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); - //nread = neighborList[n+12*Np]; - dist[nr13] = fq; - - - // q = 15 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) - -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 
0.08333333333*(Fy+Fz); - nread = neighborList[n+15*Np]; - dist[nread] = fq; - - // q = 16 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) - -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); - nread = neighborList[n+14*Np]; - dist[nread] = fq; - - - // q = 17 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) - -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); - nread = neighborList[n+17*Np]; - dist[nread] = fq; - - // q = 18 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) - -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); - nread = neighborList[n+16*Np]; - dist[nread] = fq; - - // write the velocity - ux = jx / rho0; - uy = jy / rho0; - uz = jz / rho0; - Vel[n] = ux; - Vel[Np+n] = uy; - Vel[2*Np+n] = uz; - - // Instantiate mass transport distributions - // Stationary value - distribution 0 - nAB = 1.0/(nA+nB); - Aq[n] = 0.3333333333333333*nA; - Bq[n] = 0.3333333333333333*nB; - - //............................................... - // q = 0,2,4 - // Cq = {1,0,0}, {0,1,0}, {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nx; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; - - // q = 1 - //nread = neighborList[n+Np]; - Aq[nr2] = a1; - Bq[nr2] = b1; - // q=2 - //nread = neighborList[n]; - Aq[nr1] = a2; - Bq[nr1] = b2; - - //............................................... 
- // Cq = {0,1,0} - delta = beta*nA*nB*nAB*0.1111111111111111*ny; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; - - // q = 3 - //nread = neighborList[n+3*Np]; - Aq[nr4] = a1; - Bq[nr4] = b1; - // q = 4 - //nread = neighborList[n+2*Np]; - Aq[nr3] = a2; - Bq[nr3] = b2; - - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nz; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; - - // q = 5 - //nread = neighborList[n+5*Np]; - Aq[nr6] = a1; - Bq[nr6] = b1; - // q = 6 - //nread = neighborList[n+4*Np]; - Aq[nr5] = a2; - Bq[nr5] = b2; - //............................................... 
- } -} - -extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, - double *Den, double *Phi, int start, int finish, int Np){ - - int idx,n,nread; - double fq,nA,nB; - - for (int n=start; n ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),W(0),gamma(0), +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauM(0),rhoA(0),rhoB(0),W(0),gamma(0),kappa(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -30,10 +30,13 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ // set defaults timestepMax = 100000; tauA = tauB = 1.0; + tauM = 1.0;//relaxation time for phase field rhoA = rhoB = 1.0; Fx = Fy = Fz = 0.0; gamma=1e-3;//surface tension W=5.0;//interfacial thickness + beta = 12.0*gamma/W; + kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma Restart=false; din=dout=1.0; flux=0.0; @@ -81,6 +84,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ inletB=0.f; outletA=0.f; outletB=1.f; + //update secondary parameters + beta = 12.0*gamma/W; + kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma //if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details) BoundaryCondition = 0; @@ -258,15 +264,16 @@ void ScaLBL_FreeLeeModel::Create(){ TmpMap[idx] = Nxh*Nyh*Nzh-1; } } + // copy the device map ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); - ScaLBL_DeviceBarrier(); - delete [] TmpMap; - // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + comm.barrier(); + delete [] TmpMap; + delete [] neighborList; } -void ScaLBL_FreeLeeModel::AssignComponentLabels() +void 
ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() { double *phase; phase = new double[Nh]; @@ -288,12 +295,26 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels() // Assign the labels for (size_t idx=0; idxid[n] + int x=i-1; + int y=j-1; + int z=k-1; + if (x<0) x=0; + if (y<0) y=0; + if (z<0) z=0; + if (x>=Nx) x=Nx-1; + if (y>=Ny) y=Ny-1; + if (z>=Nz) z=Nz-1; + int n = z*Nx*Ny+y*Nx+x; VALUE=id[n]; + // Assign the affinity from the paired list for (unsigned int idx=0; idx < NLABELS; idx++){ //printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]); @@ -307,7 +328,7 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels() // fluid labels are reserved if (VALUE == 1) AFFINITY=1.0; else if (VALUE == 2) AFFINITY=-1.0; - phase[n] = AFFINITY; + phase[nh] = AFFINITY; } } } @@ -329,56 +350,10 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels() } //compute color gradient and laplacian of phase field + double *ColorGrad_host, mu_phi_host; + ColorGrad_host = new double[3*Np]; + mu_phi_host = new double[Np]; - - - - - //copy all data to device - ScaLBL_CopyToDevice(Phi, phase, N*sizeof(double)); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); - delete [] phase; -} - -void ScaLBL_FreeLeeModel::AssignChemPotential_ColorGrad() -{ - double *SolidPotential_host = new double [Nx*Ny*Nz]; - double *GreySolidGrad_host = new double [3*Np]; - - size_t NLABELS=0; - signed char VALUE=0; - double AFFINITY=0.f; - - auto LabelList = greyscaleColor_db->getVector( "GreySolidLabels" ); - auto AffinityList = greyscaleColor_db->getVector( "GreySolidAffinity" ); - - NLABELS=LabelList.size(); - if (NLABELS != AffinityList.size()){ - ERROR("Error: GreySolidLabels and GreySolidAffinity must be the same length! 
\n"); - } - - for (int k=0;kid[n] = 0; // set mask to zero since this is an immobile component - } - } - SolidPotential_host[n] = AFFINITY; - } - } - } - - // Calculate grey-solid color-gradient double *Dst; Dst = new double [3*3*3]; for (int kk=0; kk<3; kk++){ @@ -389,8 +364,8 @@ void ScaLBL_FreeLeeModel::AssignChemPotential_ColorGrad() } } } - double w_face = 1.f; - double w_edge = 0.5; + double w_face = 1.0/18.0; + double w_edge = 1.0/36.0; double w_corner = 0.f; //local Dst[13] = 0.f; @@ -424,14 +399,21 @@ void ScaLBL_FreeLeeModel::AssignChemPotential_ColorGrad() Dst[23] = w_edge; Dst[25] = w_edge; - for (int k=1; kSDs(i,j,k)<2.0){ - GreySolidGrad_host[idx+0*Np] = phi_x; - GreySolidGrad_host[idx+1*Np] = phi_y; - GreySolidGrad_host[idx+2*Np] = phi_z; - } - else{ - GreySolidGrad_host[idx+0*Np] = 0.0; - GreySolidGrad_host[idx+1*Np] = 0.0; - GreySolidGrad_host[idx+2*Np] = 0.0; - } + //store color gradient + ColorGrad_host[idx+0*Np] = cs2_inv*phi_x; + ColorGrad_host[idx+1*Np] = cs2_inv*phi_y; + ColorGrad_host[idx+2*Np] = cs2_inv*phi_z; + //compute chemical potential + phi_Lap = 2.0*cs2_inv*phi_Lap; + mu_phi_host[idx] = 4.0*beta*phase[nh]*(phase[nh]+1.0)*(phase[nh]-1.0) - kappa*phi_Lap; } } } } - - if (rank==0){ - printf("Number of Grey-solid labels: %lu \n",NLABELS); - for (unsigned int idx=0; idxLastExterior(), Np); - ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); if (Restart == true){ //TODO need to revise this function @@ -576,6 +547,7 @@ void ScaLBL_FreeLeeModel::Initialize(){ } // establish reservoirs for external bC + // TODO to be revised if (BoundaryCondition == 1 || BoundaryCondition == 2 || 
BoundaryCondition == 3 || BoundaryCondition == 4 ){ if (Dm->kproc()==0){ ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); @@ -617,13 +589,12 @@ void ScaLBL_FreeLeeModel::Run(){ timestep++; //------------------------------------------------------------------------------------------------------------------- // Compute the Phase indicator field - // Read for hq, Bq happens in this routine (requires communication) - //ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL - ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(hq); //WRITE INTO OPPOSITE + // Read for hq happens in this routine (requires communication) + ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL @@ -635,8 +606,8 @@ void ScaLBL_FreeLeeModel::Run(){ // Halo exchange for phase field ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); 
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); @@ -653,19 +624,19 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_DeviceBarrier(); MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); // *************EVEN TIMESTEP************* timestep++; // Compute the Phase indicator field - ScaLBL_Comm->SendD3Q7AA(hq); //READ FROM NORMAL - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->RecvD3Q7AA(hq); //WRITE INTO OPPOSITE + ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL + ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL @@ -675,8 +646,8 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, 
rhoA, rhoB, tauA, tauB, tauM, + kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_DeviceBarrier(); @@ -693,8 +664,8 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); } - ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); //************************************************************************ PROFILE_STOP("Update"); @@ -722,30 +693,24 @@ void ScaLBL_FreeLeeModel::Run(){ void ScaLBL_FreeLeeModel::WriteDebug(){ // Copy back final phase indicator field and convert to regular layout - DoubleArray PhaseField(Nx,Ny,Nz); + DoubleArray PhaseData(Nxh,Nyh,Nzh); //ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); - ScaLBL_CopyToHost(PhaseField.data(), Phi, sizeof(double)*N); + ScaLBL_CopyToHost(PhaseData.data(), Phi, sizeof(double)*Nh); FILE *OUTFILE; sprintf(LocalRankFilename,"Phase.%05i.raw",rank); OUTFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,OUTFILE); + fwrite(PhaseData.data(),8,Nh,OUTFILE); fclose(OUTFILE); - ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); + DoubleArray PhaseField(Nx,Ny,Nz); + ScaLBL_Comm->RegularLayout(Map,Den,PhaseField); FILE *AFILE; - sprintf(LocalRankFilename,"A.%05i.raw",rank); + sprintf(LocalRankFilename,"Density.%05i.raw",rank); AFILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,AFILE); fclose(AFILE); - ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); - FILE *BFILE; - sprintf(LocalRankFilename,"B.%05i.raw",rank); - BFILE = fopen(LocalRankFilename,"wb"); - 
fwrite(PhaseField.data(),8,N,BFILE); - fclose(BFILE); - ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); FILE *PFILE; sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 5aa2d30a..75d2b413 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -35,7 +35,8 @@ public: int timestep,timestepMax; int BoundaryCondition; double tauA,tauB,rhoA,rhoB; - double W,gamma; + double tauM;//relaxation time for phase field (or mass) + double W,gamma,kappa,beta; double Fx,Fy,Fz,flux; double din,dout,inletA,inletB,outletA,outletB; From 3a6edc365d029194eda8187e37475aecdab92eb2 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 31 Jan 2021 21:26:03 -0500 Subject: [PATCH 156/205] build pass; ongoing model validation --- common/ScaLBL.h | 22 + cpu/FreeLee.cpp | 2100 +----------------------------- models/FreeLeeModel.cpp | 64 +- models/FreeLeeModel.h | 3 +- tests/CMakeLists.txt | 1 + tests/lbpm_freelee_simulator.cpp | 81 ++ 6 files changed, 154 insertions(+), 2117 deletions(-) create mode 100644 tests/lbpm_freelee_simulator.cpp diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 0fe2ad0c..f0c34ea9 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -178,6 +178,28 @@ extern "C" void ScaLBL_D3Q7_AAeven_DFH(double *Aq, double *Bq, double *Den, doub extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double *ColorGrad, int start, int finish, int Np); +// FREE ENERGY LEE MODEL + +extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np); + +extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, + double rhonA, double rhoB, double tauM, double W, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int 
finish, int Np); + +extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np); + + // BOUNDARY CONDITION ROUTINES extern "C" void ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *neighborList, int *list, double *dist, double din, int count, int Np); diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 40a99dd3..f28af185 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -6,6 +6,9 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, do { int n; double p = 1.0;//NOTE: take initial pressure p=1.0 + double chem; + double cg_x,cg_y,cg_z; + for (n=0; n 0){ - - // Retrieve the color gradient - nx = ColorGrad[n]; - ny = ColorGrad[N+n]; - nz = ColorGrad[2*N+n]; - //...........Normalize the Color Gradient................................. - C = sqrt(nx*nx+ny*ny+nz*nz); - if (C==0.0) C=1.0; - nx = nx/C; - ny = ny/C; - nz = nz/C; - //......No color gradient at z-boundary if pressure BC are set............. - // if (pBC && k==0) nx = ny = nz = 0.f; - // if (pBC && k==Nz-1) nx = ny = nz = 0.f; - //........................................................................ 
- // READ THE DISTRIBUTIONS - // (read from opposite array due to previous swap operation) - //........................................................................ - f2 = distodd[n]; - f4 = distodd[N+n]; - f6 = distodd[2*N+n]; - f8 = distodd[3*N+n]; - f10 = distodd[4*N+n]; - f12 = distodd[5*N+n]; - f14 = distodd[6*N+n]; - f16 = distodd[7*N+n]; - f18 = distodd[8*N+n]; - //........................................................................ - f0 = disteven[n]; - f1 = disteven[N+n]; - f3 = disteven[2*N+n]; - f5 = disteven[3*N+n]; - f7 = disteven[4*N+n]; - f9 = disteven[5*N+n]; - f11 = disteven[6*N+n]; - f13 = disteven[7*N+n]; - f15 = disteven[8*N+n]; - f17 = disteven[9*N+n]; - //........................................................................ - // PERFORM RELAXATION PROCESS - //........................................................................ - //....................compute the moments............................................... - rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17); - m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; - m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14; - jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; - m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18; - jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; - m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18; - m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18); - m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17); - m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13; - m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13; - m13 = f8+f7-f10-f9; - m14 = f16+f15-f18-f17; - m15 = f12+f11-f14-f13; - m16 = f7-f8+f9-f10-f11+f12-f13+f14; - m17 = -f7+f8+f9-f10+f15-f16+f17-f18; - m18 = f11-f12-f13+f14-f15+f16+f17-f18; - //..........Toelke, Fruediger et. al. 
2006............... - if (C == 0.0) nx = ny = nz = 1.0; -#ifdef STOKES - m1 = m1 + rlx_setA*(- 11*rho -alpha*C - m1); - m2 = m2 + rlx_setA*(3*rho - m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*( 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*( 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); -#else - m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1); - m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); -#endif - //.................inverse transformation...................................................... 
- f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2; - f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10); - f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10); - f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); - f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); - f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); - f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); - f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12+0.25*m13+0.125*(m16-m17); - f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12+0.25*m13+0.125*(m17-m16); - f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12-0.25*m13+0.125*(m16+m17); - f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12-0.25*m13-0.125*(m16+m17); - f11 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8) - 
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12+0.25*m15+0.125*(m18-m16); - f12 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12+0.25*m15+0.125*(m16-m18); - f13 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12-0.25*m15-0.125*(m16+m18); - f14 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12-0.25*m15+0.125*(m16+m18); - f15 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18); - f16 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17); - f17 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18); - f18 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6) - -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18); - //....................................................................................................... 
- // incorporate external force - f1 += 0.16666666*Fx; - f2 -= 0.16666666*Fx; - f3 += 0.16666666*Fy; - f4 -= 0.16666666*Fy; - f5 += 0.16666666*Fz; - f6 -= 0.16666666*Fz; - f7 += 0.08333333333*(Fx+Fy); - f8 -= 0.08333333333*(Fx+Fy); - f9 += 0.08333333333*(Fx-Fy); - f10 -= 0.08333333333*(Fx-Fy); - f11 += 0.08333333333*(Fx+Fz); - f12 -= 0.08333333333*(Fx+Fz); - f13 += 0.08333333333*(Fx-Fz); - f14 -= 0.08333333333*(Fx-Fz); - f15 += 0.08333333333*(Fy+Fz); - f16 -= 0.08333333333*(Fy+Fz); - f17 += 0.08333333333*(Fy-Fz); - f18 -= 0.08333333333*(Fy-Fz); - //*********** WRITE UPDATED VALUES TO MEMORY ****************** - // Write the updated distributions - //....EVEN..................................... - disteven[n] = f0; - disteven[N+n] = f2; - disteven[2*N+n] = f4; - disteven[3*N+n] = f6; - disteven[4*N+n] = f8; - disteven[5*N+n] = f10; - disteven[6*N+n] = f12; - disteven[7*N+n] = f14; - disteven[8*N+n] = f16; - disteven[9*N+n] = f18; - //....ODD...................................... - distodd[n] = f1; - distodd[N+n] = f3; - distodd[2*N+n] = f5; - distodd[3*N+n] = f7; - distodd[4*N+n] = f9; - distodd[5*N+n] = f11; - distodd[6*N+n] = f13; - distodd[7*N+n] = f15; - distodd[8*N+n] = f17; - - //...Store the Velocity.......................... - Velocity[n] = jx; - Velocity[N+n] = jy; - Velocity[2*N+n] = jz; - /* Velocity[3*n] = jx; - Velocity[3*n+1] = jy; - Velocity[3*n+2] = jz; - */ //...Store the Color Gradient.................... - // ColorGrad[3*n] = nx*C; - // ColorGrad[3*n+1] = ny*C; - // ColorGrad[3*n+2] = nz*C; - //............................................... 
- //*************************************************************** - } // check if n is in the solid - } // loop over n -} - -extern "C" void ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad, - double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB, - double alpha, double beta, double Fx, double Fy, double Fz) -{ - - int i,j,k,n,nn,N; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - - // non-conserved moments - double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - // additional variables needed for computations - double rho,jx,jy,jz,C,nx,ny,nz; - - N = Nx*Ny*Nz; - char id; - - for (n=0; n 0){ - - //.......Back out the 3-D indices for node n.............. - k = n/(Nx*Ny); - j = (n-Nx*Ny*k)/Nx; - i = n-Nx*Ny*k-Nx*j; - //........................................................................ - //........Get 1-D index for this thread.................... - // n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x; - //........................................................................ - // COMPUTE THE COLOR GRADIENT - //........................................................................ - //.................Read Phase Indicator Values............................ - //........................................................................ - nn = n-1; // neighbor index (get convention) - if (i-1<0) nn += Nx; // periodic BC along the x-boundary - f1 = phi[nn]; // get neighbor for phi - 1 - //........................................................................ 
- nn = n+1; // neighbor index (get convention) - if (!(i+10)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = na*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*ux))+delta; - - A_odd[n] = a1; - A_even[N+n] = a2; - B_odd[n] = b1; - B_even[N+n] = b2; - //............................................... - // q = 2 - // Cq = {0,1,0} - delta = beta*na*nb*nab*0.1111111111111111*ny; - if (!(na*nb*nab>0)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = na*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*uy))+delta; - - A_odd[N+n] = a1; - A_even[2*N+n] = a2; - B_odd[N+n] = b1; - B_even[2*N+n] = b2; - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*na*nb*nab*0.1111111111111111*nz; - if (!(na*nb*nab>0)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = na*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*uz))+delta; - - A_odd[2*N+n] = a1; - A_even[3*N+n] = a2; - B_odd[2*N+n] = b1; - B_even[3*N+n] = b2; - //............................................... - - /* // Construction and streaming for the components - for (idx=0; idx<3; idx++){ - //............................................... 
- // Distribution index - q = 2*idx; - // Associated discrete velocity - Cqx = D3Q7[idx][0]; - Cqy = D3Q7[idx][1]; - Cqz = D3Q7[idx][2]; - // Generate the Equilibrium Distribution - a1 = na*feq[q]; - b1 = nb*feq[q]; - a2 = na*feq[q+1]; - b2 = nb*feq[q+1]; - // Recolor the distributions - if (C > 0.0){ - sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); - //if (idx > 2) sp = 0.7071067811865475*sp; - //delta = sp*min( min(a1,a2), min(b1,b2) ); - delta = na*nb/(na+nb)*0.1111111111111111*sp; - //if (a1>0 && b1>0){ - a1 += beta*delta; - a2 -= beta*delta; - b1 -= beta*delta; - b2 += beta*delta; - } - // Save the re-colored distributions - A_odd[N*idx+n] = a1; - A_even[N*(idx+1)+n] = a2; - B_odd[N*idx+n] = b1; - B_even[N*(idx+1)+n] = b2; - //............................................... - } - */ - } - } -} - -//************************************************************************* -extern "C" void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, - double beta, int Nx, int Ny, int Nz, bool pBC, int S) -{ - char id; - - int idx; - int in,jn,kn,n,nn,N; - int q,Cqx,Cqy,Cqz; - // int sendLoc; - - double na,nb; // density values - double ux,uy,uz; // flow velocity - double nx,ny,nz,C; // color gradient components - double a1,a2,b1,b2; - double sp,delta; - double feq[6]; // equilibrium distributions - // Set of Discrete velocities for the D3Q19 Model - int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}}; - N = Nx*Ny*Nz; - - for (n=0; n 0 && na+nb > 0.0){ - //.......Back out the 3-D indices for node n.............. - int k = n/(Nx*Ny); - int j = (n-Nx*Ny*k)/Nx; - int i = n-Nx*Ny*k-Nx*j; - //.....Load the Color gradient......... - nx = ColorGrad[n]; - ny = ColorGrad[N+n]; - nz = ColorGrad[2*N+n]; - C = sqrt(nx*nx+ny*ny+nz*nz); - nx = nx/C; - ny = ny/C; - nz = nz/C; - //....Load the flow velocity........... 
- ux = Velocity[n]; - uy = Velocity[N+n]; - uz = Velocity[2*N+n]; - //....Instantiate the density distributions - // Generate Equilibrium Distributions and stream - // Stationary value - distribution 0 - // Den[2*n] += 0.3333333333333333*na; - // Den[2*n+1] += 0.3333333333333333*nb; - Den[2*n] += 0.3333333333333333*na; - Den[2*n+1] += 0.3333333333333333*nb; - // Non-Stationary equilibrium distributions - feq[0] = 0.1111111111111111*(1+3*ux); - feq[1] = 0.1111111111111111*(1-3*ux); - feq[2] = 0.1111111111111111*(1+3*uy); - feq[3] = 0.1111111111111111*(1-3*uy); - feq[4] = 0.1111111111111111*(1+3*uz); - feq[5] = 0.1111111111111111*(1-3*uz); - // Construction and streaming for the components - for (idx=0; idx<3; idx++){ - // Distribution index - q = 2*idx; - // Associated discrete velocity - Cqx = D3Q7[idx][0]; - Cqy = D3Q7[idx][1]; - Cqz = D3Q7[idx][2]; - // Generate the Equilibrium Distribution - a1 = na*feq[q]; - b1 = nb*feq[q]; - a2 = na*feq[q+1]; - b2 = nb*feq[q+1]; - // Recolor the distributions - if (C > 0.0){ - sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); - //if (idx > 2) sp = 0.7071067811865475*sp; - //delta = sp*min( min(a1,a2), min(b1,b2) ); - delta = na*nb/(na+nb)*0.1111111111111111*sp; - //if (a1>0 && b1>0){ - a1 += beta*delta; - a2 -= beta*delta; - b1 -= beta*delta; - b2 += beta*delta; - } - - // .......Get the neighbor node.............. - //nn = n + Stride[idx]; - in = i+Cqx; - jn = j+Cqy; - kn = k+Cqz; - - // Adjust for periodic BC, if necessary - // if (in<0) in+= Nx; - // if (jn<0) jn+= Ny; - // if (kn<0) kn+= Nz; - // if (!(in 0 ){ - // Get the density value (Streaming already performed) - Na = Den[n]; - Nb = Den[N+n]; - Phi[n] = (Na-Nb)/(Na+Nb); - } - } - //................................................................... 
-} - -extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){ - int n; - for (n=Slice*Nx*Ny; n<(Slice+1)*Nx*Ny; n++){ - Phi[n] = value; - } -} - - -//extern "C" void ScaLBL_D3Q19_AAeven_Color(double *dist, double *Aq, double *Bq, double *Den, double *Velocity, -// double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, -// double Fx, double Fy, double Fz, int start, int finish, int Np){ -extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, - double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, - double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - - int ijk,nn,n; - double fq; - // conserved momemnts - double rho,jx,jy,jz; - // non-conserved moments - double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - double m3,m5,m7; - double nA,nB; // number density - double a1,b1,a2,b2,nAB,delta; - double C,nx,ny,nz; //color gradient magnitude and direction - double ux,uy,uz; - double phi,tau,rho0,rlx_setA,rlx_setB; - - const double mrt_V1=0.05263157894736842; - const double mrt_V2=0.012531328320802; - const double mrt_V3=0.04761904761904762; - const double mrt_V4=0.004594820384294068; - const double mrt_V5=0.01587301587301587; - const double mrt_V6=0.0555555555555555555555555; - const double mrt_V7=0.02777777777777778; - const double mrt_V8=0.08333333333333333; - const double mrt_V9=0.003341687552213868; - const double mrt_V10=0.003968253968253968; - const double mrt_V11=0.01388888888888889; - const double mrt_V12=0.04166666666666666; - - - for (int n=start; n0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; - - Aq[1*Np+n] = a1; - Bq[1*Np+n] = b1; - Aq[2*Np+n] = a2; - Bq[2*Np+n] = b2; - - 
//............................................... - // q = 2 - // Cq = {0,1,0} - delta = beta*nA*nB*nAB*0.1111111111111111*ny; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; - - Aq[3*Np+n] = a1; - Bq[3*Np+n] = b1; - Aq[4*Np+n] = a2; - Bq[4*Np+n] = b2; - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nz; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; - - Aq[5*Np+n] = a1; - Bq[5*Np+n] = b1; - Aq[6*Np+n] = a2; - Bq[6*Np+n] = b2; - //............................................... - - } - -} - -//extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Velocity, -// double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, -// double Fx, double Fy, double Fz, int start, int finish, int Np){ - - -extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi, - int start, int finish, int Np){ - int idx,n,nread; - double fq,nA,nB; - for (int n=start; n 1.f){ - nA = 1.0; nB = 0.f; - } - else if (phi < -1.f){ - nB = 1.0; nA = 0.f; - } - else{ - nA=0.5*(phi+1.f); - nB=0.5*(1.f-phi); - } - Den[idx] = nA; - Den[Np+idx] = nB; - - Aq[idx]=0.3333333333333333*nA; - Aq[Np+idx]=0.1111111111111111*nA; - Aq[2*Np+idx]=0.1111111111111111*nA; - Aq[3*Np+idx]=0.1111111111111111*nA; - Aq[4*Np+idx]=0.1111111111111111*nA; - Aq[5*Np+idx]=0.1111111111111111*nA; - Aq[6*Np+idx]=0.1111111111111111*nA; - - Bq[idx]=0.3333333333333333*nB; - Bq[Np+idx]=0.1111111111111111*nB; - Bq[2*Np+idx]=0.1111111111111111*nB; - 
Bq[3*Np+idx]=0.1111111111111111*nB; - Bq[4*Np+idx]=0.1111111111111111*nB; - Bq[5*Np+idx]=0.1111111111111111*nB; - Bq[6*Np+idx]=0.1111111111111111*nB; - } -} - -extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ - int n; double value; - for (n=0; nkeyExists( "tauB" )){ tauB = freelee_db->getScalar( "tauB" ); } + if (freelee_db->keyExists( "tauM" )){ + tauM = freelee_db->getScalar( "tauM" ); + } if (freelee_db->keyExists( "rhoA" )){ rhoA = freelee_db->getScalar( "rhoA" ); } @@ -282,8 +285,8 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() signed char VALUE=0; double AFFINITY=0.f; - auto LabelList = greyscaleColor_db->getVector( "ComponentLabels" ); - auto AffinityList = greyscaleColor_db->getVector( "ComponentAffinity" ); + auto LabelList = freelee_db->getVector( "ComponentLabels" ); + auto AffinityList = freelee_db->getVector( "ComponentAffinity" ); NLABELS=LabelList.size(); if (NLABELS != AffinityList.size()){ @@ -337,7 +340,7 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() for (int i=0; iid[i] = Mask->id[i]; for (size_t idx=0; idxComm, label_count[idx]); + label_count_global[idx] = Dm->Comm.sumReduce(label_count[idx]); if (rank==0){ printf("Number of component labels: %lu \n",NLABELS); @@ -350,7 +353,7 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() } //compute color gradient and laplacian of phase field - double *ColorGrad_host, mu_phi_host; + double *ColorGrad_host, *mu_phi_host; ColorGrad_host = new double[3*Np]; mu_phi_host = new double[Np]; @@ -461,6 +464,7 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() ScaLBL_CopyToDevice(Phi, phase, Nh*sizeof(double)); ScaLBL_CopyToDevice(ColorGrad, ColorGrad_host, 3*Np*sizeof(double)); ScaLBL_CopyToDevice(mu_phi, mu_phi_host, Np*sizeof(double)); + ScaLBL_Comm->Barrier(); comm.barrier(); delete [] phase; delete [] ColorGrad_host; @@ -536,14 +540,15 @@ void 
ScaLBL_FreeLeeModel::Initialize(){ // Copy the restart data to the GPU ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double)); - ScaLBL_CopyToDevice(fq,cDist,19*Np*sizeof(double)); + ScaLBL_CopyToDevice(gqbar,cDist,19*Np*sizeof(double)); ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); ScaLBL_Comm->Barrier(); comm.barrier(); if (rank==0) printf ("Initializing phase and density fields on device from Restart\n"); - ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + //TODO the following function is to be updated. + //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); + //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); } // establish reservoirs for external bC @@ -575,7 +580,7 @@ void ScaLBL_FreeLeeModel::Run(){ //.......create and start timer............ double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); comm.barrier(); starttime = MPI_Wtime(); //......................................... 
@@ -593,7 +598,7 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation @@ -606,28 +611,27 @@ void ScaLBL_FreeLeeModel::Run(){ // Halo exchange for phase field ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set BCs if (BoundaryCondition == 3){ - ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } if (BoundaryCondition == 4){ - din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 5){ - ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); - ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + 
ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_Comm->Barrier(); // *************EVEN TIMESTEP************* timestep++; @@ -635,7 +639,7 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation @@ -646,25 +650,25 @@ void ScaLBL_FreeLeeModel::Run(){ ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); // Set boundary conditions if (BoundaryCondition == 3){ - ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + 
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 4){ - din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 5){ - ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); - ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } - ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); //************************************************************************ diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 75d2b413..1b78792a 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -62,7 +62,7 @@ public: signed char *id; int *NeighborList; int *dvcMap; - double *fq, *hq; + double *gqbar, *hq; double *mu_phi, *Den, *Phi; double *ColorGrad; double *Velocity; @@ -82,6 +82,7 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); + void AssignComponentLabels_ChemPotential_ColorGrad(); }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0b634f06..63086219 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,7 @@ ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator ) ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( 
lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) diff --git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp new file mode 100644 index 00000000..61de8c28 --- /dev/null +++ b/tests/lbpm_freelee_simulator.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "models/FreeLeeModel.h" +#include "common/Utilities.h" + +//#define WRE_SURFACES + +/* + * Simulator for two-phase flow in porous media + * James E. McClure 2013-2014 + */ + + +//************************************************************************* +// Implementation of Two-Phase Immiscible LBM using CUDA +//************************************************************************* + +int main(int argc, char **argv) +{ + + // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + // Initialize MPI and error handlers + auto multiple = db->getWithDefault( "MPI_THREAD_MULTIPLE", true ); + //Utilities::startup( argc, argv, multiple ); + //Utilities::MPI::changeProfileLevel( 1 ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Free Energy Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel(rank,nprocs,comm); + LeeModel.ReadParams(filename); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create(); // 
creating the model will create data structure to match the pore structure and allocate variables + LeeModel.Initialize(); // initializing the model will set initial conditions for variables + LeeModel.Run(); + LeeModel.WriteDebug(); + + PROFILE_STOP("Main"); + auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE(file,level); + // **************************************************** + + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); +} From ac06cd342888dcbb0e0bc5e2899b3c2d6f8f8dcb Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 1 Feb 2021 00:14:24 -0500 Subject: [PATCH 157/205] build pass; continue model debugging --- common/WideHalo.cpp | 4 ++-- common/WideHalo.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index b56e8b96..a39ab317 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -67,7 +67,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Date: Mon, 1 Feb 2021 00:39:21 -0500 Subject: [PATCH 158/205] code clean up --- tests/lbpm_freelee_simulator.cpp | 113 ++++++++++++++----------------- 1 file changed, 51 insertions(+), 62 deletions(-) diff --git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp index 61de8c28..3e9c372a 100644 --- a/tests/lbpm_freelee_simulator.cpp +++ b/tests/lbpm_freelee_simulator.cpp @@ -1,81 +1,70 @@ +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include "models/FreeLeeModel.h" #include "common/Utilities.h" +#include "models/FreeLeeModel.h" -//#define WRE_SURFACES +//******************************************************************* +// Implementation of Free-Energy Two-Phase LBM (Lee model) +//******************************************************************* -/* - * Simulator for two-phase flow 
in porous media - * James E. McClure 2013-2014 - */ - - -//************************************************************************* -// Implementation of Two-Phase Immiscible LBM using CUDA -//************************************************************************* - -int main(int argc, char **argv) +int main( int argc, char **argv ) { - // Initialize MPI - Utilities::startup( argc, argv ); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); - // Load the input database - auto db = std::make_shared( argv[1] ); + { // Limit scope so variables that contain communicators will free before MPI_Finialize - // Initialize MPI and error handlers - auto multiple = db->getWithDefault( "MPI_THREAD_MULTIPLE", true ); - //Utilities::startup( argc, argv, multiple ); - //Utilities::MPI::changeProfileLevel( 1 ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); - { // Limit scope so variables that contain communicators will free before MPI_Finialize + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Free Energy Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); - if (rank == 0){ - printf("********************************************************\n"); - printf("Running Free Energy Lee LBM \n"); - printf("********************************************************\n"); - } - // Initialize compute device - int device=ScaLBL_SetDevice(rank); - NULL_USE( device ); - ScaLBL_DeviceBarrier(); - comm.barrier(); + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + 
Utilities::setErrorHandlers(); - PROFILE_ENABLE(1); - //PROFILE_ENABLE_TRACE(); - //PROFILE_ENABLE_MEMORY(); - PROFILE_SYNCHRONIZE(); - PROFILE_START("Main"); - Utilities::setErrorHandlers(); + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create(); + LeeModel.Initialize(); + LeeModel.Run(); + LeeModel.WriteDebug(); - auto filename = argv[1]; - ScaLBL_FreeLeeModel LeeModel(rank,nprocs,comm); - LeeModel.ReadParams(filename); - LeeModel.SetDomain(); - LeeModel.ReadInput(); - LeeModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables - LeeModel.Initialize(); // initializing the model will set initial conditions for variables - LeeModel.Run(); - LeeModel.WriteDebug(); - - PROFILE_STOP("Main"); - auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); - auto level = db->getWithDefault( "TimerLevel", 1 ); - PROFILE_SAVE(file,level); - // **************************************************** + PROFILE_STOP("Main"); + auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file,level ); + // **************************************************** - } // Limit scope so variables that contain communicators will free before MPI_Finialize + } // Limit scope so variables that contain communicators will free before MPI_Finialize - Utilities::shutdown(); + Utilities::shutdown(); + return 0; } From 1f08c9a0b6c8d16d138444139e2fdcf48065b7f3 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 7 Feb 2021 20:08:38 -0500 Subject: [PATCH 159/205] save the work; add debugging output --- models/FreeLeeModel.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 7d530406..5a048d38 100644 --- a/models/FreeLeeModel.cpp +++ 
b/models/FreeLeeModel.cpp @@ -466,6 +466,16 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() ScaLBL_CopyToDevice(mu_phi, mu_phi_host, Np*sizeof(double)); ScaLBL_Comm->Barrier(); comm.barrier(); + + //debug + //save the phase field and check it + //FILE *OUTFILE; + //sprintf(LocalRankFilename,"Phase_Init.%05i.raw",rank); + //OUTFILE = fopen(LocalRankFilename,"wb"); + //fwrite(phase,8,Nh,OUTFILE); + //fclose(OUTFILE); + + delete [] phase; delete [] ColorGrad_host; delete [] mu_phi_host; From 9ddf949a9e1373bf636e353686467346f43f03c8 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 7 Feb 2021 23:37:26 -0500 Subject: [PATCH 160/205] save the work; to be compiled and tested --- common/ScaLBL.h | 10 +- cpu/FreeLee.cpp | 941 +++++++++++++++++++++++++++++++++------- models/FreeLeeModel.cpp | 269 +++++++++++- models/FreeLeeModel.h | 12 +- 4 files changed, 1052 insertions(+), 180 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 73b89f1d..42c51525 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -180,7 +180,9 @@ extern "C" void ScaLBL_D3Q19_Gradient_DFH(int *NeighborList, double *Phi, double // FREE ENERGY LEE MODEL -extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np); +extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np); + +extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np); extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, double rhonA, double rhoB, double tauM, double W, int start, int finish, int Np); @@ -199,6 +201,12 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, 
double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np); + // BOUNDARY CONDITION ROUTINES diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index f28af185..bd6e744a 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -2,7 +2,7 @@ #define STOKES -extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) { int n; double p = 1.0;//NOTE: take initial pressure p=1.0 @@ -40,6 +40,38 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_Init(double *gqbar, double *mu_phi, do } } +extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + + for (n=0; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = 
dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. 
+ ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. + uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. 
+ uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. 
+ ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. + uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. 
+ uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n; + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + for (int n=start; n(new ScaLBL_Communicator(Mask)); - ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); + //ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); ScaLBL_Comm_WideHalo = std::shared_ptr(new ScaLBLWideHalo_Communicator(Mask,2)); // create the layout for the LBM @@ -276,6 +276,51 @@ void ScaLBL_FreeLeeModel::Create(){ delete [] neighborList; } +void ScaLBL_FreeLeeModel::Create_SingleFluid(){ + /* + * This function creates the variables needed to run single-fluid Lee model + */ + //......................................................... + // Initialize communication structures in averaging domain + for (int i=0; iid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... 
+ if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + // create the layout for the LBM + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1); + comm.barrier(); + + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + dist_mem_size = Np*sizeof(double); + neighborSize=18*(Np*sizeof(int)); + //........................................................................... + ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + //........................................................................... 
+ // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + comm.barrier(); + delete [] neighborList; +} + void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() { double *phase; @@ -482,15 +527,15 @@ void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() delete [] Dst; } -void ScaLBL_FreeLeeModel::Initialize(){ +void ScaLBL_FreeLeeModel::Initialize_TwoFluid(){ /* - * This function initializes model + * This function initializes two-fluid Lee model */ if (rank==0) printf ("Initializing phase field, chemical potential and color gradient\n"); AssignComponentLabels_ChemPotential_ColorGrad();//initialize phase field Phi if (rank==0) printf ("Initializing distributions for momentum transport\n"); - ScaLBL_D3Q19_FreeLeeModel_Init(gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); + ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); if (rank==0) printf ("Initializing density field and distributions for phase-field transport\n"); ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); @@ -578,7 +623,84 @@ void ScaLBL_FreeLeeModel::Initialize(){ //ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); } -void ScaLBL_FreeLeeModel::Run(){ +void ScaLBL_FreeLeeModel::Initialize_SingleFluid(){ + /* + * This function initializes single-fluid Lee model + */ + if (rank==0) printf ("Initializing distributions for momentum transport\n"); + ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(gqbar, Fx, Fy, Fz, Np); + + if (Restart == true){ + //TODO need to revise this function + //remove the phase-related part + + + +// if (rank==0){ +// printf("Reading restart file! 
\n"); +// } +// +// // Read in the restart file to CPU buffers +// int *TmpMap; +// TmpMap = new int[Np]; +// +// double *cPhi, *cDist, *cDen; +// cPhi = new double[N]; +// cDen = new double[2*Np]; +// cDist = new double[19*Np]; +// ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); +// //ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); +// +// ifstream File(LocalRestartFile,ios::binary); +// int idx; +// double value,va,vb; +// for (int n=0; nLastExterior(); n++){ +// va = cDen[n]; +// vb = cDen[Np + n]; +// value = (va-vb)/(va+vb); +// idx = TmpMap[n]; +// if (!(idx < 0) && idxFirstInterior(); nLastInterior(); n++){ +// va = cDen[n]; +// vb = cDen[Np + n]; +// value = (va-vb)/(va+vb); +// idx = TmpMap[n]; +// if (!(idx < 0) && idxBarrier(); +// comm.barrier(); +// +// if (rank==0) printf ("Initializing phase and density fields on device from Restart\n"); +// //TODO the following function is to be updated. +// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); +// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } +} + +void ScaLBL_FreeLeeModel::Run_TwoFluid(){ int nprocs=nprocx*nprocy*nprocz; const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); @@ -704,8 +826,105 @@ void ScaLBL_FreeLeeModel::Run(){ // ************************************************************************ } +void ScaLBL_FreeLeeModel::Run_SingleFluid(){ + int nprocs=nprocx*nprocy*nprocz; + const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); + + if (rank==0){ + printf("********************************************************\n"); + printf("No. of timesteps: %i \n", timestepMax); + fflush(stdout); + } -void ScaLBL_FreeLeeModel::WriteDebug(){ + //.......create and start timer............ + double starttime,stoptime,cputime; + ScaLBL_Comm->Barrier(); + comm.barrier(); + starttime = MPI_Wtime(); + //......................................... 
+ + //************ MAIN ITERATION LOOP ***************************************/ + PROFILE_START("Loop"); + while (timestep < timestepMax ) { + //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } + PROFILE_START("Update"); + // *************ODD TIMESTEP************* + timestep++; + //------------------------------------------------------------------------------------------------------------------- + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL + ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set boundary conditions + // TODO to be revised! + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); + } + ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + + // *************EVEN TIMESTEP************* + timestep++; + //------------------------------------------------------------------------------------------------------------------- + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL + ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + 
ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set boundary conditions + // TODO to be revised! + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); + } + ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + //************************************************************************ + PROFILE_STOP("Update"); + } + PROFILE_STOP("Loop"); + PROFILE_SAVE("lbpm_color_simulator",1); + //************************************************************************ + stoptime = MPI_Wtime(); + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + cputime = (stoptime - starttime)/timestep; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + + // ************************************************************************ +} + +void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ // Copy back final phase indicator field and convert to regular layout DoubleArray PhaseData(Nxh,Nyh,Nzh); 
//ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); @@ -775,3 +994,37 @@ void ScaLBL_FreeLeeModel::WriteDebug(){ fclose(CGZ_FILE); */ } + +void ScaLBL_FreeLeeModel::WriteDebug_SingleFluid(){ + + DoubleArray PhaseData(Nxh,Nyh,Nzh); + + // Copy back final phase indicator field and convert to regular layout + ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); + FILE *PFILE; + sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); + PFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,PFILE); + fclose(PFILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + FILE *VELX_FILE; + sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); + VELX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELX_FILE); + fclose(VELX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + FILE *VELY_FILE; + sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); + VELY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELY_FILE); + fclose(VELY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + FILE *VELZ_FILE; + sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); + VELZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELZ_FILE); + fclose(VELZ_FILE); +} diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 1b78792a..5a7bf248 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -26,10 +26,14 @@ public: void ReadParams(std::shared_ptr db0); void SetDomain(); void ReadInput(); - void Create(); - void Initialize(); - void Run(); - void WriteDebug(); + void Create_TwoFluid(); + void Initialize_TwoFluid(); + void Run_TwoFluid(); + void WriteDebug_TwoFluid(); + void Create_SingleFluid(); + void Initialize_SingleFluid(); + void Run_SingleFluid(); + void WriteDebug_SingleFluid(); bool Restart,pBC; int timestep,timestepMax; From 98491ccd80cc9f8343d4080d353ea9ee09bd3a1f Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Sun, 7 Feb 2021 23:50:17 -0500 
Subject: [PATCH 161/205] save the work2; to be complied and tested --- models/FreeLeeModel.cpp | 9 +++ models/FreeLeeModel.h | 1 + tests/CMakeLists.txt | 1 + .../lbpm_freelee_SingleFluidBGK_simulator.cpp | 70 +++++++++++++++++++ tests/lbpm_freelee_simulator.cpp | 8 +-- 5 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 tests/lbpm_freelee_SingleFluidBGK_simulator.cpp diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index aca5d8d8..120d3ced 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -12,6 +12,7 @@ color lattice boltzmann model ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauM(0),rhoA(0),rhoB(0),W(0),gamma(0),kappa(0),beta(0), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), +tau(0),rho0(0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -32,6 +33,8 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ tauA = tauB = 1.0; tauM = 1.0;//relaxation time for phase field rhoA = rhoB = 1.0; + tau = 1.0;//only for single-fluid Lee model + rho0 = 1.0;//only for single-fluid Lee model Fx = Fy = Fz = 0.0; gamma=1e-3;//surface tension W=5.0;//interfacial thickness @@ -45,6 +48,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ if (freelee_db->keyExists( "timestepMax" )){ timestepMax = freelee_db->getScalar( "timestepMax" ); } + if (freelee_db->keyExists( "tau" )){//only for single-fluid Lee model + tau = freelee_db->getScalar( "tau" ); + } if (freelee_db->keyExists( "tauA" )){ tauA = freelee_db->getScalar( "tauA" ); } @@ -54,6 +60,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ if (freelee_db->keyExists( "tauM" )){ tauM = freelee_db->getScalar( "tauM" ); } + if (freelee_db->keyExists( "rho0" )){ + rho0 = freelee_db->getScalar( "rho0" ); + } if (freelee_db->keyExists( "rhoA" )){ 
rhoA = freelee_db->getScalar( "rhoA" ); } diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 5a7bf248..1e372f50 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -39,6 +39,7 @@ public: int timestep,timestepMax; int BoundaryCondition; double tauA,tauB,rhoA,rhoB; + double tau, rho0;//only for single-fluid Lee model double tauM;//relaxation time for phase field (or mass) double W,gamma,kappa,beta; double Fx,Fy,Fz,flux; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 63086219..8df4e6bd 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,6 +7,7 @@ ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator ) ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_freelee_SingleFluidBGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) diff --git a/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp new file mode 100644 index 00000000..dd3be8d9 --- /dev/null +++ b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Utilities.h" +#include "models/FreeLeeModel.h" + +//******************************************************************* +// Implementation of Free-Energy Two-Phase LBM (Lee model) +//******************************************************************* + +int main( int argc, char **argv ) +{ + + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + if (rank == 0){ + 
printf("********************************************************\n"); + printf("Running Single-Fluid Solver based on Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create_SingleFluid()(); + LeeModel.Initialize_SingleFluid()(); + LeeModel.Run_SingleFluid()(); + LeeModel.WriteDebug_SingleFluid()(); + + PROFILE_STOP("Main"); + auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_SingleFluidBGK_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file,level ); + // **************************************************** + + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); + return 0; +} diff --git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp index 3e9c372a..3663c4e9 100644 --- a/tests/lbpm_freelee_simulator.cpp +++ b/tests/lbpm_freelee_simulator.cpp @@ -51,10 +51,10 @@ int main( int argc, char **argv ) LeeModel.ReadParams( filename ); LeeModel.SetDomain(); LeeModel.ReadInput(); - LeeModel.Create(); - LeeModel.Initialize(); - LeeModel.Run(); - LeeModel.WriteDebug(); + LeeModel.Create_TwoFluid(); + LeeModel.Initialize_TwoFluid(); + LeeModel.Run_TwoFluid(); + LeeModel.WriteDebug_TwoFluid(); PROFILE_STOP("Main"); auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); From e34170d2325bfee43dc9682d67677898d067c7f8 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 8 Feb 2021 00:00:26 -0500 Subject: [PATCH 162/205] built 
passed --- cpu/FreeLee.cpp | 9 ++++----- models/FreeLeeModel.cpp | 2 +- tests/lbpm_freelee_SingleFluidBGK_simulator.cpp | 8 ++++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index bd6e744a..266b3a84 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -1738,12 +1738,11 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, d m18 = dist[17*Np+n]; //compute fluid velocity - ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)); - uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)); - uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)); + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)); //compute pressure - p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) - +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); //------------------------------------------------- BCK collison ------------------------------------------------------------// // q=0 diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 120d3ced..b0ee372d 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -1006,7 +1006,7 @@ void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ void ScaLBL_FreeLeeModel::WriteDebug_SingleFluid(){ - DoubleArray PhaseData(Nxh,Nyh,Nzh); + DoubleArray PhaseField(Nx,Ny,Nz); // Copy back final phase indicator field and convert to regular layout ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); diff --git a/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp index dd3be8d9..19d99b9c 100644 --- a/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp +++ b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp @@ -51,10 +51,10 @@ int main( int argc, char **argv ) 
LeeModel.ReadParams( filename ); LeeModel.SetDomain(); LeeModel.ReadInput(); - LeeModel.Create_SingleFluid()(); - LeeModel.Initialize_SingleFluid()(); - LeeModel.Run_SingleFluid()(); - LeeModel.WriteDebug_SingleFluid()(); + LeeModel.Create_SingleFluid(); + LeeModel.Initialize_SingleFluid(); + LeeModel.Run_SingleFluid(); + LeeModel.WriteDebug_SingleFluid(); PROFILE_STOP("Main"); auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_SingleFluidBGK_simulator" ); From d8f5b21436433e9fff89250d9b78268a7d60f21c Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Mon, 8 Feb 2021 20:31:45 -0500 Subject: [PATCH 163/205] add a correcting factor cs2 into velocity equation --- cpu/FreeLee.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 266b3a84..32a7b568 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -15,7 +15,7 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *m cg_y = ColorGrad[1*Np+n]; cg_z = ColorGrad[2*Np+n]; - gqbar[0*Np+n] = 0.3333333333333333; + gqbar[0*Np+n] = 0.3333333333333333*p; gqbar[1*Np+n] = 0.055555555555555555*(p - 0.5*(chem*cg_x+Fx)); //double(100*n)+1.f; gqbar[2*Np+n] = 0.055555555555555555*(p - 0.5*(-chem*cg_x-Fx)); //double(100*n)+2.f; gqbar[3*Np+n] = 0.055555555555555555*(p - 0.5*(chem*cg_y+Fy)); //double(100*n)+3.f; @@ -47,7 +47,7 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double for (n=0; n Date: Tue, 9 Feb 2021 15:25:20 -0500 Subject: [PATCH 164/205] fix wide halo bug in list memory --- common/WideHalo.cpp | 3 ++- common/WideHalo.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index a39ab317..0c8f1781 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -124,7 +124,7 @@ ScaLBLWideHalo_Communicator::ScaLBLWideHalo_Communicator(std::shared_ptr Date: Thu, 11 Feb 2021 14:31:29 -0500 Subject: [PATCH 165/205] added 
FlowAdapter class --- models/ColorModel.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++ models/ColorModel.h | 14 ++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 3b8edd6c..1769324e 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -9,6 +9,7 @@ color lattice boltzmann model #include #include + ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0), tauA(0), tauB(0), rhoA(0), rhoB(0), alpha(0), beta(0), @@ -1600,3 +1601,47 @@ void ScaLBL_ColorModel::WriteDebug(){ fclose(CGZ_FILE); */ } + +FlowAdaptor::FlowAdaptor(ScaLBL_ColorModel &M){ + Nx = M.Dm->Nx; + Ny = M.Dm->Ny; + Nz = M.Dm->Nz; + timestep=-1; + timestep_previous=-1; + + phi.resize(Nx,Ny,Nz); phi.fill(0); // phase indicator field + phi_t.resize(Nx,Ny,Nz); phi_t.fill(0); // time derivative for the phase indicator field +} + +FlowAdaptor::~FlowAdaptor(){ + +} + +double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){ + + double INTERFACE_CUTOFF = M.color_db->getWithDefault( "move_interface_cutoff", 0.975 ); + double MOVE_INTERFACE_FACTOR = M.color_db->getWithDefault( "move_interface_factor", 10.0 ); + + ScaLBL_CopyToHost( phi.data(), M.Phi, Nx*Ny*Nz* sizeof( double ) ); + /* compute the local derivative of phase indicator field */ + double beta = M.beta; + double factor = 0.5/beta; + for (int n=0; nPhi(n); + double dist1 = factor*log((1.0+value1)/(1.0-value1)); + double value2 = phi(n); + double dist2 = factor*log((1.0+value2)/(1.0-value2)); + phi_t(n) = value2; + if (value1 < INTERFACE_CUTOFF && value1 > -1*INTERFACE_CUTOFF && value2 < INTERFACE_CUTOFF && value2 > -1*INTERFACE_CUTOFF ){ + /* time derivative of distance */ + double dxdt = 0.125*(dist2-dist1); + /* extrapolate to move the distance further */ + double dist3 = dist2 + MOVE_INTERFACE_FACTOR*dxdt; + /* compute the new phase interface */ + phi_t(n) = 
(2.f*(exp(-2.f*beta*(dist3)))/(1.f+exp(-2.f*beta*(dist3))) - 1.f); + } + } + ScaLBL_CopyToDevice( M.Phi, phi_t.data(), Nx*Ny*Nz* sizeof( double ) ); +} + diff --git a/models/ColorModel.h b/models/ColorModel.h index f5667765..b2a9c1d1 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -30,6 +30,7 @@ public: void Initialize(); void Run(); void WriteDebug(); + void getPhaseField(DoubleArray &f); bool Restart,pBC; bool REVERSE_FLOW_DIRECTION; @@ -86,3 +87,16 @@ private: double MorphOpenConnected(double target_volume_change); }; +class FlowAdaptor{ +public: + FlowAdaptor(ScaLBL_ColorModel &M); + ~FlowAdaptor(); + double MoveInterface(ScaLBL_ColorModel &M); + DoubleArray phi; + DoubleArray phi_t; +private: + int Nx, Ny, Nz; + int timestep; + int timestep_previous; +}; + From da55748d30912f03b929bb5575b57117d219e219 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 12 Feb 2021 09:10:54 -0500 Subject: [PATCH 166/205] update to flow adapter --- models/ColorModel.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 1769324e..a9886337 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -1238,6 +1238,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ } return(volume_change); } + double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ srand(time(NULL)); double mass_loss =0.f; @@ -1626,6 +1627,8 @@ double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){ /* compute the local derivative of phase indicator field */ double beta = M.beta; double factor = 0.5/beta; + double total_interface_displacement = 0.0; + double total_interface_sites = 0.0; for (int n=0; nPhi(n); @@ -1640,8 +1643,27 @@ double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){ double dist3 = dist2 + MOVE_INTERFACE_FACTOR*dxdt; /* compute the new phase interface */ phi_t(n) = (2.f*(exp(-2.f*beta*(dist3)))/(1.f+exp(-2.f*beta*(dist3))) - 1.f); + 
total_interface_displacement += fabs(MOVE_INTERFACE_FACTOR*dxdt); + total_interface_sites += 1.0; } } ScaLBL_CopyToDevice( M.Phi, phi_t.data(), Nx*Ny*Nz* sizeof( double ) ); + + +/* ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); + } + if (Dm->kproc() == nprocz-1){ + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); + } + } + */ } From 81d726030b35cff1b726f20a166ffff45c7979f2 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 12 Feb 2021 09:36:50 -0500 Subject: [PATCH 167/205] add freelee gpu skeleton --- cuda/FreeLee.cu | 2013 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2013 insertions(+) create mode 100644 cuda/FreeLee.cu diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu new file mode 100644 index 00000000..bc641ed9 --- /dev/null +++ b/cuda/FreeLee.cu @@ -0,0 +1,2013 @@ +#include + +#define STOKES + +__global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + double chem; + double cg_x,cg_y,cg_z; + + //for (n=0; n 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = 
nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); + + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np){ + + int idx,n,nread; + double fq,phi; + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 
3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + + 0.3333333333333333*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 
0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*uy + mgz*(-1 + uz))*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + 
ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - (m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + 
dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(-2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux 
+ Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + // ----------------------------- compute phase field evolution ---------------------------------------- + //Normalize the Color Gradient + C = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag = C; + if (C==0.0) ColorMag=1.0; + nx = nx/ColorMag; + ny = ny/ColorMag; + nz = nz/ColorMag; + //compute surface tension-related parameter + theta = M*4.5*(1-4.0*phi*phi)/W; + + //load distributions of phase field + //q=0 + h0 = hq[n]; + //q=1 + h1 = hq[nr1]; + + //q=2 + h2 = hq[nr2]; + + //q=3 + h3 = hq[nr3]; + + //q=4 + h4 = hq[nr4]; + + //q=5 + h5 = hq[nr5]; + + //q=6 + h6 = hq[nr6]; + + //-------------------------------- BGK collison for phase field ---------------------------------// + // q = 0 + hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; + + // q = 1 + hq[nr2] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + + // q = 2 + hq[nr1] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 
4*phi*phi))/W)/tauM; + + // q = 3 + hq[nr4] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 4 + hq[nr3] = h4 - (h4 - phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 5 + hq[nr6] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + + // q = 6 + hq[nr5] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + //........................................................................ + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,nn2x,ijk; + //int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double 
f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + double C,theta; + double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + // for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison 
------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n; + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + // for (int n=start; n Date: Fri, 12 Feb 2021 10:05:00 -0500 Subject: [PATCH 168/205] skeleton freelee build for gpu --- cuda/FreeLee.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu index bc641ed9..e37a92a3 100644 --- a/cuda/FreeLee.cu +++ b/cuda/FreeLee.cu @@ -2,6 +2,9 @@ #define STOKES +#define NBLOCKS 1024 +#define NTHREADS 256 + __global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) { int n; From e04abb922470f396244ac7a814daaf01e9ac11ab Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 12 Feb 
2021 10:23:19 -0500 Subject: [PATCH 169/205] hip versions for new physics --- cuda/BGK.cu | 414 +++--- hip/D3Q7BC.cu | 536 ++++++++ hip/FreeLee.cu | 2017 +++++++++++++++++++++++++++ hip/Greyscale.cu | 2745 +++++++++++++++++++++++++++++++++++++ hip/GreyscaleColor.cu | 3038 +++++++++++++++++++++++++++++++++++++++++ hip/Ion.cu | 392 ++++++ hip/MixedGradient.cu | 78 ++ hip/Poisson.cu | 330 +++++ hip/Stokes.cu | 996 ++++++++++++++ 9 files changed, 10339 insertions(+), 207 deletions(-) create mode 100644 hip/D3Q7BC.cu create mode 100644 hip/FreeLee.cu create mode 100644 hip/Greyscale.cu create mode 100644 hip/GreyscaleColor.cu create mode 100644 hip/Ion.cu create mode 100644 hip/MixedGradient.cu create mode 100644 hip/Poisson.cu create mode 100644 hip/Stokes.cu diff --git a/cuda/BGK.cu b/cuda/BGK.cu index b1da88bb..d9206a4f 100644 --- a/cuda/BGK.cu +++ b/cuda/BGK.cu @@ -12,111 +12,111 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s 10Np => odd part of dist) - f1 = dist[nr1]; // reading the f1 data into register fq + if ( n 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq - nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) - f2 = dist[nr2]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq - // q=3 - nr3 = neighborList[n+2*Np]; // neighbor 4 - f3 = dist[nr3]; + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; - // q = 4 - nr4 = neighborList[n+3*Np]; // neighbor 3 - f4 = dist[nr4]; + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; - // q=5 - nr5 = neighborList[n+4*Np]; - f5 = dist[nr5]; + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; - // q = 6 - nr6 = neighborList[n+5*Np]; - f6 = dist[nr6]; - - // q=7 - nr7 = neighborList[n+6*Np]; - f7 = dist[nr7]; + // q = 6 + 
nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; - // q = 8 - nr8 = neighborList[n+7*Np]; - f8 = dist[nr8]; + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; - // q=9 - nr9 = neighborList[n+8*Np]; - f9 = dist[nr9]; + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; - // q = 10 - nr10 = neighborList[n+9*Np]; - f10 = dist[nr10]; + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; - // q=11 - nr11 = neighborList[n+10*Np]; - f11 = dist[nr11]; + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; - // q=12 - nr12 = neighborList[n+11*Np]; - f12 = dist[nr12]; + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; - // q=13 - nr13 = neighborList[n+12*Np]; - f13 = dist[nr13]; + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; - // q=14 - nr14 = neighborList[n+13*Np]; - f14 = dist[nr14]; + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; - // q=15 - nr15 = neighborList[n+14*Np]; - f15 = dist[nr15]; + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; - // q=16 - nr16 = neighborList[n+15*Np]; - f16 = dist[nr16]; + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; - // q=17 - //fq = dist[18*Np+n]; - nr17 = neighborList[n+16*Np]; - f17 = dist[nr17]; + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; - // q=18 - nr18 = neighborList[n+17*Np]; - f18 = dist[nr18]; + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; - rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; - uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; - uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; - uu = 1.5*(ux*ux+uy*uy+uz*uz); + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; - // q=0 - dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + uz = 
f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + uu = 1.5*(ux*ux+uy*uy+uz*uz); - // q = 1 - dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); - // q=2 - dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; - // q = 3 - dist[nr4] = f3*(1.0-rlx) + - rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; - // q = 4 - dist[nr3] = f4*(1.0-rlx) + - rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + // q = 3 + dist[nr4] = f3*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; - // q = 5 - dist[nr6] = f5*(1.0-rlx) + - rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + // q = 4 + dist[nr3] = f4*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; - // q = 6 - dist[nr5] = f6*(1.0-rlx) + - rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + // q = 5 + dist[nr6] = f5*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; - // q = 7 - dist[nr8] = f7*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); + // q = 6 + dist[nr5] = f6*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; - // q = 8 - dist[nr7] = f8*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); + // q = 7 + dist[nr8] = f7*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); - // q = 9 - dist[nr10] = f9*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 
3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); + // q = 8 + dist[nr7] = f8*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); - // q = 10 - dist[nr9] = f10*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); + // q = 9 + dist[nr10] = f9*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); - // q = 11 - dist[nr12] = f11*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); + // q = 10 + dist[nr9] = f10*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); - // q = 12 - dist[nr11] = f12*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); + // q = 11 + dist[nr12] = f11*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); - // q = 13 - dist[nr14] = f13*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); + // q = 12 + dist[nr11] = f12*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); - // q= 14 - dist[nr13] = f14*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); + // q = 13 + dist[nr14] = f13*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); - // q = 15 - dist[nr16] = f15*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); + // q= 14 + dist[nr13] = f14*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); - // q = 16 - dist[nr15] = f16*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - 
uu) - 0.08333333333*(Fy+Fz); + // q = 15 + dist[nr16] = f15*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); - // q = 17 - dist[nr18] = f17*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + // q = 16 + dist[nr15] = f16*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz); - // q = 18 - dist[nr17] = f18*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); + // q = 17 + dist[nr18] = f17*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); } } } extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - - dvc_ScaLBL_D3Q19_AAeven_BGK<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); - cudaError_t err = cudaGetLastError(); + dvc_ScaLBL_D3Q19_AAeven_BGK<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); + + cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - dvc_ScaLBL_D3Q19_AAodd_BGK<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); + dvc_ScaLBL_D3Q19_AAodd_BGK<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); - cudaError_t err = cudaGetLastError(); + cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err)); } diff --git a/hip/D3Q7BC.cu b/hip/D3Q7BC.cu new file mode 100644 index 00000000..9413a68a --- /dev/null +++ b/hip/D3Q7BC.cu @@ -0,0 +1,536 @@ +#include 
+#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count) +{ + + int idx; + int iq,ib; + double value_b,value_q; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + iq = BounceBackDist_list[idx]; + ib = BounceBackSolid_list[idx]; + value_b = BoundaryValue[ib];//get boundary value from a solid site + value_q = dist[iq]; + dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice + } +} + +__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count) +{ + + int idx; + int iq,ib; + double value_b,value_q; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + iq = BounceBackDist_list[idx]; + ib = BounceBackSolid_list[idx]; + value_b = BoundaryValue[ib];//get boundary value from a solid site + value_q = dist[iq]; + dist[iq] = value_q + value_b; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + //................................................... + f5 = Vin - (f0+f1+f2+f3+f4+f6); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + //................................................... 
+ f6 = Vout - (f0+f1+f2+f3+f4+f5); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np) +{ + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + f5 = Vin - (f0+f1+f2+f3+f4+f6); + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) +{ + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + f6 = Vout - (f0+f1+f2+f3+f4+f5); + dist[nr6] = f6; + } +} + +__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count) +{ + int idx,n,nm; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + nm = Map[n]; + Psi[nm] = Vin; + } +} + + +__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count) +{ + int idx,n,nm; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + nm = Map[n]; + Psi[nm] = Vout; + } +} + +__global__ void 
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + //................................................... + f5 = Cin - (f0+f1+f2+f3+f4+f6); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + //................................................... + f6 = Cout - (f0+f1+f2+f3+f4+f5); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np) +{ + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + f5 = Cin - (f0+f1+f2+f3+f4+f6); + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np) +{ + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 
= dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + f6 = Cout - (f0+f1+f2+f3+f4+f5); + dist[nr6] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + fsum_partial = f0+f1+f2+f3+f4+f6; + uz = VelocityZ[n]; + //................................................... + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + fsum_partial = f0+f1+f2+f3+f4+f5; + uz = VelocityZ[n]; + //................................................... 
+ f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + fsum_partial = f0+f1+f2+f3+f4+f6; + uz = VelocityZ[n]; + //................................................... + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + fsum_partial = f0+f1+f2+f3+f4+f5; + uz = VelocityZ[n]; + //................................................... 
+ f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + dist[nr6] = f6; + } +} +//************************************************************************* + +extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Solid_Dirichlet_D3Q7<<>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Solid_Neumann_D3Q7<<>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<>>(list, dist, Vin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<>>(list, dist, Vout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void 
ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<>>(d_neighborList, list, dist, Vin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<>>(d_neighborList, list, dist, Vout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Poisson_D3Q7_BC_z<<>>(list, Map, Psi, Vin, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Poisson_D3Q7_BC_Z<<>>(list, Map, Psi, Vout, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<>>(list, dist, Cin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern 
"C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<>>(list, dist, Cout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<>>(d_neighborList, list, dist, Cin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<>>(d_neighborList, list, dist, Cout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<>>(list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<>>(list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = 
hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} diff --git a/hip/FreeLee.cu b/hip/FreeLee.cu new file mode 100644 index 00000000..558bd2f1 --- /dev/null +++ b/hip/FreeLee.cu @@ -0,0 +1,2017 @@ +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + +#define STOKES + +__global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + double chem; + double cg_x,cg_y,cg_z; + + //for (n=0; n 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = 
nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); + + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np){ + + int idx,n,nread; + double fq,phi; + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 
3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + + 0.3333333333333333*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 
0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*uy + mgz*(-1 + uz))*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + 
ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - (m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + 
dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(-2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux 
+ Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + // ----------------------------- compute phase field evolution ---------------------------------------- + //Normalize the Color Gradient + C = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag = C; + if (C==0.0) ColorMag=1.0; + nx = nx/ColorMag; + ny = ny/ColorMag; + nz = nz/ColorMag; + //compute surface tension-related parameter + theta = M*4.5*(1-4.0*phi*phi)/W; + + //load distributions of phase field + //q=0 + h0 = hq[n]; + //q=1 + h1 = hq[nr1]; + + //q=2 + h2 = hq[nr2]; + + //q=3 + h3 = hq[nr3]; + + //q=4 + h4 = hq[nr4]; + + //q=5 + h5 = hq[nr5]; + + //q=6 + h6 = hq[nr6]; + + //-------------------------------- BGK collison for phase field ---------------------------------// + // q = 0 + hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; + + // q = 1 + hq[nr2] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + + // q = 2 + hq[nr1] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 
4*phi*phi))/W)/tauM; + + // q = 3 + hq[nr4] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 4 + hq[nr3] = h4 - (h4 - phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + + // q = 5 + hq[nr6] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + + // q = 6 + hq[nr5] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + //........................................................................ + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,nn2x,ijk; + //int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double 
f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + double C,theta; + double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + // for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison 
------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n; + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + // for (int n=start; n +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity, double *Pressure){ + int n; + // conserved momemnts + double rho,vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure; + //double uu; + // non-conserved moments + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double 
perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; + + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes + + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + pressure = rho/porosity/3.0; + vx = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14)/rho+0.5*porosity*Gx; + vy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18)/rho+0.5*porosity*Gy; + vz = (f5-f6+f11-f12-f13+f14+f15-f16-f17+f18)/rho+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the body force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = -porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx; + Fy = -porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy; + Fz = -porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz; + if (porosity==1.0){ + Fx=Gx; + Fy=Gy; + Fz=Gz; + } + + //------------------------ BGK collison where body force has higher-order terms ----------------------------------------------------------// +// // q=0 +// dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 1 +// dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q=2 +// dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 3 +// dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 4 +// dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 5 +// dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); +// +// // q = 6 +// dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); +// +// // q = 7 +// dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 8 +// dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 9 +// dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 10 +// dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 11 +// dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); +// +// // q = 12 +// dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + +// Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 13 +// dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + +// Fz*(-3. 
- (9.*(ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q= 14 +// dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); +// +// // q = 15 +// dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); +// +// // q = 16 +// dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + +// Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 17 +// dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + +// Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 18 +// dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + +// Fz*(3. 
- (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //------------------------ BGK collison where body force has NO higher-order terms ----------------------------------------------------------// + // q=0 + dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3.)); + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3.)); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(3.)); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(-3.)); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(3.)); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(-3.)); + + // q = 7 + dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) 
+ Fy*(3.)); + + // q = 8 + dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(-3.)); + + // q = 9 + dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(-3.)); + + // q = 10 + dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(3.)); + + // q = 11 + dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(3.)); + + // q = 12 + dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(-3.)); + + // q = 13 + dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(-3.)); + + // q= 14 + dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(3.)); + + // q = 15 + dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) 
+ Fz*(3.)); + + // q = 16 + dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(-3.)); + + // q = 17 + dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(-3.)); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(3.)); + //-------------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity, double Den, double *Pressure){ + + int n; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double 
rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + pressure = fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // q=2 + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + pressure += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + 
pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + pressure += 
fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + nread = neighborList[n+16*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + //---------------------------------------------------------------------// + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes + + vx = jx/Den+0.5*porosity*Gx; + vy = jy/Den+0.5*porosity*Gy; + vz = jz/Den+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = Den*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = Den*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = Den*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=Den*Gx; + Fy=Den*Gy; + Fz=Den*Gz; + } + + //Calculate pressure for Incompressible-MRT model + pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); + +// //..............carry out relaxation process............................................... +// m1 = m1 + rlx_setA*((-30*Den+19*Den*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) +// + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; +// m2 = m2 + rlx_setA*((12*Den - 5.5*Den*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) +// + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) +// + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; +// m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) +// + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; +// m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) +// + 
(1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; +// m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) +// + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; +// m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) +// + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; +// m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) +// + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; +// m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) +// + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// //....................................................................................................... + + //-------------------- IMRT collison where body force has NO higher-order terms -------------// + //..............carry out relaxation process............................................... + m1 = m1 + rlx_setA*((-30*Den+19*Den*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); + m2 = m2 + rlx_setA*((12*Den - 5.5*Den*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + 
//....................................................................................................... + + + //.................inverse transformation...................................................... + // q=0 + fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = 
mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + // q = 15 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + // q = 17 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ 
+ + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity,double rho0, double *Pressure){ + + int n, nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => 
even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + //nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; 
+ m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + //nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + 
jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + //---------------------------------------------------------------------// + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes + + vx = jx/rho0+0.5*porosity*Gx; + vy = jy/rho0+0.5*porosity*Gy; + vz = jz/rho0+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=rho0*Gx; + Fy=rho0*Gy; + Fz=rho0*Gz; + } + + //Calculate pressure for MRT model + //pressure=rho/3.f/porosity; + pressure=rho/3.f; + + //-------------------- MRT collison where body force has NO higher-order terms -------------// + m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) - m1); + m2 = m2 
+ rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity) - m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) - m9); + m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) - m11); + m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) - m13); + m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) - m14); + m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + + + //.................inverse transformation...................................................... 
+ // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = 
mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ 
+ + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity,double rho0, double *Pressure){ + + int n; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale: %s \n",hipGetErrorString(err)); + } +} + 
+extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double *Pressure){
+    // Host entry point: launch the AA-odd greyscale kernel with the file-wide NBLOCKS x NTHREADS configuration.
+    dvc_ScaLBL_D3Q19_AAodd_Greyscale<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure);
+    // The launch itself returns no status: query the runtime and log (not propagate) any error.
+    hipError_t err = hipGetLastError();
+    if (hipSuccess != err){
+        printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale: %s \n",hipGetErrorString(err));
+    }
+}
+
+extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){
+    // Host entry point: launch the AA-even greyscale IMRT kernel with the file-wide NBLOCKS x NTHREADS configuration.
+    dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT<<<NBLOCKS,NTHREADS >>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure);
+    // The launch itself returns no status: query the runtime and log (not propagate) any error.
+    hipError_t err = hipGetLastError();
+    if (hipSuccess != err){
+        printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale_IMRT: %s \n",hipGetErrorString(err));
+    }
+}
+
+extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){
+    // Host entry point: launch the AA-odd greyscale IMRT kernel with the file-wide NBLOCKS x NTHREADS configuration.
+    dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure);
+    // The launch itself returns no status: query the runtime and log (not propagate) any error.
+    hipError_t err = hipGetLastError();
+    if (hipSuccess != err){
+        printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale_IMRT: %s \n",hipGetErrorString(err));
+    }
+}
+
+extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double rho0,double *Pressure){
+    // Host entry point: launch the AA-odd greyscale MRT kernel with the file-wide NBLOCKS x NTHREADS configuration.
+    dvc_ScaLBL_D3Q19_AAodd_Greyscale_MRT<<<NBLOCKS,NTHREADS >>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,rho0,Pressure);
+    // The launch itself returns no status: query the runtime and log (not propagate) any error.
+    hipError_t err
= hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale_MRT: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double rho0,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAeven_Greyscale_MRT<<>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,rho0,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale_MRT: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *dist, int Np, double Den){ + dvc_ScaLBL_D3Q19_GreyIMRT_Init<<>>(dist, Np, Den); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_GreyIMRT_Init: %s \n",hipGetErrorString(err)); + } +} diff --git a/hip/GreyscaleColor.cu b/hip/GreyscaleColor.cu new file mode 100644 index 00000000..0ceb0522 --- /dev/null +++ b/hip/GreyscaleColor.cu @@ -0,0 +1,3038 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +//Model-1 & 4 +__global__ void dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff,double alpha, double beta, + double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,ijk,nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + //int nr15,nr16,nr17,nr18; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + // non-conserved moments + double 
m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double phi,tau,rho0,rlx_setA,rlx_setB; + + double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double tau_eff; + double mu_eff;//kinematic viscosity + double nx_gs,ny_gs,nz_gs;//grey-solid color gradient + double nx_phase,ny_phase,nz_phase,C_phase; + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + 
m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + //nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + //nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + 
m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + // Compute greyscale related parameters + c0 = 
0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes + + vx = jx/rho0+0.5*(porosity*Gx); + vy = jy/rho0+0.5*(porosity*Gy); + vz = jz/rho0+0.5*(porosity*Gz); + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=rho0*(Gx); + Fy=rho0*(Gy); + Fz=rho0*(Gz); + } + + // write the velocity + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + //Pressure[n] = rho/3.f/porosity; + Pressure[n] = rho/3.f; + + //........................................................................ + //..............carry out relaxation process.............................. + //..........Toelke, Fruediger et. al. 2006................................ 
+ //---------------- NO higher-order force -------------------------------// + if (C == 0.0) nx = ny = nz = 0.0; + m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //----------------------------------------------------------------------// + + //----------------With higher-order force ------------------------------// + //if (C == 0.0) nx = ny = nz = 0.0; + //m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1) + // + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; + //m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2) + // + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + //jx = jx + Fx; + //m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + //jy = jy + Fy; + //m6 = m6 + 
rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + //jz = jz + Fz; + //m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + //m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9) + // + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; + ////m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) + // + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; + //m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11) + // + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; + ////m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12) + // + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; + //m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); + // + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; + //m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); + // + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; + //m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); + // + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; + //m16 = m16 + rlx_setB*( - m16); + //m17 = m17 + rlx_setB*( - m17); + //m18 = m18 + rlx_setB*( - m18); + //----------------------------------------------------------------------// + + //.................inverse transformation...................................................... 
+ // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = 
mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ + + // Instantiate mass transport distributions + // Stationary value - distribution 0 + nAB = 1.0/(nA+nB); + Aq[n] = 0.3333333333333333*nA; + Bq[n] = 0.3333333333333333*nB; + + //............................................... 
+ // q = 0,2,4 + // Cq = {1,0,0}, {0,1,0}, {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nx; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + //nread = neighborList[n+Np]; + Aq[nr2] = a1; + Bq[nr2] = b1; + // q=2 + //nread = neighborList[n]; + Aq[nr1] = a2; + Bq[nr1] = b2; + + //............................................... + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + //nread = neighborList[n+3*Np]; + Aq[nr4] = a1; + Bq[nr4] = b1; + // q = 4 + //nread = neighborList[n+2*Np]; + Aq[nr3] = a2; + Bq[nr3] = b2; + + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + //nread = neighborList[n+5*Np]; + Aq[nr6] = a1; + Bq[nr6] = b1; + // q = 6 + //nread = neighborList[n+4*Np]; + Aq[nr5] = a2; + Bq[nr5] = b2; + //............................................... 
+ } + } +} + +//Model-1 & 4 +__global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, + double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ + int ijk,nn,n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double phi,tau,rho0,rlx_setA,rlx_setB; + + double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double tau_eff; + double mu_eff;//kinematic viscosity + double nx_gs,ny_gs,nz_gs;//grey-solid color gradient + double nx_phase,ny_phase,nz_phase,C_phase; + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + 
+ Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... + + } + } +} + +__global__ void dvc_ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + int idx; + double nA,nB; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s1.0) t1 =((t1>0.0)-(t1<0.0))*(1.0-fabs(t1))+t1; +// //........................................................................ +// nn = ijk+1; // neighbor index (get convention) +// m2 = Phi[nn]; // get neighbor for phi - 2 +// t2 = m2+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t2)>1.0) t2 =((t2>0.0)-(t2<0.0))*(1.0-fabs(t2))+t2; +// //........................................................................ +// nn = ijk-strideY; // neighbor index (get convention) +// m3 = Phi[nn]; // get neighbor for phi - 3 +// t3 = m3+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t3)>1.0) t3 =((t3>0.0)-(t3<0.0))*(1.0-fabs(t3))+t3; +// //........................................................................ 
+// nn = ijk+strideY; // neighbor index (get convention) +// m4 = Phi[nn]; // get neighbor for phi - 4 +// t4 = m4+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t4)>1.0) t4 =((t4>0.0)-(t4<0.0))*(1.0-fabs(t4))+t4; +// //........................................................................ +// nn = ijk-strideZ; // neighbor index (get convention) +// m5 = Phi[nn]; // get neighbor for phi - 5 +// t5 = m5+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t5)>1.0) t5 =((t5>0.0)-(t5<0.0))*(1.0-fabs(t5))+t5; +// //........................................................................ +// nn = ijk+strideZ; // neighbor index (get convention) +// m6 = Phi[nn]; // get neighbor for phi - 6 +// t6 = m6+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t6)>1.0) t6 =((t6>0.0)-(t6<0.0))*(1.0-fabs(t6))+t6; +// //........................................................................ +// nn = ijk-strideY-1; // neighbor index (get convention) +// m7 = Phi[nn]; // get neighbor for phi - 7 +// t7 = m7+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t7)>1.0) t7 =((t7>0.0)-(t7<0.0))*(1.0-fabs(t7))+t7; +// //........................................................................ +// nn = ijk+strideY+1; // neighbor index (get convention) +// m8 = Phi[nn]; // get neighbor for phi - 8 +// t8 = m8+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t8)>1.0) t8 =((t8>0.0)-(t8<0.0))*(1.0-fabs(t8))+t8; +// //........................................................................ +// nn = ijk+strideY-1; // neighbor index (get convention) +// m9 = Phi[nn]; // get neighbor for phi - 9 +// t9 = m9+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t9)>1.0) t9 =((t9>0.0)-(t9<0.0))*(1.0-fabs(t9))+t9; +// //........................................................................ 
+// nn = ijk-strideY+1; // neighbor index (get convention) +// m10 = Phi[nn]; // get neighbor for phi - 10 +// t10 = m10+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t10)>1.0) t10 =((t10>0.0)-(t10<0.0))*(1.0-fabs(t10))+t10; +// //........................................................................ +// nn = ijk-strideZ-1; // neighbor index (get convention) +// m11 = Phi[nn]; // get neighbor for phi - 11 +// t11 = m11+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t11)>1.0) t11 =((t11>0.0)-(t11<0.0))*(1.0-fabs(t11))+t11; +// //........................................................................ +// nn = ijk+strideZ+1; // neighbor index (get convention) +// m12 = Phi[nn]; // get neighbor for phi - 12 +// t12 = m12+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t12)>1.0) t12 =((t12>0.0)-(t12<0.0))*(1.0-fabs(t12))+t12; +// //........................................................................ +// nn = ijk+strideZ-1; // neighbor index (get convention) +// m13 = Phi[nn]; // get neighbor for phi - 13 +// t13 = m13+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t13)>1.0) t13 =((t13>0.0)-(t13<0.0))*(1.0-fabs(t13))+t13; +// //........................................................................ +// nn = ijk-strideZ+1; // neighbor index (get convention) +// m14 = Phi[nn]; // get neighbor for phi - 14 +// t14 = m14+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t14)>1.0) t14 =((t14>0.0)-(t14<0.0))*(1.0-fabs(t14))+t14; +// //........................................................................ +// nn = ijk-strideZ-strideY; // neighbor index (get convention) +// m15 = Phi[nn]; // get neighbor for phi - 15 +// t15 = m15+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t15)>1.0) t15 =((t15>0.0)-(t15<0.0))*(1.0-fabs(t15))+t15; +// //........................................................................ 
+// nn = ijk+strideZ+strideY; // neighbor index (get convention) +// m16 = Phi[nn]; // get neighbor for phi - 16 +// t16 = m16+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t16)>1.0) t16 =((t16>0.0)-(t16<0.0))*(1.0-fabs(t16))+t16; +// //........................................................................ +// nn = ijk+strideZ-strideY; // neighbor index (get convention) +// m17 = Phi[nn]; // get neighbor for phi - 17 +// t17 = m17+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t17)>1.0) t17 =((t17>0.0)-(t17<0.0))*(1.0-fabs(t17))+t17; +// //........................................................................ +// nn = ijk-strideZ+strideY; // neighbor index (get convention) +// m18 = Phi[nn]; // get neighbor for phi - 18 +// t18 = m18+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t18)>1.0) t18 =((t18>0.0)-(t18<0.0))*(1.0-fabs(t18))+t18; +// //............Compute the Color Gradient................................... +// nx_phase = -(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); +// ny_phase = -(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); +// nz_phase = -(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); +// C_phase = sqrt(nx_phase*nx_phase+ny_phase*ny_phase+nz_phase*nz_phase); +// //correct the normal color gradient by considering the effect of grey solid +// nx = -(t1-t2+0.5*(t7-t8+t9-t10+t11-t12+t13-t14)); +// ny = -(t3-t4+0.5*(t7-t8-t9+t10+t15-t16+t17-t18)); +// nz = -(t5-t6+0.5*(t11-t12-t13+t14+t15-t16-t17+t18)); +// +// if (C_phase==0.0){//i.e. if in a bulk phase, there is no need for grey-solid correction +// nx = nx_phase; +// ny = ny_phase; +// nz = nz_phase; +// } +// +// //...........Normalize the Color Gradient................................. 
+// C = sqrt(nx*nx+ny*ny+nz*nz); +// double ColorMag = C; +// if (C==0.0) ColorMag=1.0; +// nx = nx/ColorMag; +// ny = ny/ColorMag; +// nz = nz/ColorMag; +// +// // q=0 +// fq = dist[n]; +// rho = fq; +// m1 = -30.0*fq; +// m2 = 12.0*fq; +// +// // q=1 +// //nread = neighborList[n]; // neighbor 2 +// //fq = dist[nread]; // reading the f1 data into register fq +// nr1 = neighborList[n]; +// fq = dist[nr1]; // reading the f1 data into register fq +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jx = fq; +// m4 = -4.0*fq; +// m9 = 2.0*fq; +// m10 = -4.0*fq; +// +// // f2 = dist[10*Np+n]; +// //nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) +// //fq = dist[nread]; // reading the f2 data into register fq +// nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) +// fq = dist[nr2]; // reading the f2 data into register fq +// rho += fq; +// m1 -= 11.0*(fq); +// m2 -= 4.0*(fq); +// jx -= fq; +// m4 += 4.0*(fq); +// m9 += 2.0*(fq); +// m10 -= 4.0*(fq); +// +// // q=3 +// //nread = neighborList[n+2*Np]; // neighbor 4 +// //fq = dist[nread]; +// nr3 = neighborList[n+2*Np]; // neighbor 4 +// fq = dist[nr3]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy = fq; +// m6 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 = fq; +// m12 = -2.0*fq; +// +// // q = 4 +// //nread = neighborList[n+3*Np]; // neighbor 3 +// //fq = dist[nread]; +// nr4 = neighborList[n+3*Np]; // neighbor 3 +// fq = dist[nr4]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy -= fq; +// m6 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 += fq; +// m12 -= 2.0*fq; +// +// // q=5 +// //nread = neighborList[n+4*Np]; +// //fq = dist[nread]; +// nr5 = neighborList[n+4*Np]; +// fq = dist[nr5]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz = fq; +// m8 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// +// // q = 6 +// //nread = neighborList[n+5*Np]; +// //fq = dist[nread]; +// nr6 = 
neighborList[n+5*Np]; +// fq = dist[nr6]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz -= fq; +// m8 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// // q=7 +// //nread = neighborList[n+6*Np]; +// //fq = dist[nread]; +// nr7 = neighborList[n+6*Np]; +// fq = dist[nr7]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 = fq; +// m16 = fq; +// m17 = -fq; +// +// // q = 8 +// //nread = neighborList[n+7*Np]; +// //fq = dist[nread]; +// nr8 = neighborList[n+7*Np]; +// fq = dist[nr8]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 += fq; +// m16 -= fq; +// m17 += fq; +// +// // q=9 +// //nread = neighborList[n+8*Np]; +// //fq = dist[nread]; +// nr9 = neighborList[n+8*Np]; +// fq = dist[nr9]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 += fq; +// m17 += fq; +// +// // q = 10 +// //nread = neighborList[n+9*Np]; +// //fq = dist[nread]; +// nr10 = neighborList[n+9*Np]; +// fq = dist[nr10]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 -= fq; +// m17 -= fq; +// +// // q=11 +// //nread = neighborList[n+10*Np]; +// //fq = dist[nread]; +// nr11 = neighborList[n+10*Np]; +// fq = dist[nr11]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 = fq; +// m16 -= fq; +// m18 = fq; +// +// // q=12 +// //nread = neighborList[n+11*Np]; +// //fq = dist[nread]; +// nr12 = neighborList[n+11*Np]; +// fq = 
dist[nr12]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 += fq; +// m16 += fq; +// m18 -= fq; +// +// // q=13 +// //nread = neighborList[n+12*Np]; +// //fq = dist[nread]; +// nr13 = neighborList[n+12*Np]; +// fq = dist[nr13]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 -= fq; +// m18 -= fq; +// +// // q=14 +// //nread = neighborList[n+13*Np]; +// //fq = dist[nread]; +// nr14 = neighborList[n+13*Np]; +// fq = dist[nr14]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 += fq; +// m18 += fq; +// +// // q=15 +// nread = neighborList[n+14*Np]; +// fq = dist[nread]; +// //fq = dist[17*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 = fq; +// m17 += fq; +// m18 -= fq; +// +// // q=16 +// nread = neighborList[n+15*Np]; +// fq = dist[nread]; +// //fq = dist[8*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 += fq; +// m17 -= fq; +// m18 += fq; +// +// // q=17 +// //fq = dist[18*Np+n]; +// nread = neighborList[n+16*Np]; +// fq = dist[nread]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 += fq; +// m18 += fq; +// +// // q=18 +// nread = neighborList[n+17*Np]; +// fq = dist[nread]; +// //fq = dist[9*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; 
+// m17 -= fq; +// m18 -= fq; +// +// // Compute greyscale related parameters +// c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); +// if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes +// //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); +// c1 = porosity*0.5*GeoFun/sqrt(perm); +// if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes +// +// vx = jx/rho0+0.5*(porosity*Gx); +// vy = jy/rho0+0.5*(porosity*Gy); +// vz = jz/rho0+0.5*(porosity*Gz); +// v_mag=sqrt(vx*vx+vy*vy+vz*vz); +// ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); +// uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); +// uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); +// u_mag=sqrt(ux*ux+uy*uy+uz*uz); +// +// //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium +// Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); +// Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); +// Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); +// if (porosity==1.0){ +// Fx=rho0*(Gx); +// Fy=rho0*(Gy); +// Fz=rho0*(Gz); +// } +// +// // write the velocity +// Velocity[n] = ux; +// Velocity[Np+n] = uy; +// Velocity[2*Np+n] = uz; +// +// //........................................................................ +// //..............carry out relaxation process.............................. +// //..........Toelke, Fruediger et. al. 2006................................ 
+// if (C == 0.0) nx = ny = nz = 0.0; +// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); +// m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); +// m10 = m10 + rlx_setA*( - m10); +// //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); +// m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); +// m12 = m12 + rlx_setA*( - m12); +// //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); +// m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); +// m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); +// m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// +// //.................inverse transformation...................................................... 
+// // q=0 +// fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; +// dist[n] = fq; +// +// // q = 1 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); +// //nread = neighborList[n+Np]; +// dist[nr2] = fq; +// +// // q=2 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); +// //nread = neighborList[n]; +// dist[nr1] = fq; +// +// // q = 3 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// //nread = neighborList[n+3*Np]; +// dist[nr4] = fq; +// +// // q = 4 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// //nread = neighborList[n+2*Np]; +// dist[nr3] = fq; +// +// // q = 5 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// //nread = neighborList[n+5*Np]; +// dist[nr6] = fq; +// +// // q = 6 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// //nread = neighborList[n+4*Np]; +// dist[nr5] = fq; +// +// // q = 7 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); +// //nread = neighborList[n+7*Np]; +// dist[nr8] = fq; +// +// // q = 8 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 +// +mrt_V12*m12+0.25*m13+0.125*(m17-m16); +// //nread = neighborList[n+6*Np]; +// dist[nr7] = fq; +// +// // q = 9 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); +// //nread = neighborList[n+9*Np]; +// dist[nr10] = fq; +// +// // q = 10 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); +// //nread = neighborList[n+8*Np]; +// dist[nr9] = fq; +// +// // q = 11 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// 
-mrt_V12*m12+0.25*m15+0.125*(m18-m16); +// //nread = neighborList[n+11*Np]; +// dist[nr12] = fq; +// +// // q = 12 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ +// mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); +// //nread = neighborList[n+10*Np]; +// dist[nr11]= fq; +// +// // q = 13 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15-0.125*(m16+m18); +// //nread = neighborList[n+13*Np]; +// dist[nr14] = fq; +// +// // q= 14 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15+0.125*(m16+m18); +// //nread = neighborList[n+12*Np]; +// dist[nr13] = fq; +// +// +// // q = 15 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); +// nread = neighborList[n+15*Np]; +// dist[nread] = fq; +// +// // q = 16 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); +// nread = neighborList[n+14*Np]; +// dist[nread] = fq; +// +// +// // q = 17 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); +// nread = neighborList[n+17*Np]; +// dist[nread] = fq; +// +// // q = 18 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); +// nread = neighborList[n+16*Np]; +// dist[nread] = fq; +// //........................................................................ +// +// // Instantiate mass transport distributions +// // Stationary value - distribution 0 +// nAB = 1.0/(nA+nB); +// Aq[n] = 0.3333333333333333*nA; +// Bq[n] = 0.3333333333333333*nB; +// +// //............................................... 
+// // q = 0,2,4 +// // Cq = {1,0,0}, {0,1,0}, {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nx; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; +// +// // q = 1 +// //nread = neighborList[n+Np]; +// Aq[nr2] = a1; +// Bq[nr2] = b1; +// // q=2 +// //nread = neighborList[n]; +// Aq[nr1] = a2; +// Bq[nr1] = b2; +// +// //............................................... +// // Cq = {0,1,0} +// delta = beta*nA*nB*nAB*0.1111111111111111*ny; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; +// +// // q = 3 +// //nread = neighborList[n+3*Np]; +// Aq[nr4] = a1; +// Bq[nr4] = b1; +// // q = 4 +// //nread = neighborList[n+2*Np]; +// Aq[nr3] = a2; +// Bq[nr3] = b2; +// +// //............................................... +// // q = 4 +// // Cq = {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nz; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; +// +// // q = 5 +// //nread = neighborList[n+5*Np]; +// Aq[nr6] = a1; +// Bq[nr6] = b1; +// // q = 6 +// //nread = neighborList[n+4*Np]; +// Aq[nr5] = a2; +// Bq[nr5] = b2; +// //............................................... 
+// } +// } +//} +// +////Model-2&3 +//__global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, +// double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, +// double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ +// int ijk,nn,n; +// double fq; +// // conserved momemnts +// double rho,jx,jy,jz; +// double vx,vy,vz,v_mag; +// double ux,uy,uz,u_mag; +// // non-conserved moments +// double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; +// double m3,m5,m7; +// double t1,t2,t4,t6,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18; +// double t3,t5,t7; +// double nA,nB; // number density +// double a1,b1,a2,b2,nAB,delta; +// double C,nx,ny,nz; //color gradient magnitude and direction +// double phi,tau,rho0,rlx_setA,rlx_setB; +// +// double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) +// double porosity; +// double perm;//voxel permeability +// double c0, c1; //Guo's model parameters +// double tau_eff; +// double mu_eff;//kinematic viscosity +// double nx_phase,ny_phase,nz_phase,C_phase; +// double Fx,Fy,Fz; +// +// const double mrt_V1=0.05263157894736842; +// const double mrt_V2=0.012531328320802; +// const double mrt_V3=0.04761904761904762; +// const double mrt_V4=0.004594820384294068; +// const double mrt_V5=0.01587301587301587; +// const double mrt_V6=0.0555555555555555555555555; +// const double mrt_V7=0.02777777777777778; +// const double mrt_V8=0.08333333333333333; +// const double mrt_V9=0.003341687552213868; +// const double mrt_V10=0.003968253968253968; +// const double mrt_V11=0.01388888888888889; +// const double mrt_V12=0.04166666666666666; +// +// int S = Np/NBLOCKS/NTHREADS + 1; +// for (int s=0; s1.0) t1 =((t1>0.0)-(t1<0.0))*(1.0-fabs(t1))+t1; +// 
//........................................................................ +// nn = ijk+1; // neighbor index (get convention) +// m2 = Phi[nn]; // get neighbor for phi - 2 +// t2 = m2+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t2)>1.0) t2 =((t2>0.0)-(t2<0.0))*(1.0-fabs(t2))+t2; +// //........................................................................ +// nn = ijk-strideY; // neighbor index (get convention) +// m3 = Phi[nn]; // get neighbor for phi - 3 +// t3 = m3+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t3)>1.0) t3 =((t3>0.0)-(t3<0.0))*(1.0-fabs(t3))+t3; +// //........................................................................ +// nn = ijk+strideY; // neighbor index (get convention) +// m4 = Phi[nn]; // get neighbor for phi - 4 +// t4 = m4+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t4)>1.0) t4 =((t4>0.0)-(t4<0.0))*(1.0-fabs(t4))+t4; +// //........................................................................ +// nn = ijk-strideZ; // neighbor index (get convention) +// m5 = Phi[nn]; // get neighbor for phi - 5 +// t5 = m5+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t5)>1.0) t5 =((t5>0.0)-(t5<0.0))*(1.0-fabs(t5))+t5; +// //........................................................................ +// nn = ijk+strideZ; // neighbor index (get convention) +// m6 = Phi[nn]; // get neighbor for phi - 6 +// t6 = m6+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t6)>1.0) t6 =((t6>0.0)-(t6<0.0))*(1.0-fabs(t6))+t6; +// //........................................................................ +// nn = ijk-strideY-1; // neighbor index (get convention) +// m7 = Phi[nn]; // get neighbor for phi - 7 +// t7 = m7+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t7)>1.0) t7 =((t7>0.0)-(t7<0.0))*(1.0-fabs(t7))+t7; +// //........................................................................ 
+// nn = ijk+strideY+1; // neighbor index (get convention) +// m8 = Phi[nn]; // get neighbor for phi - 8 +// t8 = m8+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t8)>1.0) t8 =((t8>0.0)-(t8<0.0))*(1.0-fabs(t8))+t8; +// //........................................................................ +// nn = ijk+strideY-1; // neighbor index (get convention) +// m9 = Phi[nn]; // get neighbor for phi - 9 +// t9 = m9+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t9)>1.0) t9 =((t9>0.0)-(t9<0.0))*(1.0-fabs(t9))+t9; +// //........................................................................ +// nn = ijk-strideY+1; // neighbor index (get convention) +// m10 = Phi[nn]; // get neighbor for phi - 10 +// t10 = m10+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t10)>1.0) t10 =((t10>0.0)-(t10<0.0))*(1.0-fabs(t10))+t10; +// //........................................................................ +// nn = ijk-strideZ-1; // neighbor index (get convention) +// m11 = Phi[nn]; // get neighbor for phi - 11 +// t11 = m11+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t11)>1.0) t11 =((t11>0.0)-(t11<0.0))*(1.0-fabs(t11))+t11; +// //........................................................................ +// nn = ijk+strideZ+1; // neighbor index (get convention) +// m12 = Phi[nn]; // get neighbor for phi - 12 +// t12 = m12+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t12)>1.0) t12 =((t12>0.0)-(t12<0.0))*(1.0-fabs(t12))+t12; +// //........................................................................ +// nn = ijk+strideZ-1; // neighbor index (get convention) +// m13 = Phi[nn]; // get neighbor for phi - 13 +// t13 = m13+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t13)>1.0) t13 =((t13>0.0)-(t13<0.0))*(1.0-fabs(t13))+t13; +// //........................................................................ 
+// nn = ijk-strideZ+1; // neighbor index (get convention) +// m14 = Phi[nn]; // get neighbor for phi - 14 +// t14 = m14+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t14)>1.0) t14 =((t14>0.0)-(t14<0.0))*(1.0-fabs(t14))+t14; +// //........................................................................ +// nn = ijk-strideZ-strideY; // neighbor index (get convention) +// m15 = Phi[nn]; // get neighbor for phi - 15 +// t15 = m15+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t15)>1.0) t15 =((t15>0.0)-(t15<0.0))*(1.0-fabs(t15))+t15; +// //........................................................................ +// nn = ijk+strideZ+strideY; // neighbor index (get convention) +// m16 = Phi[nn]; // get neighbor for phi - 16 +// t16 = m16+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t16)>1.0) t16 =((t16>0.0)-(t16<0.0))*(1.0-fabs(t16))+t16; +// //........................................................................ +// nn = ijk+strideZ-strideY; // neighbor index (get convention) +// m17 = Phi[nn]; // get neighbor for phi - 17 +// t17 = m17+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t17)>1.0) t17 =((t17>0.0)-(t17<0.0))*(1.0-fabs(t17))+t17; +// //........................................................................ +// nn = ijk-strideZ+strideY; // neighbor index (get convention) +// m18 = Phi[nn]; // get neighbor for phi - 18 +// t18 = m18+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t18)>1.0) t18 =((t18>0.0)-(t18<0.0))*(1.0-fabs(t18))+t18; +// //............Compute the Color Gradient................................... 
+// nx_phase = -(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); +// ny_phase = -(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); +// nz_phase = -(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); +// C_phase = sqrt(nx_phase*nx_phase+ny_phase*ny_phase+nz_phase*nz_phase); +// //correct the normal color gradient by considering the effect of grey solid +// nx = -(t1-t2+0.5*(t7-t8+t9-t10+t11-t12+t13-t14)); +// ny = -(t3-t4+0.5*(t7-t8-t9+t10+t15-t16+t17-t18)); +// nz = -(t5-t6+0.5*(t11-t12-t13+t14+t15-t16-t17+t18)); +// +// if (C_phase==0.0){ +// nx = nx_phase; +// ny = ny_phase; +// nz = nz_phase; +// } +// +// //...........Normalize the Color Gradient................................. +// C = sqrt(nx*nx+ny*ny+nz*nz); +// double ColorMag = C; +// if (C==0.0) ColorMag=1.0; +// nx = nx/ColorMag; +// ny = ny/ColorMag; +// nz = nz/ColorMag; +// +// // q=0 +// fq = dist[n]; +// rho = fq; +// m1 = -30.0*fq; +// m2 = 12.0*fq; +// +// // q=1 +// fq = dist[2*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jx = fq; +// m4 = -4.0*fq; +// m9 = 2.0*fq; +// m10 = -4.0*fq; +// +// // f2 = dist[10*Np+n]; +// fq = dist[1*Np+n]; +// rho += fq; +// m1 -= 11.0*(fq); +// m2 -= 4.0*(fq); +// jx -= fq; +// m4 += 4.0*(fq); +// m9 += 2.0*(fq); +// m10 -= 4.0*(fq); +// +// // q=3 +// fq = dist[4*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy = fq; +// m6 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 = fq; +// m12 = -2.0*fq; +// +// // q = 4 +// fq = dist[3*Np+n]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy -= fq; +// m6 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 += fq; +// m12 -= 2.0*fq; +// +// // q=5 +// fq = dist[6*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz = fq; +// m8 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// // q = 6 +// fq = dist[5*Np+n]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz -= fq; +// m8 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 
+= 2.0*fq; +// +// // q=7 +// fq = dist[8*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 = fq; +// m16 = fq; +// m17 = -fq; +// +// // q = 8 +// fq = dist[7*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 += fq; +// m16 -= fq; +// m17 += fq; +// +// // q=9 +// fq = dist[10*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 += fq; +// m17 += fq; +// +// // q = 10 +// fq = dist[9*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 -= fq; +// m17 -= fq; +// +// // q=11 +// fq = dist[12*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 = fq; +// m16 -= fq; +// m18 = fq; +// +// // q=12 +// fq = dist[11*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 += fq; +// m16 += fq; +// m18 -= fq; +// +// // q=13 +// fq = dist[14*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 -= fq; +// m18 -= fq; +// +// // q=14 +// fq = dist[13*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 += fq; +// m18 += fq; +// +// // q=15 
+// fq = dist[16*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 = fq; +// m17 += fq; +// m18 -= fq; +// +// // q=16 +// fq = dist[15*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 += fq; +// m17 -= fq; +// m18 += fq; +// +// // q=17 +// fq = dist[18*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 += fq; +// m18 += fq; +// +// // q=18 +// fq = dist[17*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 -= fq; +// m18 -= fq; +// +// // Compute greyscale related parameters +// c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); +// if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes +// //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); +// c1 = porosity*0.5*GeoFun/sqrt(perm); +// if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes +// +// vx = jx/rho0+0.5*(porosity*Gx); +// vy = jy/rho0+0.5*(porosity*Gy); +// vz = jz/rho0+0.5*(porosity*Gz); +// v_mag=sqrt(vx*vx+vy*vy+vz*vz); +// ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); +// uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); +// uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); +// u_mag=sqrt(ux*ux+uy*uy+uz*uz); +// +// //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium +// Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); +// Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); +// Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); +// if (porosity==1.0){ +// Fx=rho0*(Gx); +// Fy=rho0*(Gy); +// Fz=rho0*(Gz); +// } +// +// // write the velocity +// Velocity[n] = ux; +// Velocity[Np+n] = uy; +// Velocity[2*Np+n] = uz; +// +// //........................................................................ +// //..............carry out relaxation process.............................. +// //..........Toelke, Fruediger et. al. 2006................................ 
+// if (C == 0.0) nx = ny = nz = 0.0; +// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); +// m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); +// m10 = m10 + rlx_setA*( - m10); +// //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); +// m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); +// m12 = m12 + rlx_setA*( - m12); +// //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); +// m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); +// m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); +// m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// +// //.................inverse transformation...................................................... 
+// // q=0 +// fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; +// dist[n] = fq; +// +// // q = 1 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); +// dist[1*Np+n] = fq; +// +// // q=2 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); +// dist[2*Np+n] = fq; +// +// // q = 3 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// dist[3*Np+n] = fq; +// +// // q = 4 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// dist[4*Np+n] = fq; +// +// // q = 5 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// dist[5*Np+n] = fq; +// +// // q = 6 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// dist[6*Np+n] = fq; +// +// // q = 7 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); +// dist[7*Np+n] = fq; +// +// +// // q = 8 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 +// +mrt_V12*m12+0.25*m13+0.125*(m17-m16); +// dist[8*Np+n] = fq; +// +// // q = 9 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); +// dist[9*Np+n] = fq; +// +// // q = 10 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); +// dist[10*Np+n] = fq; +// +// +// // q = 11 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12+0.25*m15+0.125*(m18-m16); +// dist[11*Np+n] = fq; +// +// // q = 12 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ +// mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); +// dist[12*Np+n] = fq; +// +// // q = 13 +// fq = mrt_V1*rho+mrt_V9*m1 +// 
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15-0.125*(m16+m18); +// dist[13*Np+n] = fq; +// +// // q= 14 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15+0.125*(m16+m18); +// +// dist[14*Np+n] = fq; +// +// // q = 15 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); +// dist[15*Np+n] = fq; +// +// // q = 16 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); +// dist[16*Np+n] = fq; +// +// +// // q = 17 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); +// dist[17*Np+n] = fq; +// +// // q = 18 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); +// dist[18*Np+n] = fq; +// //........................................................................ +// +// // Instantiate mass transport distributions +// // Stationary value - distribution 0 +// nAB = 1.0/(nA+nB); +// Aq[n] = 0.3333333333333333*nA; +// Bq[n] = 0.3333333333333333*nB; +// +// //............................................... +// // q = 0,2,4 +// // Cq = {1,0,0}, {0,1,0}, {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nx; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; +// +// Aq[1*Np+n] = a1; +// Bq[1*Np+n] = b1; +// Aq[2*Np+n] = a2; +// Bq[2*Np+n] = b2; +// +// //............................................... 
+// // q = 2 +// // Cq = {0,1,0} +// delta = beta*nA*nB*nAB*0.1111111111111111*ny; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; +// +// Aq[3*Np+n] = a1; +// Bq[3*Np+n] = b1; +// Aq[4*Np+n] = a2; +// Bq[4*Np+n] = b2; +// //............................................... +// // q = 4 +// // Cq = {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nz; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; +// +// Aq[5*Np+n] = a1; +// Bq[5*Np+n] = b1; +// Aq[6*Np+n] = a2; +// Bq[6*Np+n] = b2; +// //............................................... +// +// } +// } +//} + +//__global__ void dvc_ScaLBL_D3Q19_GreyscaleColor_Init(double *dist, double *Porosity, int Np) +//{ +// int n; +// int S = Np/NBLOCKS/NTHREADS + 1; +// double porosity; +// for (int s=0; s>>(dist,Porosity,Np); +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_GreyscaleColor_Init: %s \n",hipGetErrorString(err)); +// } +//} + +//Model-1 & 4 +extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + //cudaProfilerStart(); + //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor, cudaFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor<<>>(Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm, Vel, Pressure, + 
rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_GreyscaleColor: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); + +} + +//Model-1 & 4 +extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel,double *Pressure, + double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + //cudaProfilerStart(); + //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor, cudaFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm,Vel,Pressure, + + rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff,alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_GreyscaleColor: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + dvc_ScaLBL_PhaseField_InitFromRestart<<>>(Den, Aq, Bq, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_PhaseField_InitFromRestart: %s \n",hipGetErrorString(err)); + } +} +////Model-2&3 +//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, +// double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, +// double Fx, double 
Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ +// +// //cudaProfilerStart(); +// //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor, cudaFuncCachePreferL1); +// +// dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor<<>>(Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm, Vel, +// rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_AAeven_GreyscaleColor: %s \n",hipGetErrorString(err)); +// } +// //cudaProfilerStop(); +// +//} +// +////Model-2&3 +//extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel, +// double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, +// double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ +// +// //cudaProfilerStart(); +// //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor, cudaFuncCachePreferL1); +// +// dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm,Vel, +// rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff,alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); +// +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_AAodd_GreyscaleColor: %s \n",hipGetErrorString(err)); +// } +// //cudaProfilerStop(); +//} diff --git a/hip/Ion.cu b/hip/Ion.cu new file mode 100644 index 00000000..2c48858d --- /dev/null +++ b/hip/Ion.cu @@ -0,0 +1,392 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ + 
int n,nread; + double fq,Ci; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + // q=2 + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + // q=4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + // q=6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + //dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + //dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + //dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + //dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + //dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + //dist[nr5] 
= f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + int n; + double Ci; + double ux,uy,uz; + double uEPx,uEPy,uEPz;//electrochemical induced velocity + double Ex,Ey,Ez;//electrical field + double f0,f1,f2,f3,f4,f5,f6; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0) + CD_tmp; + } + } +} + + +extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<>>(neighborList,dist,Den,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<>>(dist,Den,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_Ion<<>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, 
double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Ion<<>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_Init<<>>(dist,Den,DenInit,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<>>(dist,Den,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} diff --git a/hip/MixedGradient.cu b/hip/MixedGradient.cu new file mode 100644 index 00000000..f171f408 --- /dev/null +++ b/hip/MixedGradient.cu @@ -0,0 +1,78 @@ +/* Implement Mixed Gradient (Lee et al. 
JCP 2016)*/ +#include +#include +//#include +#include "hip/hip_runtime.h" + + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) +{ + static int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, + {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0}, + {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, + {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; + + int i,j,k,n,N,idx; + int np,np2,nm; // neighbors + double v,vp,vp2,vm; // values at neighbors + double grad; + N = Nx*Ny*Nz; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err)); + } + cudaProfilerStop(); +} + diff --git a/hip/Poisson.cu b/hip/Poisson.cu new file mode 100644 index 00000000..34975f58 --- /dev/null +++ b/hip/Poisson.cu @@ -0,0 +1,330 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){ + int n; + double psi;//electric potential + double fq; + int nread; + int idx; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu + Ey = 
(f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound + Ez = (f5-f6)*rlx*4.0; + ElectricField[n+0*Np] = Ex; + ElectricField[n+1*Np] = Ey; + ElectricField[n+2*Np] = Ez; + + // q = 0 + dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 2 + dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + //........................................................................ + } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + int n; + double psi;//electric potential + double Ex,Ey,Ez;//electric field + double rho_e;//local charge density + double f0,f1,f2,f3,f4,f5,f6; + double rlx=1.0/tau; + int idx; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(neighborList,Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<>>(Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double 
epsilon_LB,int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_Poisson<<>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Poisson<<>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Poisson_Init<<>>(Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} diff --git a/hip/Stokes.cu b/hip/Stokes.cu new file mode 100644 index 00000000..a6a05fba --- /dev/null +++ b/hip/Stokes.cu @@ -0,0 +1,996 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){ + + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + int nread; + // 
body force due to electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + //fp = dist[10*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // f2 = dist[10*Np+n]; + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + //fq = dist[Np+n]; + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + //fq = dist[11*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + //fq = dist[2*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + //fq = dist[12*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + //fq = dist[3*Np+n]; 
+ rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + //fq = dist[13*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + //fq = dist[4*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + //fq = dist[14*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + //fq = dist[5*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + //fq = dist[15*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + //fq = dist[6*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + //fq = dist[16*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + 
+ // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + //fq = dist[7*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + // write the velocity + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + + //..............incorporate external force................................................ + //..............carry out relaxation process............................................... 
+ m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9); + m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho0) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho0) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho0) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + //.................inverse transformation...................................................... + + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + 
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 
0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){ + + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + // body force due to electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; 
s>>(neighborList,dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<>>(dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + From 5c27e3830ae5d29005d3534dcb1e184bcb5803eb Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Fri, 12 Feb 2021 13:43:26 -0500 Subject: [PATCH 170/205] Fixing compile errors without MPI --- StackTrace/ErrorHandlers.h | 3 +-- common/ScaLBL.cpp | 12 +++++++----- common/WideHalo.cpp | 5 ++--- common/WideHalo.h | 3 +-- models/ColorModel.cpp | 13 ++++--------- models/DFHModel.cpp | 10 +++------- models/FreeLeeModel.cpp | 20 +++++++------------- models/GreyscaleColorModel.cpp | 7 +++---- models/GreyscaleModel.cpp | 7 +++---- models/IonModel.cpp | 6 +++--- models/MRTModel.cpp | 7 +++---- models/PoissonSolver.cpp | 8 ++++---- models/StokesModel.cpp | 8 +++----- 13 files changed, 44 insertions(+), 65 deletions(-) diff --git a/StackTrace/ErrorHandlers.h b/StackTrace/ErrorHandlers.h index 12b8d7de..6dd961d7 100644 --- a/StackTrace/ErrorHandlers.h +++ b/StackTrace/ErrorHandlers.h @@ -3,11 +3,10 @@ #include "StackTrace/StackTrace.h" +#include "common/MPI.h" #include -#include "mpi.h" - namespace StackTrace { diff --git 
a/common/ScaLBL.cpp b/common/ScaLBL.cpp index 182004ff..dcadb08e 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1,5 +1,8 @@ #include "common/ScaLBL.h" +#include + + ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ //...................................................................................... Lock=false; // unlock the communicator @@ -411,20 +414,19 @@ double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np double FZ = 0.0; ScaLBL_D3Q19_Init(fq, Np); //.......create and start timer............ - double starttime,stoptime,cputime; Barrier(); - starttime = MPI_Wtime(); - //......................................... + auto t1 = std::chrono::system_clock::now(); for (int t=0; t( t2 - t1 ).count(); + double cputime = 0.5*diff/TIMESTEPS; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; return MLUPS; diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index 0c8f1781..ee2b2ca5 100644 --- a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -298,11 +298,10 @@ ScaLBLWideHalo_Communicator::~ScaLBLWideHalo_Communicator() void ScaLBLWideHalo_Communicator::Recv(double *data){ //................................................................................... - MPI_Waitall(26,req1,stat1); - MPI_Waitall(26,req2,stat2); + Utilities::MPI::waitAll(26,req1); + Utilities::MPI::waitAll(26,req2); ScaLBL_DeviceBarrier(); //................................................................................... - //................................................................................... 
ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, Nh); ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, Nh); ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, Nh); diff --git a/common/WideHalo.h b/common/WideHalo.h index 55c76e50..5c9fcedf 100644 --- a/common/WideHalo.h +++ b/common/WideHalo.h @@ -4,6 +4,7 @@ This class implements support for halo widths larger than 1 #ifndef WideHalo_H #define WideHalo_H #include "common/ScaLBL.h" +#include "common/MPI.h" class ScaLBLWideHalo_Communicator{ public: @@ -52,9 +53,7 @@ private: int sendtag,recvtag; // Give the object it's own MPI communicator RankInfoStruct rank_info; - MPI_Group Group; // Group of processors associated with this domain MPI_Request req1[26],req2[26]; - MPI_Status stat1[26],stat2[26]; //...................................................................................... // MPI ranks for all 18 neighbors //...................................................................................... diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index a9886337..a46ca337 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -688,20 +688,15 @@ void ScaLBL_ColorModel::Run(){ fflush(stdout); } - //.......create and start timer............ - double starttime,stoptime,cputime; - ScaLBL_Comm->Barrier(); - comm.barrier(); - starttime = MPI_Wtime(); - //......................................... 
- //************ MAIN ITERATION LOOP ***************************************/ + comm.barrier(); PROFILE_START("Loop"); //std::shared_ptr analysis_db; bool Regular = false; auto current_db = db->cloneDatabase(); runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); //analysis.createThreads( analysis_method, 4 ); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -1034,10 +1029,10 @@ void ScaLBL_ColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_Comm->Barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 7fd61271..24639d3e 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -490,14 +490,10 @@ void ScaLBL_DFHModel::Run(){ if (rank==0) printf("********************************************************\n"); if (rank==0) printf("No. of timesteps: %i \n", timestepMax); - //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); - //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ - + auto t1 = std::chrono::system_clock::now(); bool Regular = true; PROFILE_START("Loop"); runAnalysis analysis( analysis_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); @@ -589,10 +585,10 @@ void ScaLBL_DFHModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; if (rank==0) printf("********************************************************\n"); diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index b0ee372d..5a6805b6 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -719,14 +719,9 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ fflush(stdout); } - //.......create and start timer............ - double starttime,stoptime,cputime; - ScaLBL_Comm->Barrier(); - comm.barrier(); - starttime = MPI_Wtime(); - //......................................... 
- //************ MAIN ITERATION LOOP ***************************************/ + comm.barrier(); + auto t1 = std::chrono::system_clock::now(); PROFILE_START("Loop"); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } @@ -818,10 +813,10 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; @@ -846,14 +841,13 @@ void ScaLBL_FreeLeeModel::Run_SingleFluid(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ PROFILE_START("Loop"); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -916,10 +910,10 @@ void ScaLBL_FreeLeeModel::Run_SingleFluid(){ PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index dc1e12f9..5d2b4d07 100644 --- a/models/GreyscaleColorModel.cpp +++ b/models/GreyscaleColorModel.cpp @@ -910,10 +910,8 @@ void ScaLBL_GreyscaleColorModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... 
//************ MAIN ITERATION LOOP ***************************************/ @@ -923,6 +921,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ auto current_db = db->cloneDatabase(); //runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); //analysis.createThreads( analysis_method, 4 ); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -1319,10 +1318,10 @@ void ScaLBL_GreyscaleColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_Comm->Barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 6c580cc5..308cc1e6 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -485,10 +485,8 @@ void ScaLBL_GreyscaleModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... 
Minkowski Morphology(Mask); @@ -500,6 +498,7 @@ void ScaLBL_GreyscaleModel::Run(){ double rlx_eff = 1.0/tau_eff; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ // *************ODD TIMESTEP*************// @@ -744,10 +743,10 @@ void ScaLBL_GreyscaleModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/IonModel.cpp b/models/IonModel.cpp index bdd07473..67887811 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -784,7 +784,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ //.......create and start timer............ 
//double starttime,stoptime,cputime; //ScaLBL_Comm->Barrier(); comm.barrier(); - //starttime = MPI_Wtime(); + //auto t1 = std::chrono::system_clock::now(); for (int ic=0; icLastExterior(), Np); } //************************************************************************/ - //stoptime = MPI_Wtime(); //if (rank==0) printf("-------------------------------------------------------------------\n"); //// Compute the walltime per timestep - //cputime = (stoptime - starttime)/timestep; + //auto t2 = std::chrono::system_clock::now(); + //double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; //// Performance obtained from each node //double MLUPS = double(Np)/cputime/1000000; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 01d13762..e1a451e2 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -230,14 +230,13 @@ void ScaLBL_MRTModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); if (rank==0) printf("********************************************************\n"); timestep=0; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ timestep++; @@ -354,10 +353,10 @@ void ScaLBL_MRTModel::Run(){ } } //************************************************************************/ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git 
a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 58c0deac..25a31600 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -522,8 +522,8 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, int timestep_from_Study){ //.......create and start timer............ //double starttime,stoptime,cputime; - //ScaLBL_Comm->Barrier(); comm.barrier(); - //starttime = MPI_Wtime(); + //comm.barrier(); + //auto t1 = std::chrono::system_clock::now(); timestep=0; double error = 1.0; @@ -579,11 +579,11 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, int timestep_from_Study){ } //************************************************************************/ - //stoptime = MPI_Wtime(); ////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n"); ////if (rank==0) printf("---------------------------------------------------------------------------\n"); //// Compute the walltime per timestep - //cputime = (stoptime - starttime)/timestep; + //auto t2 = std::chrono::system_clock::now(); + //double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; //// Performance obtained from each node //double MLUPS = double(Np)/cputime/1000000; diff --git a/models/StokesModel.cpp b/models/StokesModel.cpp index 50b7fa39..fe6b0c92 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -573,16 +573,14 @@ void ScaLBL_StokesModel::Run(){ } } - //.......create and start timer............ 
- double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); if (rank==0) printf("****************************************************************\n"); if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax); if (rank==0) printf("****************************************************************\n"); timestep=0; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ timestep++; @@ -700,10 +698,10 @@ void ScaLBL_StokesModel::Run(){ } } //************************************************************************/ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; From 97517f648266ec1bb40f6217e9443917475c0a5b Mon Sep 17 00:00:00 2001 From: Mark Allen Berrill Date: Fri, 12 Feb 2021 13:19:37 -0600 Subject: [PATCH 171/205] Fixing compile errors with HIP --- hip/CMakeLists.txt | 5 +-- hip/D3Q19.cu | 72 ++++++++++++++++++++++++++++++++++++++++++++ hip/MixedGradient.cu | 7 ++--- 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index f63cf035..9e613960 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,6 +1,7 @@ SET( HIP_SEPERABLE_COMPILATION ON ) -SET_SOURCE_FILES_PROPERTIES( BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) -HIP_ADD_LIBRARY( lbpm-hip BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.cu SHARED HIPCC_OPTIONS 
${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) +FILE( GLOB HIP_SOURCES "*.cu" ) +SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) +HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) #TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so ) #TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip ) #ADD_DEPENDENCIES( lbpm-hip copy-include ) diff --git a/hip/D3Q19.cu b/hip/D3Q19.cu index 13d4ab75..fe06820b 100644 --- a/hip/D3Q19.cu +++ b/hip/D3Q19.cu @@ -89,9 +89,25 @@ __global__ void sum_kernel_block(double *sum, double *input, int n) __inline__ __device__ double warpReduceSum(double val) { +#if 0 for (int offset = warpSize/2; offset > 0; offset /= 2) val += __shfl_down_sync(0xFFFFFFFF, val, offset, 32); return val; +#else + short int id = threadIdx.x % warpSize; + __shared__ double tmp[64]; + tmp[id] = val; + __syncthreads(); + if ( warpSize == 64) { + tmp[id] += tmp[id+32]; __syncthreads(); + } + tmp[id] += tmp[id+16]; __syncthreads(); + tmp[id] += tmp[id+8]; __syncthreads(); + tmp[id] += tmp[id+4]; __syncthreads(); + tmp[id] += tmp[id+2]; __syncthreads(); + tmp[id] += tmp[id+1]; __syncthreads(); + return tmp[0]; +#endif } __inline__ __device__ @@ -1730,6 +1746,44 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, } } +__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int idx, n; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + double f5 = 0.111111111111111111111111 - dist[6*Np+n]; + double f11 = 0.05555555555555555555556 - dist[12*Np+n]; + double f14 = 0.05555555555555555555556 - dist[13*Np+n]; + double f15 = 0.05555555555555555555556 - dist[16*Np+n]; + double f18 = 0.05555555555555555555556 - dist[17*Np+n]; + + dist[6*Np+n] = f5; + dist[12*Np+n] = 
f11; + dist[13*Np+n] = f14; + dist[16*Np+n] = f15; + dist[17*Np+n] = f18; + } +} + +__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int idx, n; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + double f6 = 0.111111111111111111111111 - dist[5*Np+n]; + double f12 = 0.05555555555555555555556 - dist[11*Np+n]; + double f13 = 0.05555555555555555555556 - dist[14*Np+n] ; + double f16 = 0.05555555555555555555556 - dist[15*Np+n]; + double f17 = 0.05555555555555555555556 - dist[18*Np+n]; + + dist[5*Np+n] = f6; + dist[11*Np+n] = f12; + dist[14*Np+n] = f13; + dist[15*Np+n] = f16; + dist[18*Np+n] = f17; + } +} + __global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np) { int idx, n; @@ -2605,6 +2659,24 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub } +extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_z<<>>(list, dist, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("HIP error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_Z<<>>(list, dist, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("HIP error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + extern "C" double deviceReduce(double *in, double* out, int N) { int threads = 512; int blocks = min((N + threads - 1) / threads, 1024); diff --git a/hip/MixedGradient.cu b/hip/MixedGradient.cu index f171f408..31518ee5 100644 --- a/hip/MixedGradient.cu +++ b/hip/MixedGradient.cu @@ -1,5 +1,4 @@ /* Implement Mixed Gradient (Lee et al. 
JCP 2016)*/ -#include #include //#include #include "hip/hip_runtime.h" @@ -10,7 +9,7 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) { - static int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, + static const int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0}, {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; @@ -66,13 +65,13 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gr extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) { - cudaProfilerStart(); + hipProfilerStart(); dvc_ScaLBL_D3Q19_MixedGradient<<>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz); hipError_t err = hipGetLastError(); if (hipSuccess != err){ printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err)); } - cudaProfilerStop(); + hipProfilerStop(); } From 317f5f3bbea9120751854610e79bacb14aa86579 Mon Sep 17 00:00:00 2001 From: James McClure Date: Fri, 12 Feb 2021 14:27:49 -0500 Subject: [PATCH 172/205] add SCAL wetting convention --- models/ColorModel.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 3b8edd6c..e0617e32 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -288,12 +288,17 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) auto LabelList = color_db->getVector( "ComponentLabels" ); auto AffinityList = color_db->getVector( "ComponentAffinity" ); + auto WettingConvention = color_db->getWithDefault( "WettingConvention", "none" ); NLABELS=LabelList.size(); if (NLABELS != AffinityList.size()){ ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! 
\n"); } + if (WettingConvention == "SCAL"){ + for (size_t idx=0; idx Date: Mon, 15 Feb 2021 18:33:07 -0500 Subject: [PATCH 173/205] corrected the phase field loading and removed a wrong factor in mixed-grad --- cpu/FreeLee.cpp | 16 ++++++++-------- models/FreeLeeModel.cpp | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 32a7b568..c77436a2 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -236,12 +236,12 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou for (int n=start; nRegularLayout(Map,mu_phi_host,PhaseField); + sprintf(LocalRankFilename,"Chem_Init.%05i.raw",rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad_host[0],PhaseField); + FILE *CGX_FILE; + sprintf(LocalRankFilename,"Gradient_X_Init.%05i.raw",rank); + CGX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGX_FILE); + fclose(CGX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad_host[Np],PhaseField); + FILE *CGY_FILE; + sprintf(LocalRankFilename,"Gradient_Y_Init.%05i.raw",rank); + CGY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGY_FILE); + fclose(CGY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad_host[2*Np],PhaseField); + FILE *CGZ_FILE; + sprintf(LocalRankFilename,"Gradient_Z_Init.%05i.raw",rank); + CGZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGZ_FILE); + fclose(CGZ_FILE); delete [] phase; delete [] ColorGrad_host; From a0b42380a42338349e1f32bfb740ca93acdc7edd Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 21 Feb 2021 23:56:10 -0500 Subject: [PATCH 174/205] fixed wide halo bug --- common/WideHalo.cpp | 107 +++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/common/WideHalo.cpp b/common/WideHalo.cpp index ee2b2ca5..ca82473f 100644 --- 
a/common/WideHalo.cpp +++ b/common/WideHalo.cpp @@ -234,59 +234,59 @@ void ScaLBLWideHalo_Communicator::Send(double *data){ //................................................................................... // Send / Recv all the phase indcator field values //................................................................................... - req1[0] = MPI_COMM_SCALBL.Isend(&sendCount_x,1,rank_x,sendtag+0); - req2[0] = MPI_COMM_SCALBL.Irecv(&recvCount_X,1,rank_X,recvtag+0); - req1[1] = MPI_COMM_SCALBL.Isend(&sendCount_X,1,rank_X,sendtag+1); - req2[1] = MPI_COMM_SCALBL.Irecv(&recvCount_x,1,rank_x,recvtag+1); - req1[2] = MPI_COMM_SCALBL.Isend(&sendCount_y,1,rank_y,sendtag+2); - req2[2] = MPI_COMM_SCALBL.Irecv(&recvCount_Y,1,rank_Y,recvtag+2); - req1[3] = MPI_COMM_SCALBL.Isend(&sendCount_Y,1,rank_Y,sendtag+3); - req2[3] = MPI_COMM_SCALBL.Irecv(&recvCount_y,1,rank_y,recvtag+3); - req1[4] = MPI_COMM_SCALBL.Isend(&sendCount_z,1,rank_z,sendtag+4); - req2[4] = MPI_COMM_SCALBL.Irecv(&recvCount_Z,1,rank_Z,recvtag+4); - req1[5] = MPI_COMM_SCALBL.Isend(&sendCount_Z,1,rank_Z,sendtag+5); - req2[5] = MPI_COMM_SCALBL.Irecv(&recvCount_z,1,rank_z,recvtag+5); - req1[6] = MPI_COMM_SCALBL.Isend(&sendCount_xy,1,rank_xy,sendtag+6); - req2[6] = MPI_COMM_SCALBL.Irecv(&recvCount_XY,1,rank_XY,recvtag+6); - req1[7] = MPI_COMM_SCALBL.Isend(&sendCount_XY,1,rank_XY,sendtag+7); - req2[7] = MPI_COMM_SCALBL.Irecv(&recvCount_xy,1,rank_xy,recvtag+7); - req1[8] = MPI_COMM_SCALBL.Isend(&sendCount_Xy,1,rank_Xy,sendtag+8); - req2[8] = MPI_COMM_SCALBL.Irecv(&recvCount_xY,1,rank_xY,recvtag+8); - req1[9] = MPI_COMM_SCALBL.Isend(&sendCount_xY,1,rank_xY,sendtag+9); - req2[9] = MPI_COMM_SCALBL.Irecv(&recvCount_Xy,1,rank_Xy,recvtag+9); - req1[10] = MPI_COMM_SCALBL.Isend(&sendCount_xz,1,rank_xz,sendtag+10); - req2[10] = MPI_COMM_SCALBL.Irecv(&recvCount_XZ,1,rank_XZ,recvtag+10); - req1[11] = MPI_COMM_SCALBL.Isend(&sendCount_XZ,1,rank_XZ,sendtag+11); - req2[11] = 
MPI_COMM_SCALBL.Irecv(&recvCount_xz,1,rank_xz,recvtag+11); - req1[12] = MPI_COMM_SCALBL.Isend(&sendCount_Xz,1,rank_Xz,sendtag+12); - req2[12] = MPI_COMM_SCALBL.Irecv(&recvCount_xZ,1,rank_xZ,recvtag+12); - req1[13] = MPI_COMM_SCALBL.Isend(&sendCount_xZ,1,rank_xZ,sendtag+13); - req2[13] = MPI_COMM_SCALBL.Irecv(&recvCount_Xz,1,rank_Xz,recvtag+13); - req1[14] = MPI_COMM_SCALBL.Isend(&sendCount_yz,1,rank_yz,sendtag+14); - req2[14] = MPI_COMM_SCALBL.Irecv(&recvCount_YZ,1,rank_YZ,recvtag+14); - req1[15] = MPI_COMM_SCALBL.Isend(&sendCount_YZ,1,rank_YZ,sendtag+15); - req2[15] = MPI_COMM_SCALBL.Irecv(&recvCount_yz,1,rank_yz,recvtag+15); - req1[16] = MPI_COMM_SCALBL.Isend(&sendCount_Yz,1,rank_Yz,sendtag+16); - req2[16] = MPI_COMM_SCALBL.Irecv(&recvCount_yZ,1,rank_yZ,recvtag+16); - req1[17] = MPI_COMM_SCALBL.Isend(&sendCount_yZ,1,rank_yZ,sendtag+17); - req2[17] = MPI_COMM_SCALBL.Irecv(&recvCount_Yz,1,rank_Yz,recvtag+17); + req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x,sendCount_x,rank_x,sendtag+0); + req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X,recvCount_X,rank_X,recvtag+0); + req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X,sendCount_X,rank_X,sendtag+1); + req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x,recvCount_x,rank_x,recvtag+1); + req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y,sendCount_y,rank_y,sendtag+2); + req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y,recvCount_Y,rank_Y,recvtag+2); + req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y,sendCount_Y,rank_Y,sendtag+3); + req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y,recvCount_y,rank_y,recvtag+3); + req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z,sendCount_z,rank_z,sendtag+4); + req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z,recvCount_Z,rank_Z,recvtag+4); + req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z,sendCount_Z,rank_Z,sendtag+5); + req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z,recvCount_z,rank_z,recvtag+5); + req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy,sendCount_xy,rank_xy,sendtag+6); + req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY,recvCount_XY,rank_XY,recvtag+6); + req1[7] = 
MPI_COMM_SCALBL.Isend(sendbuf_XY,sendCount_XY,rank_XY,sendtag+7); + req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy,recvCount_xy,rank_xy,recvtag+7); + req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy,sendCount_Xy,rank_Xy,sendtag+8); + req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY,recvCount_xY,rank_xY,recvtag+8); + req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY,sendCount_xY,rank_xY,sendtag+9); + req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy,recvCount_Xy,rank_Xy,recvtag+9); + req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz,sendCount_xz,rank_xz,sendtag+10); + req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ,recvCount_XZ,rank_XZ,recvtag+10); + req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ,sendCount_XZ,rank_XZ,sendtag+11); + req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz,recvCount_xz,rank_xz,recvtag+11); + req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz,sendCount_Xz,rank_Xz,sendtag+12); + req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ,recvCount_xZ,rank_xZ,recvtag+12); + req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ,sendCount_xZ,rank_xZ,sendtag+13); + req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz,recvCount_Xz,rank_Xz,recvtag+13); + req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz,sendCount_yz,rank_yz,sendtag+14); + req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ,recvCount_YZ,rank_YZ,recvtag+14); + req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ,sendCount_YZ,rank_YZ,sendtag+15); + req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz,recvCount_yz,rank_yz,recvtag+15); + req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz,sendCount_Yz,rank_Yz,sendtag+16); + req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ,recvCount_yZ,rank_yZ,recvtag+16); + req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ,sendCount_yZ,rank_yZ,sendtag+17); + req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz,recvCount_Yz,rank_Yz,recvtag+17); /* Corners */ - req1[18] = MPI_COMM_SCALBL.Isend(&sendCount_xyz,1,rank_xyz,sendtag+18); - req2[18] = MPI_COMM_SCALBL.Irecv(&recvCount_XYZ,1,rank_XYZ,recvtag+18); - req1[19] = MPI_COMM_SCALBL.Isend(&sendCount_XYz,1,rank_XYz,sendtag+19); - req2[19] = 
MPI_COMM_SCALBL.Irecv(&recvCount_xyZ,1,rank_xyZ,recvtag+19); - req1[20] = MPI_COMM_SCALBL.Isend(&sendCount_Xyz,1,rank_Xyz,sendtag+20); - req2[20] = MPI_COMM_SCALBL.Irecv(&recvCount_xYZ,1,rank_xYZ,recvtag+20); - req1[21] = MPI_COMM_SCALBL.Isend(&sendCount_xYz,1,rank_xYz,sendtag+21); - req2[21] = MPI_COMM_SCALBL.Irecv(&recvCount_XyZ,1,rank_XyZ,recvtag+21); - req1[22] = MPI_COMM_SCALBL.Isend(&sendCount_xyZ,1,rank_xyZ,sendtag+22); - req2[22] = MPI_COMM_SCALBL.Irecv(&recvCount_XYz,1,rank_XYz,recvtag+22); - req1[23] = MPI_COMM_SCALBL.Isend(&sendCount_XYZ,1,rank_XYZ,sendtag+23); - req2[23] = MPI_COMM_SCALBL.Irecv(&recvCount_xyz,1,rank_xyz,recvtag+23); - req1[24] = MPI_COMM_SCALBL.Isend(&sendCount_XyZ,1,rank_XyZ,sendtag+24); - req2[24] = MPI_COMM_SCALBL.Irecv(&recvCount_xYz,1,rank_xYz,recvtag+24); - req1[25] = MPI_COMM_SCALBL.Isend(&sendCount_xYZ,1,rank_xYZ,sendtag+25); - req2[25] = MPI_COMM_SCALBL.Irecv(&recvCount_Xyz,1,rank_Xyz,recvtag+25); + req1[18] = MPI_COMM_SCALBL.Isend(sendbuf_xyz,sendCount_xyz,rank_xyz,sendtag+18); + req2[18] = MPI_COMM_SCALBL.Irecv(recvbuf_XYZ,recvCount_XYZ,rank_XYZ,recvtag+18); + req1[19] = MPI_COMM_SCALBL.Isend(sendbuf_XYz,sendCount_XYz,rank_XYz,sendtag+19); + req2[19] = MPI_COMM_SCALBL.Irecv(recvbuf_xyZ,recvCount_xyZ,rank_xyZ,recvtag+19); + req1[20] = MPI_COMM_SCALBL.Isend(sendbuf_Xyz,sendCount_Xyz,rank_Xyz,sendtag+20); + req2[20] = MPI_COMM_SCALBL.Irecv(recvbuf_xYZ,recvCount_xYZ,rank_xYZ,recvtag+20); + req1[21] = MPI_COMM_SCALBL.Isend(sendbuf_xYz,sendCount_xYz,rank_xYz,sendtag+21); + req2[21] = MPI_COMM_SCALBL.Irecv(recvbuf_XyZ,recvCount_XyZ,rank_XyZ,recvtag+21); + req1[22] = MPI_COMM_SCALBL.Isend(sendbuf_xyZ,sendCount_xyZ,rank_xyZ,sendtag+22); + req2[22] = MPI_COMM_SCALBL.Irecv(recvbuf_XYz,recvCount_XYz,rank_XYz,recvtag+22); + req1[23] = MPI_COMM_SCALBL.Isend(sendbuf_XYZ,sendCount_XYZ,rank_XYZ,sendtag+23); + req2[23] = MPI_COMM_SCALBL.Irecv(recvbuf_xyz,recvCount_xyz,rank_xyz,recvtag+23); + req1[24] = 
MPI_COMM_SCALBL.Isend(sendbuf_XyZ,sendCount_XyZ,rank_XyZ,sendtag+24); + req2[24] = MPI_COMM_SCALBL.Irecv(recvbuf_xYz,recvCount_xYz,rank_xYz,recvtag+24); + req1[25] = MPI_COMM_SCALBL.Isend(sendbuf_xYZ,sendCount_xYZ,rank_xYZ,sendtag+25); + req2[25] = MPI_COMM_SCALBL.Irecv(recvbuf_Xyz,recvCount_Xyz,rank_Xyz,recvtag+25); //................................................................................... } @@ -302,6 +302,9 @@ void ScaLBLWideHalo_Communicator::Recv(double *data){ Utilities::MPI::waitAll(26,req2); ScaLBL_DeviceBarrier(); //................................................................................... + //printf("Ready to unpack %i to x\n",recvCount_x); + //printf(" print first 10 values...\n"); + //for (int idx=0; idx<10; idx++) printf(" recvBuf[%i]=%f \n",idx,recvbuf_x[idx]); ScaLBL_Scalar_Unpack(dvcRecvList_x, recvCount_x,recvbuf_x, data, Nh); ScaLBL_Scalar_Unpack(dvcRecvList_y, recvCount_y,recvbuf_y, data, Nh); ScaLBL_Scalar_Unpack(dvcRecvList_X, recvCount_X,recvbuf_X, data, Nh); From 991350500f684af0cabc65e8ad724fcbb93b0ce7 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 25 Feb 2021 11:38:45 -0500 Subject: [PATCH 175/205] Updating manual configuration of MPI --- cmake/libraries.cmake | 132 ++++++++++++++++-------------------------- cmake/macros.cmake | 4 +- 2 files changed, 53 insertions(+), 83 deletions(-) diff --git a/cmake/libraries.cmake b/cmake/libraries.cmake index 43d2726e..14ff1ac9 100644 --- a/cmake/libraries.cmake +++ b/cmake/libraries.cmake @@ -42,92 +42,62 @@ ENDMACRO() MACRO( CONFIGURE_MPI ) # Determine if we want to use MPI CHECK_ENABLE_FLAG(USE_MPI 1 ) + CHECK_ENABLE_FLAG( USE_MPI 1 ) + MESSAGE("MPIEXEC = ${MPIEXEC}") IF ( USE_MPI ) - # Check if we specified the MPI directory - IF ( MPI_DIRECTORY ) - # Check the provided MPI directory for include files - VERIFY_PATH( "${MPI_DIRECTORY}" ) - IF ( EXISTS "${MPI_DIRECTORY}/include/mpi.h" ) - SET( MPI_INCLUDE_PATH "${MPI_DIRECTORY}/include" ) - ELSEIF ( EXISTS 
"${MPI_DIRECTORY}/Inc/mpi.h" ) - SET( MPI_INCLUDE_PATH "${MPI_DIRECTORY}/Inc" ) - ELSE() - MESSAGE( FATAL_ERROR "mpi.h not found in ${MPI_DIRECTORY}/include" ) - ENDIF () - INCLUDE_DIRECTORIES ( ${MPI_INCLUDE_PATH} ) - SET ( MPI_INCLUDE ${MPI_INCLUDE_PATH} ) - # Set MPI libraries - IF ( ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" ) - FIND_LIBRARY( MSMPI_LIB NAMES msmpi PATHS "${MPI_DIRECTORY}/Lib/x64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPI_LIB NAMES msmpi PATHS "${MPI_DIRECTORY}/Lib/amd64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPIFEC_LIB NAMES msmpifec PATHS "${MPI_DIRECTORY}/Lib/x64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPIFEC_LIB NAMES msmpifec PATHS "${MPI_DIRECTORY}/Lib/amd64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPIFMC_LIB NAMES msmpifmc PATHS "${MPI_DIRECTORY}/Lib/x64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPIFMC_LIB NAMES msmpifmc PATHS "${MPI_DIRECTORY}/Lib/amd64" NO_DEFAULT_PATH ) - SET( MPI_LIBRARIES ${MSMPI_LIB} ${MSMPIFEC_LIB} ${MSMPIFMC_LIB} ) - ENDIF() - # Set the mpi executable - IF ( MPIEXEC ) - # User specified the MPI command directly, use as is - ELSEIF ( MPIEXEC_CMD ) - # User specified the name of the MPI executable - SET ( MPIEXEC ${MPI_DIRECTORY}/bin/${MPIEXEC_CMD} ) - IF ( NOT EXISTS ${MPIEXEC} ) - MESSAGE( FATAL_ERROR "${MPIEXEC_CMD} not found in ${MPI_DIRECTORY}/bin" ) - ENDIF () - ELSE () - # Search for the MPI executable in the current directory - FIND_PROGRAM( MPIEXEC NAMES mpiexec mpirun lamexec PATHS ${MPI_DIRECTORY}/bin NO_DEFAULT_PATH ) - IF ( NOT MPIEXEC ) - MESSAGE( FATAL_ERROR "Could not locate mpi executable" ) - ENDIF() - ENDIF () - # Set MPI flags - IF ( NOT MPIEXEC_NUMPROC_FLAG ) - SET( MPIEXEC_NUMPROC_FLAG "-np" ) - ENDIF() - ELSEIF ( MPI_COMPILER ) - # The mpi compiler should take care of everything - IF ( MPI_INCLUDE ) - INCLUDE_DIRECTORIES( ${MPI_INCLUDE} ) - ENDIF() + MESSAGE( "Configuring MPI" ) + IF ( MPIEXEC ) + SET( MPIEXEC_EXECUTABLE ${MPIEXEC} ) + ENDIF() + IF ( NOT MPI_SKIP_SEARCH ) + FIND_PACKAGE( MPI ) ELSE() - # Perform 
the default search for MPI - INCLUDE ( FindMPI ) - IF ( NOT MPI_FOUND ) - MESSAGE( " MPI_INCLUDE = ${MPI_INCLUDE}" ) - MESSAGE( " MPI_LINK_FLAGS = ${MPI_LINK_FLAGS}" ) - MESSAGE( " MPI_LIBRARIES = ${MPI_LIBRARIES}" ) - MESSAGE( FATAL_ERROR "Did not find MPI" ) - ENDIF () - INCLUDE_DIRECTORIES( "${MPI_INCLUDE_PATH}" ) - SET( MPI_INCLUDE "${MPI_INCLUDE_PATH}" ) + # Write mpi test + SET( MPI_TEST_SRC "${CMAKE_CURRENT_BINARY_DIR}/test_mpi.cpp" ) + FILE(WRITE ${MPI_TEST_SRC} "#include \n" ) + FILE(APPEND ${MPI_TEST_SRC} "int main(int argc, char** argv) {\n" ) + FILE(APPEND ${MPI_TEST_SRC} " MPI_Init(NULL, NULL);\n") + FILE(APPEND ${MPI_TEST_SRC} " MPI_Finalize();\n" ) + FILE(APPEND ${MPI_TEST_SRC} "}\n" ) + # Test the compile + IF ( CMAKE_CXX_COMPILER ) + SET( TMP_FLAGS -DINCLUDE_DIRECTORIES=${MPI_CXX_INCLUDE_PATH} ) + TRY_COMPILE( MPI_TEST_CXX ${CMAKE_CURRENT_BINARY_DIR} ${MPI_TEST_SRC} + CMAKE_FLAGS ${TMP_FLAGS} + LINK_OPTIONS ${MPI_CXX_LINK_FLAGS} + LINK_LIBRARIES ${MPI_CXX_LIBRARIES} + OUTPUT_VARIABLE OUT_TXT) + IF ( NOT ${MPI_TEST} ) + MESSAGE( FATAL_ERROR "Skipping MPI search and default compile fails:\n${OUT_TXT}" ) + ENDIF() + SET( MPI_C_FOUND TRUE ) + SET( MPI_CXX_FOUND TRUE ) + SET( MPI_Fortran_FOUND TRUE ) + ENDIF() ENDIF() - # Check if we need to use MPI for serial tests - CHECK_ENABLE_FLAG( USE_MPI_FOR_SERIAL_TESTS 0 ) - # Set defaults if they have not been set - IF ( NOT MPIEXEC ) - SET( MPIEXEC mpirun ) + STRING( STRIP "${MPI_CXX_COMPILE_FLAGS}" MPI_CXX_COMPILE_FLAGS ) + STRING( STRIP "${MPI_CXX_LINK_FLAGS}" MPI_CXX_LINK_FLAGS ) + STRING( STRIP "${MPI_CXX_LIBRARIES}" MPI_CXX_LIBRARIES ) + MESSAGE( " MPI_CXX_FOUND = ${MPI_CXX_FOUND}" ) + MESSAGE( " MPI_CXX_COMPILER = ${MPI_CXX_COMPILER}" ) + MESSAGE( " MPI_CXX_COMPILE_FLAGS = ${MPI_CXX_COMPILE_FLAGS}" ) + MESSAGE( " MPI_CXX_INCLUDE_PATH = ${MPI_CXX_INCLUDE_PATH}" ) + MESSAGE( " MPI_CXX_LINK_FLAGS = ${MPI_CXX_LINK_FLAGS}" ) + MESSAGE( " MPI_CXX_LIBRARIES = ${MPI_CXX_LIBRARIES}" ) + MESSAGE( " MPIEXEC = 
${MPIEXEC}" ) + MESSAGE( " MPIEXEC_NUMPROC_FLAG = ${MPIEXEC_NUMPROC_FLAG}" ) + MESSAGE( " MPIEXEC_PREFLAGS = ${MPIEXEC_PREFLAGS}" ) + MESSAGE( " MPIEXEC_POSTFLAGS = ${MPIEXEC_POSTFLAGS}" ) + ADD_DEFINITIONS( -DUSE_MPI ) + INCLUDE_DIRECTORIES( ${MPI_CXX_INCLUDE_PATH} ) + SET( MPI_LIBRARIES ${MPI_CXX_LIBRARIES} ) + SET( MPI_LINK_FLAGS ${MPI_CXX_LINK_FLAGS} ) + IF ( NOT MPI_CXX_FOUND ) + MESSAGE( FATAL_ERROR "MPI not found" ) ENDIF() - IF ( NOT MPIEXEC_NUMPROC_FLAG ) - SET( MPIEXEC_NUMPROC_FLAG "-np" ) + IF ( USE_MPI AND NOT MPIEXEC ) + MESSAGE( FATAL_ERROR "Unable to find MPIEXEC, please set it before continuing" ) ENDIF() - # Set the definitions - ADD_DEFINITIONS( "-DUSE_MPI" ) - MESSAGE( "Using MPI" ) - MESSAGE( " MPIEXEC = ${MPIEXEC}" ) - MESSAGE( " MPIEXEC_NUMPROC_FLAG = ${MPIEXEC_NUMPROC_FLAG}" ) - MESSAGE( " MPI_INCLUDE = ${MPI_INCLUDE}" ) - MESSAGE( " MPI_LINK_FLAGS = ${MPI_LINK_FLAGS}" ) - MESSAGE( " MPI_LIBRARIES = ${MPI_LIBRARIES}" ) - ELSE() - SET( USE_MPI_FOR_SERIAL_TESTS 0 ) - SET( MPIEXEC "" ) - SET( MPIEXEC_NUMPROC_FLAG "" ) - SET( MPI_INCLUDE "" ) - SET( MPI_LINK_FLAGS "" ) - SET( MPI_LIBRARIES "" ) - MESSAGE( "Not using MPI, all parallel tests will be disabled" ) ENDIF() ENDMACRO() diff --git a/cmake/macros.cmake b/cmake/macros.cmake index d1c8dbe7..8030dfa4 100644 --- a/cmake/macros.cmake +++ b/cmake/macros.cmake @@ -681,8 +681,8 @@ MACRO( TARGET_LINK_EXTERNAL_LIBRARIES TARGET_NAME ) FOREACH ( tmp ${BLAS_LAPACK_LIBS} ) TARGET_LINK_LIBRARIES( ${TARGET_NAME} ${ARGN} ${tmp} ) ENDFOREACH() - FOREACH ( MPI_LIBRARIES ) - TARGET_LINK_LIBRARIES( ${EXE} ${ARGN} ${tmp} ) + FOREACH ( tmp ${MPI_LIBRARIES} ) + TARGET_LINK_LIBRARIES( ${TARGET_NAME} ${ARGN} ${tmp} ) ENDFOREACH() FOREACH ( tmp ${CMAKE_C_IMPLICIT_LINK_LIBRARIES} ${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES} ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES} ) From 99f1d9b72741aac7776349ecb9515b9ae305c73c Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 25 Feb 2021 20:30:48 -0500 Subject: [PATCH 176/205] add a test 
routine for mixed gradient --- common/ScaLBL.h | 1 + cpu/FreeLee.cpp | 164 ++++++++++++++++++++++++++++++++++++++++ models/FreeLeeModel.cpp | 148 ++++++++++++++++++++++++++++++++++++ models/FreeLeeModel.h | 3 + tests/CMakeLists.txt | 1 + tests/TestMixedGrad.cpp | 85 +++++++++++++++++++++ 6 files changed, 402 insertions(+) create mode 100644 tests/TestMixedGrad.cpp diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 42c51525..a6a4aeae 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -207,6 +207,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborLis extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad, int start, int finish, Np); // BOUNDARY CONDITION ROUTINES diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index c77436a2..8d5eeb0d 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -1916,3 +1916,167 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, d Pressure[n] = p; } } + +extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad, int start, int finish, Np){ + + int n,nn,nn2x,ijk; + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + //double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + double phi; + + for (int n=start; nid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... 
+ if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + //ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + //ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); + ScaLBL_Comm_WideHalo = std::shared_ptr(new ScaLBLWideHalo_Communicator(Mask,2)); + + // create the layout for the LBM + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,2); + comm.barrier(); + + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + dist_mem_size = Np*sizeof(double); + neighborSize=18*(Np*sizeof(int)); + //........................................................................... 
+ //ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); + //ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size); + //ScaLBL_AllocateDeviceMemory((void **) &hq, 7*dist_mem_size); + //ScaLBL_AllocateDeviceMemory((void **) &mu_phi, dist_mem_size); + //ScaLBL_AllocateDeviceMemory((void **) &Den, dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Phi, sizeof(double)*Nh); + //ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + //ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &ColorGrad, 3*sizeof(double)*Np); + //........................................................................... + // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + fflush(stdout); + int *TmpMap; + TmpMap=new int[Np]; + for (int k=1; kMap(i,j,k); + } + } + } + // check that TmpMap is valid + for (int idx=0; idxLastExterior(); idx++){ + auto n = TmpMap[idx]; + if (n > Nxh*Nyh*Nzh){ + printf("Bad value! idx=%i \n", n); + TmpMap[idx] = Nxh*Nyh*Nzh-1; + } + } + for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ + auto n = TmpMap[idx]; + if ( n > Nxh*Nyh*Nzh ){ + printf("Bad value! 
idx=%i \n",n); + TmpMap[idx] = Nxh*Nyh*Nzh-1; + } + } + // copy the device map + ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); + // copy the neighbor list + //ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + comm.barrier(); + + double *phase; + phase = new double[Nh]; + + for (int k=0;kid[n] + int x=i-1; + int y=j-1; + int z=k-1; + if (x<0) x=0; + if (y<0) y=0; + if (z<0) z=0; + if (x>=Nx) x=Nx-1; + if (y>=Ny) y=Ny-1; + if (z>=Nz) z=Nz-1; + int n = z*Nx*Ny+y*Nx+x; + phase[nh]=id[n]; + } + } + } + ScaLBL_CopyToDevice(Phi, phase, Nh*sizeof(double)); + ScaLBL_Comm->Barrier(); + comm.barrier(); + delete [] TmpMap; + delete [] neighborList; + delete [] phase; +} + +void ScaLBL_FreeLeeModel::MGTest(){ + + comm.barrier(); + + ScaLBL_Comm_WideHalo->Send(Phi); + ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_WideHalo->Send(Phi); + ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad, 0, ScaLBL_Comm->LastExterior(), Np); + + //check the sum of ColorGrad + double cgx_loc = 0.0; + double cgy_loc = 0.0; + double cgz_loc = 0.0; + double cgx,cgy,cgz; + double *ColorGrad_host; + ColorGrad_host = new double [3*Np]; + ScaLBL_CopyToHost(&ColorGrad_host[0],&ColorGrad[0], 3*Np*sizeof(double)); + for (int i = ScaLBL_Comm->FirstInterior(), iLastInterior(),i++){ + cgx_loc+=ColorGrad_host[0*Np+i]; + cgy_loc+=ColorGrad_host[1*Np+i]; + cgz_loc+=ColorGrad_host[2*Np+i]; + } + for (int i = 0, iLastExterior(),i++){ + cgx_loc+=ColorGrad_host[0*Np+i]; + cgy_loc+=ColorGrad_host[1*Np+i]; + cgz_loc+=ColorGrad_host[2*Np+i]; + } + cgx=Dm->Comm.sumReduce( cgx_loc); + cgy=Dm->Comm.sumReduce( cgy_loc); + cgz=Dm->Comm.sumReduce( cgz_loc); + if (rank==0){ + printf("Sum of all x-component of the mixed gradient = %.2g",cgx); + printf("Sum of all y-component of the mixed gradient = %.2g",cgy); + printf("Sum of all z-component of the mixed gradient = %.2g",cgz); + } + + delete [] ColorGrad_host; +} diff --git 
a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 1e372f50..23afe39b 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -34,6 +34,9 @@ public: void Initialize_SingleFluid(); void Run_SingleFluid(); void WriteDebug_SingleFluid(); + // test utilities + void Create_DummyPhase_MGTest(); + void MGTest(); bool Restart,pBC; int timestep,timestepMax; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8df4e6bd..b32c0b57 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,6 +43,7 @@ ADD_LBPM_EXECUTABLE( TestPoissonSolver ) ADD_LBPM_EXECUTABLE( TestIonModel ) ADD_LBPM_EXECUTABLE( TestNernstPlanck ) ADD_LBPM_EXECUTABLE( TestPNP_Stokes ) +ADD_LBPM_EXECUTABLE( TestMixedGrad ) diff --git a/tests/TestMixedGrad.cpp b/tests/TestMixedGrad.cpp new file mode 100644 index 00000000..821d6465 --- /dev/null +++ b/tests/TestMixedGrad.cpp @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Utilities.h" +#include "models/FreeLeeModel.h" + +inline void Initialize_DummyPhaseField(ScaLBL_FreeLeeModel &LeeModel){ + // initialize a bubble + int i,j,k,n; + int rank = LeeModel.Mask->rank(); + int Nx = LeeModel.Mask->Nx; + int Ny = LeeModel.Mask->Ny; + int Nz = LeeModel.Mask->Nz; + if (rank == 0) cout << "Setting up dummy phase field..." 
<< endl; + for (k=0;kid[n]=1; + LeeMOdel.id[n] = LeeModel.Mask->id[n]; + } + } + } +} + + +int main( int argc, char **argv ) +{ + + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + if ( rank == 0 ) { + printf( "********************************************************\n" ); + printf( "Running Mixed Gradient Test \n" ); + printf( "********************************************************\n" ); + } + // Initialize compute device + int device = ScaLBL_SetDevice( rank ); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE( 1 ); + // PROFILE_ENABLE_TRACE(); + // PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START( "Main" ); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank, nprocs, comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + Initialize_DummyPhaseField(LeeModel); + LeeModel.Create_DummyPhase_MGTest(); + LeeModel.MGTest() + + PROFILE_STOP( "Main" ); + auto file = db->getWithDefault( "TimerFile", "TestMixedGrad" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file, level ); + // **************************************************** + + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); + return 0; + +} From 1a393ab26604a8025899af02bd2f9973e28c80a7 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Thu, 25 Feb 2021 21:25:29 -0500 Subject: [PATCH 177/205] fix typo and build pass --- common/ScaLBL.h | 2 +- cpu/FreeLee.cpp | 2 +- models/FreeLeeModel.cpp | 18 +++++++++--------- tests/TestMixedGrad.cpp | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h 
index a6a4aeae..cbfdb636 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -207,7 +207,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborLis extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad, int start, int finish, Np); +extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad,int strideY, int strideZ, int start, int finish, int Np); // BOUNDARY CONDITION ROUTINES diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 8d5eeb0d..8aebdb02 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -1917,7 +1917,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, d } } -extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad, int start, int finish, Np){ +extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad,int strideY, int strideZ, int start, int finish, int Np){ int n,nn,nn2x,ijk; double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 588dfb14..77a485f4 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -1069,7 +1069,7 @@ void ScaLBL_FreeLeeModel::Create_DummyPhase_MGTest(){ if (rank==0) printf ("Create ScaLBL_Communicator \n"); // Create a communicator for the device (will use optimized layout) // ScaLBL_Communicator ScaLBL_Comm(Mask); // original - //ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); //ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); ScaLBL_Comm_WideHalo = std::shared_ptr(new ScaLBLWideHalo_Communicator(Mask,2)); @@ -1174,9 +1174,9 @@ void ScaLBL_FreeLeeModel::MGTest(){ comm.barrier(); ScaLBL_Comm_WideHalo->Send(Phi); - 
ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad,Nx,Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_WideHalo->Recv(Phi); + ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad,Nx,Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); //check the sum of ColorGrad double cgx_loc = 0.0; @@ -1186,12 +1186,12 @@ void ScaLBL_FreeLeeModel::MGTest(){ double *ColorGrad_host; ColorGrad_host = new double [3*Np]; ScaLBL_CopyToHost(&ColorGrad_host[0],&ColorGrad[0], 3*Np*sizeof(double)); - for (int i = ScaLBL_Comm->FirstInterior(), iLastInterior(),i++){ + for (int i = ScaLBL_Comm->FirstInterior(); iLastInterior();i++){ cgx_loc+=ColorGrad_host[0*Np+i]; cgy_loc+=ColorGrad_host[1*Np+i]; cgz_loc+=ColorGrad_host[2*Np+i]; } - for (int i = 0, iLastExterior(),i++){ + for (int i = 0; iLastExterior();i++){ cgx_loc+=ColorGrad_host[0*Np+i]; cgy_loc+=ColorGrad_host[1*Np+i]; cgz_loc+=ColorGrad_host[2*Np+i]; @@ -1200,9 +1200,9 @@ void ScaLBL_FreeLeeModel::MGTest(){ cgy=Dm->Comm.sumReduce( cgy_loc); cgz=Dm->Comm.sumReduce( cgz_loc); if (rank==0){ - printf("Sum of all x-component of the mixed gradient = %.2g",cgx); - printf("Sum of all y-component of the mixed gradient = %.2g",cgy); - printf("Sum of all z-component of the mixed gradient = %.2g",cgz); + printf("Sum of all x-component of the mixed gradient = %.2g \n",cgx); + printf("Sum of all y-component of the mixed gradient = %.2g \n",cgy); + printf("Sum of all z-component of the mixed gradient = %.2g \n",cgz); } delete [] ColorGrad_host; diff --git a/tests/TestMixedGrad.cpp b/tests/TestMixedGrad.cpp index 821d6465..dad1ae26 100644 --- a/tests/TestMixedGrad.cpp +++ b/tests/TestMixedGrad.cpp @@ -22,7 +22,7 @@ inline void Initialize_DummyPhaseField(ScaLBL_FreeLeeModel &LeeModel){ for (i=0;iid[n]=1; - LeeMOdel.id[n] = 
LeeModel.Mask->id[n]; + LeeModel.id[n] = LeeModel.Mask->id[n]; } } } @@ -68,7 +68,7 @@ int main( int argc, char **argv ) LeeModel.SetDomain(); Initialize_DummyPhaseField(LeeModel); LeeModel.Create_DummyPhase_MGTest(); - LeeModel.MGTest() + LeeModel.MGTest(); PROFILE_STOP( "Main" ); auto file = db->getWithDefault( "TimerFile", "TestMixedGrad" ); From 9c043202768a8057e399509e8a3962d40098a544 Mon Sep 17 00:00:00 2001 From: Rex Zhe Li Date: Fri, 26 Feb 2021 01:18:21 -0500 Subject: [PATCH 178/205] fix error where parameter is used before being loaded --- cpu/FreeLee.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 8aebdb02..04ed7c72 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -236,12 +236,14 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou for (int n=start; n Date: Mon, 1 Mar 2021 14:02:29 -0500 Subject: [PATCH 179/205] Updating FindMPI --- README.titan | 1 - cmake/FindMPI.cmake | 362 ------------------------- cmake/ctest_script.cmake | 3 +- cmake/libraries.cmake | 2 - ctest_script.cmake | 2 +- sample_scripts/config_build_eos | 1 - sample_scripts/config_build_rhea | 1 - sample_scripts/config_build_titan | 1 - sample_scripts/config_build_titan_silo | 1 - sample_scripts/config_poplar_hip | 1 - sample_scripts/config_summit_hip | 1 - sample_scripts/config_titan | 1 - sample_scripts/configure_arc_cluster | 1 - sample_scripts/configure_arden | 1 - sample_scripts/configure_basic_cluster | 1 - sample_scripts/configure_blueridge | 1 - sample_scripts/configure_cascades_cpu | 1 - sample_scripts/configure_desktop | 1 - sample_scripts/configure_huckleberry | 1 - sample_scripts/configure_summit | 1 - sample_scripts/configure_titan_jem | 1 - sample_scripts/configure_ubuntu | 1 - sample_scripts/daedalus_config | 1 - sample_scripts/promethius_config | 1 - 24 files changed, 2 insertions(+), 387 deletions(-) delete mode 100644 cmake/FindMPI.cmake diff --git a/README.titan 
b/README.titan index 8d087700..a3178875 100644 --- a/README.titan +++ b/README.titan @@ -26,7 +26,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=CC \ -D CFLAGS="-DCBUB" \ -D CXXFLAGS="-DCBUB" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Debug \ diff --git a/cmake/FindMPI.cmake b/cmake/FindMPI.cmake deleted file mode 100644 index e1578737..00000000 --- a/cmake/FindMPI.cmake +++ /dev/null @@ -1,362 +0,0 @@ -# - Message Passing Interface (MPI) module. -# -# The Message Passing Interface (MPI) is a library used to write -# high-performance parallel applications that use message passing, and -# is typically deployed on a cluster. MPI is a standard interface -# (defined by the MPI forum) for which many implementations are -# available. All of these implementations have somewhat different -# compilation approaches (different include paths, libraries to link -# against, etc.), and this module tries to smooth out those differences. -# -# This module will set the following variables: -# MPI_FOUND TRUE if we have found MPI -# MPI_COMPILE_FLAGS Compilation flags for MPI programs -# MPI_INCLUDE_PATH Include path(s) for MPI header -# MPI_LINK_FLAGS Linking flags for MPI programs -# MPI_LIBRARY First MPI library to link against (cached) -# MPI_EXTRA_LIBRARY Extra MPI libraries to link against (cached) -# MPI_LIBRARIES All libraries to link MPI programs against -# MPIEXEC Executable for running MPI programs -# MPIEXEC_NUMPROC_FLAG Flag to pass to MPIEXEC before giving it the -# number of processors to run on -# MPIEXEC_PREFLAGS Flags to pass to MPIEXEC directly before the -# executable to run. -# MPIEXEC_POSTFLAGS Flags to pass to MPIEXEC after all other flags. -# -# This module will attempt to auto-detect these settings, first by -# looking for a MPI compiler, which many MPI implementations provide -# as a pass-through to the native compiler to simplify the compilation -# of MPI programs. 
The MPI compiler is stored in the cache variable -# MPI_COMPILER, and will attempt to look for commonly-named drivers -# mpic++, mpicxx, mpiCC, or mpicc. If the compiler driver is found and -# recognized, it will be used to set all of the module variables. To -# skip this auto-detection, set MPI_LIBRARY and MPI_INCLUDE_PATH in -# the CMake cache. -# -# If no compiler driver is found or the compiler driver is not -# recognized, this module will then search for common include paths -# and library names to try to detect MPI. -# -# If CMake initially finds a different MPI than was intended, and you -# want to use the MPI compiler auto-detection for a different MPI -# implementation, set MPI_COMPILER to the MPI compiler driver you want -# to use (e.g., mpicxx) and then set MPI_LIBRARY to the string -# MPI_LIBRARY-NOTFOUND. When you re-configure, auto-detection of MPI -# will run again with the newly-specified MPI_COMPILER. -# -# When using MPIEXEC to execute MPI applications, you should typically -# use all of the MPIEXEC flags as follows: -# ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} PROCS ${MPIEXEC_PREFLAGS} EXECUTABLE -# ${MPIEXEC_POSTFLAGS} ARGS -# where PROCS is the number of processors on which to execute the program, -# EXECUTABLE is the MPI program, and ARGS are the arguments to pass to the -# MPI program. - -#============================================================================= -# Copyright 2001-2009 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) - -# This module is maintained by David Partyka . 
- -# A set of directories to search through in addition to the standard system paths -# that find_program will search through. -# Microsoft HPC SDK is automatically added to the system path -# Argonne National Labs MPICH2 sets a registry key that we can use. - -set(_MPI_PACKAGE_DIR - mpi - mpich - openmpi - lib/mpi - lib/mpich - lib/openmpi - "MPICH/SDK" - "Microsoft Compute Cluster Pack" - "Microsoft HPC Pack 2008 R2" - ) - -set(_MPI_PREFIX_PATH) -if(WIN32) - list(APPEND _MPI_PREFIX_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MPICH\\SMPD;binary]/..") - list(APPEND _MPI_PREFIX_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MPICH2;Path]") -endif() - -foreach(SystemPrefixDir ${CMAKE_SYSTEM_PREFIX_PATH}) - foreach(MpiPackageDir ${_MPI_PREFIX_PATH}) - if(EXISTS ${SystemPrefixDir}/${MpiPackageDir}) - list(APPEND _MPI_PREFIX_PATH "${SystemPrefixDir}/${MpiPackageDir}") - endif() - endforeach(MpiPackageDir) -endforeach(SystemPrefixDir) - -# Most mpi distros have some form of mpiexec which gives us something we can reliably look for. -find_program(MPIEXEC - NAMES mpiexec mpirun lamexec - PATHS ${_MPI_PREFIX_PATH} - PATH_SUFFIXES bin - DOC "Executable for running MPI programs." - ) - -# call get_filename_component twice to remove mpiexec and the directory it exists in (typically bin). -# This gives us a fairly reliable base directory to search for /bin /lib and /include from. -get_filename_component(_MPI_BASE_DIR "${MPIEXEC}" PATH) -get_filename_component(_MPI_BASE_DIR "${_MPI_BASE_DIR}" PATH) - -# If there is an mpi compiler find it and interogate (farther below) it for the include -# and lib dirs otherwise we will continue to search from ${_MPI_BASE_DIR}. -find_program(MPI_COMPILER - NAMES mpic++ mpicxx mpiCC mpicc - HINTS "${_MPI_BASE_DIR}" - PATH_SUFFIXES bin - DOC "MPI compiler. 
Used only to detect MPI compilation flags.") -mark_as_advanced(MPI_COMPILER) - -set(MPIEXEC_NUMPROC_FLAG "-np" CACHE STRING "Flag used by MPI to specify the number of processes for MPIEXEC; the next option will be the number of processes.") -set(MPIEXEC_PREFLAGS "" CACHE STRING "These flags will be directly before the executable that is being run by MPIEXEC.") -set(MPIEXEC_POSTFLAGS "" CACHE STRING "These flags will come after all flags given to MPIEXEC.") -set(MPIEXEC_MAX_NUMPROCS "2" CACHE STRING "Maximum number of processors available to run MPI applications.") -mark_as_advanced(MPIEXEC MPIEXEC_NUMPROC_FLAG MPIEXEC_PREFLAGS - MPIEXEC_POSTFLAGS MPIEXEC_MAX_NUMPROCS) - -if (MPI_INCLUDE_PATH AND MPI_LIBRARY) - # Do nothing: we already have MPI_INCLUDE_PATH and MPI_LIBRARY in - # the cache, and we don't want to override those settings. -elseif (MPI_COMPILER) - # Check whether the -showme:compile option works. This indicates - # that we have either Open MPI or a newer version of LAM-MPI, and - # implies that -showme:link will also work. - # Note that Windows distros do not have an mpi compiler to interogate. - exec_program(${MPI_COMPILER} - ARGS -showme:compile - OUTPUT_VARIABLE MPI_COMPILE_CMDLINE - RETURN_VALUE MPI_COMPILER_RETURN) - - if (MPI_COMPILER_RETURN EQUAL 0) - # If we appear to have -showme:compile, then we should also have - # -showme:link. Try it. - exec_program(${MPI_COMPILER} - ARGS -showme:link - OUTPUT_VARIABLE MPI_LINK_CMDLINE - RETURN_VALUE MPI_COMPILER_RETURN) - - # Note that we probably have -showme:incdirs and -showme:libdirs - # as well. - set(MPI_COMPILER_MAY_HAVE_INCLIBDIRS TRUE) - endif (MPI_COMPILER_RETURN EQUAL 0) - - if (MPI_COMPILER_RETURN EQUAL 0) - # Do nothing: we have our command lines now - else (MPI_COMPILER_RETURN EQUAL 0) - # Older versions of LAM-MPI have "-showme". Try it. 
- exec_program(${MPI_COMPILER} - ARGS -showme - OUTPUT_VARIABLE MPI_COMPILE_CMDLINE - RETURN_VALUE MPI_COMPILER_RETURN) - endif (MPI_COMPILER_RETURN EQUAL 0) - - if (MPI_COMPILER_RETURN EQUAL 0) - # Do nothing: we have our command lines now - else (MPI_COMPILER_RETURN EQUAL 0) - # MPICH uses "-show". Try it. - exec_program(${MPI_COMPILER} - ARGS -show - OUTPUT_VARIABLE MPI_COMPILE_CMDLINE - RETURN_VALUE MPI_COMPILER_RETURN) - endif (MPI_COMPILER_RETURN EQUAL 0) - - if (MPI_COMPILER_RETURN EQUAL 0) - # We have our command lines, but we might need to copy - # MPI_COMPILE_CMDLINE into MPI_LINK_CMDLINE, if the underlying - if (NOT MPI_LINK_CMDLINE) - SET(MPI_LINK_CMDLINE ${MPI_COMPILE_CMDLINE}) - endif (NOT MPI_LINK_CMDLINE) - else (MPI_COMPILER_RETURN EQUAL 0) - message(STATUS "Unable to determine MPI from MPI driver ${MPI_COMPILER}") - endif (MPI_COMPILER_RETURN EQUAL 0) -endif (MPI_INCLUDE_PATH AND MPI_LIBRARY) - -if (MPI_INCLUDE_PATH AND MPI_LIBRARY) - # Do nothing: we already have MPI_INCLUDE_PATH and MPI_LIBRARY in - # the cache, and we don't want to override those settings. -elseif (MPI_COMPILE_CMDLINE) - # Extract compile flags from the compile command line. 
- string(REGEX MATCHALL "(^| )-[Df]([^\" ]+|\"[^\"]+\")" MPI_ALL_COMPILE_FLAGS "${MPI_COMPILE_CMDLINE}") - set(MPI_COMPILE_FLAGS_WORK) - foreach(FLAG ${MPI_ALL_COMPILE_FLAGS}) - if (MPI_COMPILE_FLAGS_WORK) - set(MPI_COMPILE_FLAGS_WORK "${MPI_COMPILE_FLAGS_WORK} ${FLAG}") - else(MPI_COMPILE_FLAGS_WORK) - set(MPI_COMPILE_FLAGS_WORK ${FLAG}) - endif(MPI_COMPILE_FLAGS_WORK) - endforeach(FLAG) - - # Extract include paths from compile command line - string(REGEX MATCHALL "(^| )-I([^\" ]+|\"[^\"]+\")" MPI_ALL_INCLUDE_PATHS "${MPI_COMPILE_CMDLINE}") - set(MPI_INCLUDE_PATH_WORK) - foreach(IPATH ${MPI_ALL_INCLUDE_PATHS}) - string(REGEX REPLACE "^ ?-I" "" IPATH ${IPATH}) - string(REGEX REPLACE "//" "/" IPATH ${IPATH}) - list(APPEND MPI_INCLUDE_PATH_WORK ${IPATH}) - endforeach(IPATH) - - if (NOT MPI_INCLUDE_PATH_WORK) - if (MPI_COMPILER_MAY_HAVE_INCLIBDIRS) - # The compile command line didn't have any include paths on it, - # but we may have -showme:incdirs. Use it. - exec_program(${MPI_COMPILER} - ARGS -showme:incdirs - OUTPUT_VARIABLE MPI_INCLUDE_PATH_WORK - RETURN_VALUE MPI_COMPILER_RETURN) - separate_arguments(MPI_INCLUDE_PATH_WORK) - endif (MPI_COMPILER_MAY_HAVE_INCLIBDIRS) - endif (NOT MPI_INCLUDE_PATH_WORK) - - if (NOT MPI_INCLUDE_PATH_WORK) - # If all else fails, just search for mpi.h in the normal include - # paths. 
- find_path(MPI_INCLUDE_PATH mpi.h - HINTS ${_MPI_BASE_DIR} ${_MPI_PREFIX_PATH} - PATH_SUFFIXES include - ) - set(MPI_INCLUDE_PATH_WORK ${MPI_INCLUDE_PATH}) - endif (NOT MPI_INCLUDE_PATH_WORK) - - # Extract linker paths from the link command line - string(REGEX MATCHALL "(^| |-Wl,)-L([^\" ]+|\"[^\"]+\")" MPI_ALL_LINK_PATHS "${MPI_LINK_CMDLINE}") - set(MPI_LINK_PATH) - foreach(LPATH ${MPI_ALL_LINK_PATHS}) - string(REGEX REPLACE "^(| |-Wl,)-L" "" LPATH ${LPATH}) - string(REGEX REPLACE "//" "/" LPATH ${LPATH}) - list(APPEND MPI_LINK_PATH ${LPATH}) - endforeach(LPATH) - - if (NOT MPI_LINK_PATH) - if (MPI_COMPILER_MAY_HAVE_INCLIBDIRS) - # The compile command line didn't have any linking paths on it, - # but we may have -showme:libdirs. Use it. - exec_program(${MPI_COMPILER} - ARGS -showme:libdirs - OUTPUT_VARIABLE MPI_LINK_PATH - RETURN_VALUE MPI_COMPILER_RETURN) - separate_arguments(MPI_LINK_PATH) - endif (MPI_COMPILER_MAY_HAVE_INCLIBDIRS) - endif (NOT MPI_LINK_PATH) - - # Extract linker flags from the link command line - string(REGEX MATCHALL "(^| )-Wl,([^\" ]+|\"[^\"]+\")" MPI_ALL_LINK_FLAGS "${MPI_LINK_CMDLINE}") - set(MPI_LINK_FLAGS_WORK) - foreach(FLAG ${MPI_ALL_LINK_FLAGS}) - if (MPI_LINK_FLAGS_WORK) - set(MPI_LINK_FLAGS_WORK "${MPI_LINK_FLAGS_WORK} ${FLAG}") - else(MPI_LINK_FLAGS_WORK) - set(MPI_LINK_FLAGS_WORK ${FLAG}) - endif(MPI_LINK_FLAGS_WORK) - endforeach(FLAG) - if ( MPI_LINK_FLAGS_WORK ) - string ( REGEX REPLACE "^ " "" MPI_LINK_FLAGS_WORK ${MPI_LINK_FLAGS_WORK} ) - endif () - - # Extract the set of libraries to link against from the link command - # line - string(REGEX MATCHALL "(^| )-l([^\" ]+|\"[^\"]+\")" MPI_LIBNAMES "${MPI_LINK_CMDLINE}") - - # Determine full path names for all of the libraries that one needs - # to link against in an MPI program - set(MPI_LIBRARIES) - foreach(LIB ${MPI_LIBNAMES}) - string(REGEX REPLACE "^ ?-l" "" LIB ${LIB}) - set(MPI_LIB "MPI_LIB-NOTFOUND" CACHE FILEPATH "Cleared" FORCE) - find_library(MPI_LIB ${LIB} HINTS 
${MPI_LINK_PATH}) - if (MPI_LIB) - list(APPEND MPI_LIBRARIES ${MPI_LIB}) - elseif (NOT MPI_FIND_QUIETLY) - message(WARNING "Unable to find MPI library ${LIB}") - endif () - endforeach(LIB) - set(MPI_LIB "MPI_LIB-NOTFOUND" CACHE INTERNAL "Scratch variable for MPI detection" FORCE) - - # Chop MPI_LIBRARIES into the old-style MPI_LIBRARY and - # MPI_EXTRA_LIBRARY. - list(LENGTH MPI_LIBRARIES MPI_NUMLIBS) - list(LENGTH MPI_LIBNAMES MPI_NUMLIBS_EXPECTED) - if (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED) - list(GET MPI_LIBRARIES 0 MPI_LIBRARY_WORK) - set(MPI_LIBRARY ${MPI_LIBRARY_WORK} CACHE FILEPATH "MPI library to link against" FORCE) - else (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED) - set(MPI_LIBRARY "MPI_LIBRARY-NOTFOUND" CACHE FILEPATH "MPI library to link against" FORCE) - endif (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED) - if (MPI_NUMLIBS GREATER 1) - set(MPI_EXTRA_LIBRARY_WORK ${MPI_LIBRARIES}) - list(REMOVE_AT MPI_EXTRA_LIBRARY_WORK 0) - set(MPI_EXTRA_LIBRARY ${MPI_EXTRA_LIBRARY_WORK} CACHE STRING "Extra MPI libraries to link against" FORCE) - else (MPI_NUMLIBS GREATER 1) - set(MPI_EXTRA_LIBRARY "MPI_EXTRA_LIBRARY-NOTFOUND" CACHE STRING "Extra MPI libraries to link against" FORCE) - endif (MPI_NUMLIBS GREATER 1) - - # Set up all of the appropriate cache entries - set(MPI_COMPILE_FLAGS ${MPI_COMPILE_FLAGS_WORK} CACHE STRING "MPI compilation flags" FORCE) - set(MPI_INCLUDE_PATH ${MPI_INCLUDE_PATH_WORK} CACHE STRING "MPI include path" FORCE) - set(MPI_LINK_FLAGS ${MPI_LINK_FLAGS_WORK} CACHE STRING "MPI linking flags" FORCE) -else (MPI_COMPILE_CMDLINE) -# No MPI compiler to interogate so attempt to find everything with find functions. 
- find_path(MPI_INCLUDE_PATH mpi.h - HINTS ${_MPI_BASE_DIR} ${_MPI_PREFIX_PATH} - PATH_SUFFIXES include Inc - ) - - # Decide between 32-bit and 64-bit libraries for Microsoft's MPI - if("${CMAKE_SIZEOF_VOID_P}" EQUAL 8) - set(MS_MPI_ARCH_DIR amd64) - else() - set(MS_MPI_ARCH_DIR i386) - endif() - - find_library(MPI_LIBRARY - NAMES mpi mpich msmpi - HINTS ${_MPI_BASE_DIR} ${_MPI_PREFIX_PATH} - PATH_SUFFIXES lib lib/${MS_MPI_ARCH_DIR} Lib Lib/${MS_MPI_ARCH_DIR} - ) - - find_library(MPI_EXTRA_LIBRARY - NAMES mpi++ - HINTS ${_MPI_BASE_DIR} ${_MPI_PREFIX_PATH} - PATH_SUFFIXES lib - DOC "Extra MPI libraries to link against.") - - set(MPI_COMPILE_FLAGS "" CACHE STRING "MPI compilation flags") - set(MPI_LINK_FLAGS "" CACHE STRING "MPI linking flags") -endif (MPI_INCLUDE_PATH AND MPI_LIBRARY) - -# Set up extra variables to conform to -if (MPI_EXTRA_LIBRARY) - set(MPI_LIBRARIES ${MPI_LIBRARY} ${MPI_EXTRA_LIBRARY}) -else (MPI_EXTRA_LIBRARY) - set(MPI_LIBRARIES ${MPI_LIBRARY}) -endif (MPI_EXTRA_LIBRARY) - -if (MPI_INCLUDE_PATH AND MPI_LIBRARY) - set(MPI_FOUND TRUE) -else (MPI_INCLUDE_PATH AND MPI_LIBRARY) - set(MPI_FOUND FALSE) -endif (MPI_INCLUDE_PATH AND MPI_LIBRARY) - -#include("${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake") -# handle the QUIETLY and REQUIRED arguments -#find_package_handle_standard_args(MPI DEFAULT_MSG MPI_LIBRARY MPI_INCLUDE_PATH) - -mark_as_advanced(MPI_INCLUDE_PATH MPI_COMPILE_FLAGS MPI_LINK_FLAGS MPI_LIBRARY - MPI_EXTRA_LIBRARY) - -# unset to cleanup namespace -unset(_MPI_PACKAGE_DIR) -unset(_MPI_PREFIX_PATH) -unset(_MPI_BASE_DIR) diff --git a/cmake/ctest_script.cmake b/cmake/ctest_script.cmake index 88bf92fe..e460fed5 100644 --- a/cmake/ctest_script.cmake +++ b/cmake/ctest_script.cmake @@ -32,7 +32,6 @@ SET( CMAKE_MAKE_PROGRAM $ENV{CMAKE_MAKE_PROGRAM} ) SET( CTEST_CMAKE_GENERATOR $ENV{CTEST_CMAKE_GENERATOR} ) SET( LDLIBS $ENV{LDLIBS} ) SET( LDFLAGS $ENV{LDFLAGS} ) -SET( MPI_COMPILER $ENV{MPI_COMPILER} ) SET( MPI_DIRECTORY 
$ENV{MPI_DIRECTORY} ) SET( MPI_INCLUDE $ENV{MPI_INCLUDE} ) SET( MPI_LINK_FLAGS $ENV{MPI_LINK_FLAGS} ) @@ -198,7 +197,7 @@ SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DCMAKE_C_FLAGS='${CFLAGS}';-DCMAKE_CXX_FLA SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DLDFLAGS:STRING='${FLAGS}';-DLDLIBS:STRING='${LDLIBS}'" ) SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DENABLE_GCOV:BOOL=${ENABLE_GCOV}" ) IF ( USE_MPI ) - SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DMPI_COMPILER:BOOL=true;-DMPIEXEC=${MPIEXEC}") + SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DMPIEXEC=${MPIEXEC}") IF ( NOT USE_VALGRIND ) SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DUSE_MPI_FOR_SERIAL_TESTS:BOOL=true") ENDIF() diff --git a/cmake/libraries.cmake b/cmake/libraries.cmake index 14ff1ac9..f899c289 100644 --- a/cmake/libraries.cmake +++ b/cmake/libraries.cmake @@ -41,9 +41,7 @@ ENDMACRO() # Macro to find and configure the MPI libraries MACRO( CONFIGURE_MPI ) # Determine if we want to use MPI - CHECK_ENABLE_FLAG(USE_MPI 1 ) CHECK_ENABLE_FLAG( USE_MPI 1 ) - MESSAGE("MPIEXEC = ${MPIEXEC}") IF ( USE_MPI ) MESSAGE( "Configuring MPI" ) IF ( MPIEXEC ) diff --git a/ctest_script.cmake b/ctest_script.cmake index 57f7db15..626f870d 100644 --- a/ctest_script.cmake +++ b/ctest_script.cmake @@ -164,7 +164,7 @@ SET( CTEST_OPTIONS ) SET( CTEST_OPTIONS "-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" ) SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DCMAKE_C_COMPILER:PATH=${CC};-DCMAKE_C_FLAGS='${C_FLAGS}';" ) SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DCMAKE_CXX_COMPILER:PATH=${CXX};-DCMAKE_CXX_FLAGS='${CXX_FLAGS}'" ) -SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DMPI_COMPILER:BOOL=true;-DMPIEXEC=${MPIEXEC};-DUSE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=true") +SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DMPIEXEC=${MPIEXEC};-DUSE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=true") IF ( USE_CUDA ) SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DUSE_CUDA:BOOL=true;-DCUDA_NVCC_FLAGS='${CUDA_FLAGS}';-DCUDA_HOST_COMPILER=${CUDA_HOST_COMPILER}" ) ELSE() diff --git a/sample_scripts/config_build_eos 
b/sample_scripts/config_build_eos index f4d69f26..8c7aeb92 100755 --- a/sample_scripts/config_build_eos +++ b/sample_scripts/config_build_eos @@ -33,7 +33,6 @@ cmake \ -D CMAKE_CXX_STANDARD=14 \ -D USE_TIMER=false \ -D TIMER_DIRECTORY=${HOME}/timerutility/build/opt \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_CUDA=0 \ diff --git a/sample_scripts/config_build_rhea b/sample_scripts/config_build_rhea index 0e9b7296..0f5713da 100755 --- a/sample_scripts/config_build_rhea +++ b/sample_scripts/config_build_rhea @@ -27,7 +27,6 @@ cmake \ -D CMAKE_CXX_STD=11 \ -D USE_TIMER=false \ -D TIMER_DIRECTORY=${HOME}/timerutility/build/opt \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_CUDA=0 \ diff --git a/sample_scripts/config_build_titan b/sample_scripts/config_build_titan index 18d50e1a..32fd639a 100755 --- a/sample_scripts/config_build_titan +++ b/sample_scripts/config_build_titan @@ -25,7 +25,6 @@ cmake \ -D CMAKE_CUDA_FLAGS="-arch sm_35" \ -D CMAKE_CUDA_HOST_COMPILER="/opt/gcc/6.3.0/bin/gcc" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_SILO=1 \ diff --git a/sample_scripts/config_build_titan_silo b/sample_scripts/config_build_titan_silo index 22069a6c..0a3df511 100755 --- a/sample_scripts/config_build_titan_silo +++ b/sample_scripts/config_build_titan_silo @@ -20,7 +20,6 @@ cmake \ -D CMAKE_CUDA_FLAGS="-arch sm_35" \ -D CMAKE_CUDA_HOST_COMPILER="/opt/gcc/6.3.0/bin/gcc" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_NETCDF=0 \ diff --git a/sample_scripts/config_poplar_hip b/sample_scripts/config_poplar_hip index f1c3159c..5628c074 100755 --- a/sample_scripts/config_poplar_hip +++ b/sample_scripts/config_poplar_hip @@ -36,6 +36,5 @@ cmake \ # MPI_THREAD_MULTIPLE=1 MV2_USE_RDMA_CM=0 MV2_USE_RDMA_CM= MV2_NUM_HCAS=1 MV2_USE_CUDA=1 
MV2_ENABLE_AFFINITY=0 srun -n 2 -N 1 --cpu-bind=v -c 1 ./test_MPI -# -D MPI_COMPILER:BOOL=TRUE \ # -D MPIEXEC=mpirun \ # -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ diff --git a/sample_scripts/config_summit_hip b/sample_scripts/config_summit_hip index 23d3d919..b921b14a 100755 --- a/sample_scripts/config_summit_hip +++ b/sample_scripts/config_summit_hip @@ -21,7 +21,6 @@ cmake \ -D HIP_NVCC_OPTIONS="-arch sm_70" \ -D LINK_LIBRARIES="/sw/summit/cuda/9.2.148/lib64/libcudart.so" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D USE_NETCDF=0 \ -D USE_SILO=1 \ -D SILO_DIRECTORY=${TPL_DIR}/silo \ diff --git a/sample_scripts/config_titan b/sample_scripts/config_titan index 8493d58b..f1b02507 100755 --- a/sample_scripts/config_titan +++ b/sample_scripts/config_titan @@ -9,7 +9,6 @@ cmake \ -D CMAKE_CUDA_FLAGS="-arch sm_35" \ -D CMAKE_CUDA_HOST_COMPILER="/usr/bin/gcc" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ ${HOME}/LBPM-WIA diff --git a/sample_scripts/configure_arc_cluster b/sample_scripts/configure_arc_cluster index f4124cf6..e97553dd 100755 --- a/sample_scripts/configure_arc_cluster +++ b/sample_scripts/configure_arc_cluster @@ -10,7 +10,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="" \ -D CMAKE_CXX_FLAGS="" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_arden b/sample_scripts/configure_arden index 057732a9..3d0759aa 100755 --- a/sample_scripts/configure_arden +++ b/sample_scripts/configure_arden @@ -5,7 +5,6 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D CMAKE_C_FLAGS="-O3 -fPIC" \ -D CMAKE_CXX_FLAGS="-O3 -fPIC " \ -D CMAKE_CXX_STANDARD=14 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_basic_cluster 
b/sample_scripts/configure_basic_cluster index 667ebcad..7a861974 100755 --- a/sample_scripts/configure_basic_cluster +++ b/sample_scripts/configure_basic_cluster @@ -8,7 +8,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="" \ -D CMAKE_CXX_FLAGS="" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_blueridge b/sample_scripts/configure_blueridge index 667ebcad..7a861974 100755 --- a/sample_scripts/configure_blueridge +++ b/sample_scripts/configure_blueridge @@ -8,7 +8,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="" \ -D CMAKE_CXX_FLAGS="" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_cascades_cpu b/sample_scripts/configure_cascades_cpu index f6c77004..a6b0e203 100755 --- a/sample_scripts/configure_cascades_cpu +++ b/sample_scripts/configure_cascades_cpu @@ -11,7 +11,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="-fPIC" \ -D CMAKE_CXX_FLAGS="-fPIC" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_desktop b/sample_scripts/configure_desktop index 1e717c98..38f917ad 100755 --- a/sample_scripts/configure_desktop +++ b/sample_scripts/configure_desktop @@ -7,7 +7,6 @@ cmake \ -D CMAKE_C_FLAGS="-g " \ -D CMAKE_CXX_FLAGS="-g " \ -D CMAKE_CXX_STANDARD=14 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_huckleberry b/sample_scripts/configure_huckleberry index 5ca6bb43..abe134b0 100755 --- a/sample_scripts/configure_huckleberry +++ b/sample_scripts/configure_huckleberry @@ -12,7 +12,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D 
CMAKE_C_FLAGS="-fPIC" \ -D CMAKE_CXX_FLAGS="-fPIC" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_summit b/sample_scripts/configure_summit index bcd8a221..af761468 100755 --- a/sample_scripts/configure_summit +++ b/sample_scripts/configure_summit @@ -20,7 +20,6 @@ cmake \ -D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \ -D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/6.4.0/bin/gcc" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_HDF5=1 \ diff --git a/sample_scripts/configure_titan_jem b/sample_scripts/configure_titan_jem index 8375ad87..a3b730cd 100755 --- a/sample_scripts/configure_titan_jem +++ b/sample_scripts/configure_titan_jem @@ -15,7 +15,6 @@ cmake \ -D CMAKE_C_COMPILER:PATH=cc \ -D CMAKE_CXX_COMPILER:PATH=CC \ -D CMAKE_CXX_COMPILER:PATH=CC \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Debug \ diff --git a/sample_scripts/configure_ubuntu b/sample_scripts/configure_ubuntu index 516925d0..cccb112c 100755 --- a/sample_scripts/configure_ubuntu +++ b/sample_scripts/configure_ubuntu @@ -5,7 +5,6 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D CMAKE_C_FLAGS="-O3 -fPIC" \ -D CMAKE_CXX_FLAGS="-O3 -fPIC " \ -D CMAKE_CXX_STANDARD=14 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/daedalus_config b/sample_scripts/daedalus_config index f14f3627..f2855433 100755 --- a/sample_scripts/daedalus_config +++ b/sample_scripts/daedalus_config @@ -12,7 +12,6 @@ i -D CMAKE_CXX_COMPILER:PATH=/home/christopher/openmpi/install_dir/bin/mpicxx -D USE_DOXYGEN=false \ # -D CMAKE_C_FLAGS="-std=gnu++11 -w" \ # -D CMAKE_CXX_FLAGS="-std=gnu++11 -w" \ - -D 
MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=/home/christopher/openmpi/install_dir/bin/mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/promethius_config b/sample_scripts/promethius_config index ae26ed94..49f42670 100755 --- a/sample_scripts/promethius_config +++ b/sample_scripts/promethius_config @@ -7,7 +7,6 @@ cmake \ -D CMAKE_C_FLAGS="-g " \ -D CMAKE_CXX_FLAGS="-g -Wno-deprecated-declarations" \ -D CXX_STD=11 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Debug \ From 3f659a0cc254e1b4466c4eddf913cd9b25b0e64e Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 11 Mar 2021 13:55:47 -0500 Subject: [PATCH 180/205] adding mass conservation test --- cpu/FreeLee.cpp | 28 +++++----- tests/CMakeLists.txt | 1 + tests/testGlobalMassFreeLee.cpp | 99 +++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 14 deletions(-) create mode 100644 tests/testGlobalMassFreeLee.cpp diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index 04ed7c72..a11f9ba6 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -778,25 +778,25 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou //-------------------------------- BGK collison for phase field ---------------------------------// // q = 0 - hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; + hq[n] = h0 - (h0 - 0.33333333333333333*phi)/tauM; // q = 1 - hq[nr2] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + hq[nr2] = h1 - (h1 - 0.1111111111111111*(phi*(1.0 + 3.0*ux) - (M*nx*(1 - 4*phi*phi))/W)/tauM); // q = 2 - hq[nr1] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + hq[nr1] = h2 - (h2 - 0.1111111111111111*(phi*(1.0 - 3.0*ux) + (M*nx*(1 - 4*phi*phi))/W)/tauM); // q = 3 - hq[nr4] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + hq[nr4] = h3 - (h3 - 
0.1111111111111111*(phi*(1.0 + 3.0*uy) - (M*ny*(1 - 4*phi*phi))/W)/tauM); // q = 4 - hq[nr3] = h4 - (h4 - phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + hq[nr3] = h4 - (h4 - 0.1111111111111111*(phi*(1.0 - 3.0*uy) + (M*ny*(1 - 4*phi*phi))/W)/tauM); // q = 5 - hq[nr6] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + hq[nr6] = h5 - (h5 - 0.1111111111111111*(phi*(1.0 + 3.0*uz) - (M*nz*(1 - 4*phi*phi))/W)/tauM); // q = 6 - hq[nr5] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + hq[nr5] = h6 - (h6 - 0.1111111111111111*(phi*(1.0 - 3.0*uz) + (M*nz*(1 - 4*phi*phi))/W)/tauM); //........................................................................ //Update velocity on device @@ -1369,25 +1369,25 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double //-------------------------------- BGK collison for phase field ---------------------------------// // q = 0 - hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; + hq[n] = h0 - (h0 - 0.33333333333333333*phi)/tauM; // q = 1 - hq[1*Np+n] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + hq[1*Np+n] = h1 - (h1 - 0.1111111111111111*(phi*(1.0 + 3.0*ux) - (M*nx*(1 - 4*phi*phi))/W)/tauM); // q = 2 - hq[2*Np+n] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; + hq[2*Np+n] = h2 - (h2 - 0.1111111111111111*(phi*(1.0 - 3.0*ux) + (M*nx*(1 - 4*phi*phi))/W)/tauM); // q = 3 - hq[3*Np+n] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + hq[3*Np+n] = h3 - (h3 - 0.1111111111111111*(phi*(1.0 + 3.0*uy) - (M*ny*(1 - 4*phi*phi))/W)/tauM); // q = 4 - hq[4*Np+n] = h4 - (h4 - phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; + hq[4*Np+n] = h4 - (h4 - 0.1111111111111111*(phi*(1.0 - 3.0*uy) + (M*ny*(1 - 4*phi*phi))/W)/tauM); // q = 5 - hq[5*Np+n] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - 
(0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + hq[5*Np+n] = h5 - (h5 - 0.1111111111111111*(phi*(1.0 + 3.0*uz) - (M*nz*(1 - 4*phi*phi))/W)/tauM); // q = 6 - hq[6*Np+n] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; + hq[6*Np+n] = h6 - (h6 - 0.1111111111111111*(phi*(1.0 - 3.0*uz) + (M*nz*(1 - 4*phi*phi))/W)/tauM); //........................................................................ //Update velocity on device diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b32c0b57..d53b28c5 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -62,6 +62,7 @@ ADD_LBPM_TEST( TestMap ) ADD_LBPM_TEST( TestWideHalo ) ADD_LBPM_TEST( TestColorGradDFH ) ADD_LBPM_TEST( TestBubbleDFH ../example/Bubble/input.db) +ADD_LBPM_TEST( testGlobalMassFreeLee ../example/Bubble/input.db) #ADD_LBPM_TEST( TestColorMassBounceback ../example/Bubble/input.db) ADD_LBPM_TEST( TestPressVel ../example/Bubble/input.db) ADD_LBPM_TEST( TestPoiseuille ../example/Piston/poiseuille.db) diff --git a/tests/testGlobalMassFreeLee.cpp b/tests/testGlobalMassFreeLee.cpp new file mode 100644 index 00000000..ba8189f1 --- /dev/null +++ b/tests/testGlobalMassFreeLee.cpp @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Utilities.h" +#include "models/FreeLeeModel.h" + +//******************************************************************* +// Implementation of Free-Energy Two-Phase LBM (Lee model) +//******************************************************************* + +int main( int argc, char **argv ) +{ + + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + if (rank == 0){ + printf("********************************************************\n"); + 
printf("Running Free Energy Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create_TwoFluid(); + LeeModel.Initialize_TwoFluid(); + + /* Copy the initial density to test that global mass is conserved */ + int Nx = LeeModel.Dm->Nx; + int Ny = LeeModel.Dm->Ny; + int Nz = LeeModel.Dm->Nz; + DoubleArray DensityInit(Nx,Ny,Nz); + LeeModel.ScaLBL_Comm->RegularLayout(LeeModel.Map,LeeModel.Den,DensityInit); + + LeeModel.Run_TwoFluid(); + + DoubleArray DensityFinal(Nx,Ny,Nz); + LeeModel.ScaLBL_Comm->RegularLayout(LeeModel.Map,LeeModel.Den,DensityFinal); + + DoubleArray DensityChange(Nx,Ny,Nz); + double totalChange=0.0; + for (int k=1; k Date: Thu, 11 Mar 2021 14:15:18 -0500 Subject: [PATCH 181/205] update ubuntu config --- sample_scripts/configure_ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sample_scripts/configure_ubuntu b/sample_scripts/configure_ubuntu index 516925d0..057732a9 100755 --- a/sample_scripts/configure_ubuntu +++ b/sample_scripts/configure_ubuntu @@ -16,7 +16,7 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D USE_SILO=1 \ -D SILO_LIB="/opt/arden/silo/4.10.2/lib/libsiloh5.a" \ -D SILO_DIRECTORY="/opt/arden/silo/4.10.2" \ - -D USE_NETCDF=1 \ + -D USE_NETCDF=0 \ -D NETCDF_DIRECTORY="/opt/arden/netcdf/4.6.1" \ -D USE_CUDA=0 \ -D USE_TIMER=0 \ From ec6b3c6a68e93862b6f28dc65d6cc6a52f94a087 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 11 Mar 2021 20:16:02 -0500 Subject: [PATCH 182/205] revert to 
cs^2=2/9 for D3Q7 --- cpu/FreeLee.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index a11f9ba6..04ed7c72 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -778,25 +778,25 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou //-------------------------------- BGK collison for phase field ---------------------------------// // q = 0 - hq[n] = h0 - (h0 - 0.33333333333333333*phi)/tauM; + hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; // q = 1 - hq[nr2] = h1 - (h1 - 0.1111111111111111*(phi*(1.0 + 3.0*ux) - (M*nx*(1 - 4*phi*phi))/W)/tauM); + hq[nr2] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; // q = 2 - hq[nr1] = h2 - (h2 - 0.1111111111111111*(phi*(1.0 - 3.0*ux) + (M*nx*(1 - 4*phi*phi))/W)/tauM); + hq[nr1] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; // q = 3 - hq[nr4] = h3 - (h3 - 0.1111111111111111*(phi*(1.0 + 3.0*uy) - (M*ny*(1 - 4*phi*phi))/W)/tauM); + hq[nr4] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; // q = 4 - hq[nr3] = h4 - (h4 - 0.1111111111111111*(phi*(1.0 - 3.0*uy) + (M*ny*(1 - 4*phi*phi))/W)/tauM); + hq[nr3] = h4 - (h4 - phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; // q = 5 - hq[nr6] = h5 - (h5 - 0.1111111111111111*(phi*(1.0 + 3.0*uz) - (M*nz*(1 - 4*phi*phi))/W)/tauM); + hq[nr6] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; // q = 6 - hq[nr5] = h6 - (h6 - 0.1111111111111111*(phi*(1.0 - 3.0*uz) + (M*nz*(1 - 4*phi*phi))/W)/tauM); + hq[nr5] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; //........................................................................ 
//Update velocity on device @@ -1369,25 +1369,25 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double //-------------------------------- BGK collison for phase field ---------------------------------// // q = 0 - hq[n] = h0 - (h0 - 0.33333333333333333*phi)/tauM; + hq[n] = h0 - (h0 - 0.3333333333333333*phi)/tauM; // q = 1 - hq[1*Np+n] = h1 - (h1 - 0.1111111111111111*(phi*(1.0 + 3.0*ux) - (M*nx*(1 - 4*phi*phi))/W)/tauM); + hq[1*Np+n] = h1 - (h1 - phi*(0.1111111111111111 + 0.5*ux) - (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; // q = 2 - hq[2*Np+n] = h2 - (h2 - 0.1111111111111111*(phi*(1.0 - 3.0*ux) + (M*nx*(1 - 4*phi*phi))/W)/tauM); + hq[2*Np+n] = h2 - (h2 - phi*(0.1111111111111111 - 0.5*ux) + (0.5*M*nx*(1 - 4*phi*phi))/W)/tauM; // q = 3 - hq[3*Np+n] = h3 - (h3 - 0.1111111111111111*(phi*(1.0 + 3.0*uy) - (M*ny*(1 - 4*phi*phi))/W)/tauM); + hq[3*Np+n] = h3 - (h3 - phi*(0.1111111111111111 + 0.5*uy) - (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; // q = 4 - hq[4*Np+n] = h4 - (h4 - 0.1111111111111111*(phi*(1.0 - 3.0*uy) + (M*ny*(1 - 4*phi*phi))/W)/tauM); + hq[4*Np+n] = h4 - (h4 - phi*(0.1111111111111111 - 0.5*uy) + (0.5*M*ny*(1 - 4*phi*phi))/W)/tauM; // q = 5 - hq[5*Np+n] = h5 - (h5 - 0.1111111111111111*(phi*(1.0 + 3.0*uz) - (M*nz*(1 - 4*phi*phi))/W)/tauM); + hq[5*Np+n] = h5 - (h5 - phi*(0.1111111111111111 + 0.5*uz) - (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; // q = 6 - hq[6*Np+n] = h6 - (h6 - 0.1111111111111111*(phi*(1.0 - 3.0*uz) + (M*nz*(1 - 4*phi*phi))/W)/tauM); + hq[6*Np+n] = h6 - (h6 - phi*(0.1111111111111111 - 0.5*uz) + (0.5*M*nz*(1 - 4*phi*phi))/W)/tauM; //........................................................................ 
//Update velocity on device From aa04ad4898490ea41540789df5f6a3fa1710ecbe Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 14 Mar 2021 13:02:55 -0400 Subject: [PATCH 183/205] fixed wide halo bug with lee model --- models/FreeLeeModel.cpp | 18 +++++++++++------- tests/TestMixedGrad.cpp | 1 + tests/testGlobalMassFreeLee.cpp | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 77a485f4..9fba430c 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -776,7 +776,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ ScaLBL_Comm_WideHalo->Send(Phi); ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, - kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); @@ -794,13 +794,16 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, - kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); + + printf("write debug \n"); + WriteDebug_TwoFluid(); // *************EVEN TIMESTEP************* timestep++; // Compute the Phase indicator field - ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL + ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMA ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE 
ScaLBL_Comm->Barrier(); @@ -815,7 +818,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ } ScaLBL_Comm_WideHalo->Send(Phi); ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, - kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); @@ -833,7 +836,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, - kappa, beta, W, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); //************************************************************************ PROFILE_STOP("Update"); @@ -906,6 +909,7 @@ void ScaLBL_FreeLeeModel::Run_SingleFluid(){ 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); + // *************EVEN TIMESTEP************* timestep++; //------------------------------------------------------------------------------------------------------------------- @@ -1003,7 +1007,7 @@ void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ fwrite(PhaseField.data(),8,N,VELZ_FILE); fclose(VELZ_FILE); -/* ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); + ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); FILE *CGX_FILE; sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank); CGX_FILE = fopen(LocalRankFilename,"wb"); @@ -1023,7 +1027,7 @@ void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ CGZ_FILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,CGZ_FILE); fclose(CGZ_FILE); -*/ + } void ScaLBL_FreeLeeModel::WriteDebug_SingleFluid(){ diff 
--git a/tests/TestMixedGrad.cpp b/tests/TestMixedGrad.cpp index dad1ae26..7fc3373c 100644 --- a/tests/TestMixedGrad.cpp +++ b/tests/TestMixedGrad.cpp @@ -69,6 +69,7 @@ int main( int argc, char **argv ) Initialize_DummyPhaseField(LeeModel); LeeModel.Create_DummyPhase_MGTest(); LeeModel.MGTest(); + LeeModel.WriteDebug_TwoFluid(); PROFILE_STOP( "Main" ); auto file = db->getWithDefault( "TimerFile", "TestMixedGrad" ); diff --git a/tests/testGlobalMassFreeLee.cpp b/tests/testGlobalMassFreeLee.cpp index ba8189f1..bd54f106 100644 --- a/tests/testGlobalMassFreeLee.cpp +++ b/tests/testGlobalMassFreeLee.cpp @@ -86,7 +86,7 @@ int main( int argc, char **argv ) fwrite(DensityChange.data(),8,Nx*Ny*Nz,OUTFILE); fclose(OUTFILE); - LeeModel.WriteDebug_TwoFluid(); + //LeeModel.WriteDebug_TwoFluid(); PROFILE_STOP("Main"); // **************************************************** From b828f6e5e65c7dd1523f8fc8be3f1c1c00a23eb2 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 15 Mar 2021 12:58:24 -0400 Subject: [PATCH 184/205] updating mixed gradient test --- models/FreeLeeModel.cpp | 10 +++++----- tests/TestMixedGrad.cpp | 42 ++++++++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 9fba430c..a4895285 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -10,9 +10,9 @@ color lattice boltzmann model #include ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),tauM(0),rhoA(0),rhoB(0),W(0),gamma(0),kappa(0),beta(0), +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(2),tauA(1.0),tauB(1.0),tauM(1.0),rhoA(1.0),rhoB(1.0),W(5.0),gamma(0.001),kappa(0.0075),beta(0.0024), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), -tau(0),rho0(0), +tau(1.0),rho0(1.0), 
Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -797,7 +797,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); - printf("write debug \n"); + printf("write debug strideY=%i strideZ = %i \n",Nxh, Nxh*Nyh); WriteDebug_TwoFluid(); // *************EVEN TIMESTEP************* @@ -1178,9 +1178,9 @@ void ScaLBL_FreeLeeModel::MGTest(){ comm.barrier(); ScaLBL_Comm_WideHalo->Send(Phi); - ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad,Nx,Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad,Nxh,Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm_WideHalo->Recv(Phi); - ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad,Nx,Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad,Nxh,Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); //check the sum of ColorGrad double cgx_loc = 0.0; diff --git a/tests/TestMixedGrad.cpp b/tests/TestMixedGrad.cpp index 7fc3373c..bbf686ea 100644 --- a/tests/TestMixedGrad.cpp +++ b/tests/TestMixedGrad.cpp @@ -9,14 +9,15 @@ #include "common/Utilities.h" #include "models/FreeLeeModel.h" -inline void Initialize_DummyPhaseField(ScaLBL_FreeLeeModel &LeeModel){ +inline void Initialize_Mask(ScaLBL_FreeLeeModel &LeeModel){ // initialize a bubble int i,j,k,n; int rank = LeeModel.Mask->rank(); int Nx = LeeModel.Mask->Nx; int Ny = LeeModel.Mask->Ny; int Nz = LeeModel.Mask->Nz; - if (rank == 0) cout << "Setting up dummy phase field..." 
<< endl; + if (rank == 0) printf(" initialize mask...\n"); + for (k=0;krank(); + int Nx = LeeModel.Mask->Nx; + int Ny = LeeModel.Mask->Ny; + int Nz = LeeModel.Mask->Nz; + if (rank == 0) printf("Setting up dummy phase field with gradient {x,y,z} = {%f , %f , %f}...\n",ax,ay,az); + + double * Dummy; + int Nh = (Nx+2)*(Ny+2)*(Nz+2); + Dummy = new double [(Nx+2)*(Ny+2)*(Nz+2)]; + for (k=0;kid[n]=1; + LeeModel.id[n] = LeeModel.Mask->id[n]; + int nh = (k+1)*(Nx+2)*(Ny+2) + (j+1)*(Nx+2) + i+1; + Dummy[nh] = ax*double(i) + ay*double(j) + az*double(k); + } + } + } + ScaLBL_CopyToDevice(LeeModel.Phi, Dummy, sizeof(double)*Nh); + + LeeModel.MGTest(); +} int main( int argc, char **argv ) { @@ -66,14 +94,14 @@ int main( int argc, char **argv ) ScaLBL_FreeLeeModel LeeModel( rank, nprocs, comm ); LeeModel.ReadParams( filename ); LeeModel.SetDomain(); - Initialize_DummyPhaseField(LeeModel); - LeeModel.Create_DummyPhase_MGTest(); - LeeModel.MGTest(); + Initialize_Mask(LeeModel); + //LeeModel.Create_DummyPhase_MGTest(); + LeeModel.Create_TwoFluid(); + + Initialize_DummyPhaseField(LeeModel,1.0, 2.0, 3.0); LeeModel.WriteDebug_TwoFluid(); PROFILE_STOP( "Main" ); - auto file = db->getWithDefault( "TimerFile", "TestMixedGrad" ); - auto level = db->getWithDefault( "TimerLevel", 1 ); PROFILE_SAVE( file, level ); // **************************************************** From 15dbe5f053b8c9af031879feddded2d273861a40 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 15 Mar 2021 17:31:14 -0400 Subject: [PATCH 185/205] update ubuntu configure script --- sample_scripts/configure_ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sample_scripts/configure_ubuntu b/sample_scripts/configure_ubuntu index cccb112c..fab18662 100755 --- a/sample_scripts/configure_ubuntu +++ b/sample_scripts/configure_ubuntu @@ -15,7 +15,7 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D USE_SILO=1 \ -D SILO_LIB="/opt/arden/silo/4.10.2/lib/libsiloh5.a" \ -D 
SILO_DIRECTORY="/opt/arden/silo/4.10.2" \ - -D USE_NETCDF=1 \ + -D USE_NETCDF=0 \ -D NETCDF_DIRECTORY="/opt/arden/netcdf/4.6.1" \ -D USE_CUDA=0 \ -D USE_TIMER=0 \ From 6d59317919e8ff240247af3835de70b8922d5c3b Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Tue, 16 Mar 2021 17:21:52 -0400 Subject: [PATCH 186/205] Cleaning up TestWriter --- IO/Reader.cpp | 36 ++++++- IO/Reader.h | 23 ++++- IO/Writer.cpp | 2 +- tests/TestWriter.cpp | 231 ++++++++++++++++++++++--------------------- tests/convertIO.cpp | 2 +- 5 files changed, 177 insertions(+), 117 deletions(-) diff --git a/IO/Reader.cpp b/IO/Reader.cpp index dedb9b82..fe10eae9 100644 --- a/IO/Reader.cpp +++ b/IO/Reader.cpp @@ -37,10 +37,19 @@ std::string IO::getPath( const std::string& filename ) } -// List the timesteps in the given directors (dumps.LBPM) -std::vector IO::readTimesteps( const std::string& filename ) +// List the timesteps in the given directory (dumps.LBPM) +std::vector IO::readTimesteps( const std::string& path, const std::string& format ) { + // Get the name of the summary filename + std::string filename = path + "/"; + if ( format=="old" || format=="new" ) + filename += "summary.LBM"; + else if ( format=="silo" ) + filename += "LBM.visit"; + else + ERROR( "Unknown format: " + format ); PROFILE_START("readTimesteps"); + // Read the data FILE *fid= fopen(filename.c_str(),"rb"); if ( fid==NULL ) ERROR("Error opening file"); @@ -59,6 +68,29 @@ std::vector IO::readTimesteps( const std::string& filename ) fclose(fid); PROFILE_STOP("readTimesteps"); return timesteps; + return timesteps; +} + + +// Read the data for the given timestep +std::vector IO::readData( const std::string& path, const std::string& timestep, const Utilities::MPI &comm ) +{ + // Get the mesh databases + auto db = IO::getMeshList( path, timestep ); + // Create the data + std::vector data( db .size() ); + for ( size_t i=0; i readTimesteps( const std::string& filename ); +/*! 
+ * @brief Read the timestep list + * @details This function reads the timestep list from the summary file. + * @param[in] path The path to use for reading + * @param[in] format The data format to use: + * old - Old mesh format (provided for backward compatibility) + * new - New format, 1 file/process + * silo - Silo + * @return append Append any existing data (default is false) + */ +std::vector readTimesteps( const std::string& path, const std::string& format ); + + +/*! + * @brief Read the data for the timestep + * @details This function reads the mesh and variable data provided for the given timestep. + * Note: this function requires that the number of ranks of the comm match the number of ranks in the meshes + * @param[in] path The path to use for reading + * @param[in] timestep The timestep iteration + */ +std::vector readData( const std::string& path, const std::string& timestep, const Utilities::MPI &comm = MPI_COMM_WORLD ); //! Read the list of mesh databases for the given timestep diff --git a/IO/Writer.cpp b/IO/Writer.cpp index 61c333af..7414d5a1 100644 --- a/IO/Writer.cpp +++ b/IO/Writer.cpp @@ -76,7 +76,7 @@ static std::vector writeMeshesOrigFormat( const std::vectorname ); } diff --git a/tests/TestWriter.cpp b/tests/TestWriter.cpp index 97774c55..4030930c 100644 --- a/tests/TestWriter.cpp +++ b/tests/TestWriter.cpp @@ -31,13 +31,97 @@ inline double distance( const Point& p ) } +bool checkMesh( const std::vector& meshData, const std::string& format, std::shared_ptr mesh ) +{ + + // Get direct access to the meshes used to test the reader + const auto pointmesh = dynamic_cast( meshData[0].mesh.get() ); + const auto trimesh = dynamic_cast( meshData[1].mesh.get() ); + const auto trilist = dynamic_cast( meshData[2].mesh.get() ); + const auto domain = dynamic_cast( meshData[3].mesh.get() ); + const size_t N_tri = trimesh->A.size(); + if ( mesh->className() == "pointmesh" ) { + // Check the pointmesh + auto pmesh = IO::getPointList(mesh); + if ( 
pmesh.get()==NULL ) + return false; + if ( pmesh->points.size() != pointmesh->points.size() ) + return false; + } + if ( mesh->className() == "trimesh" || mesh->className() == "trilist" ) { + // Check the trimesh/trilist + auto mesh1 = IO::getTriMesh(mesh); + auto mesh2 = IO::getTriList(mesh); + if ( mesh1.get()==NULL || mesh2.get()==NULL ) + return false; + if ( mesh1->A.size()!=N_tri || mesh1->B.size()!=N_tri || mesh1->C.size()!=N_tri || + mesh2->A.size()!=N_tri || mesh2->B.size()!=N_tri || mesh2->C.size()!=N_tri ) + return false; + const std::vector& P1 = mesh1->vertices->points; + const std::vector& A1 = mesh1->A; + const std::vector& B1 = mesh1->B; + const std::vector& C1 = mesh1->C; + const std::vector& A2 = mesh2->A; + const std::vector& B2 = mesh2->B; + const std::vector& C2 = mesh2->C; + const std::vector& A = trilist->A; + const std::vector& B = trilist->B; + const std::vector& C = trilist->C; + for (size_t i=0; iclassName() == "domain" && format!="old" ) { + // Check the domain mesh + const IO::DomainMesh& mesh1 = *std::dynamic_pointer_cast(mesh); + if ( mesh1.nprocx!=domain->nprocx || mesh1.nprocy!=domain->nprocy || mesh1.nprocz!=domain->nprocz ) + return false; + if ( mesh1.nx!=domain->nx || mesh1.ny!=domain->ny || mesh1.nz!=domain->nz ) + return false; + if ( mesh1.Lx!=domain->Lx || mesh1.Ly!=domain->Ly || mesh1.Lz!=domain->Lz ) + return false; + } + return true; +} + + +bool checkVar( const std::string& format, std::shared_ptr mesh, + std::shared_ptr variable1, std::shared_ptr variable2 ) +{ + if ( format=="new" ) + IO::reformatVariable( *mesh, *variable2 ); + bool pass = true; + const IO::Variable& var1 = *variable1; + const IO::Variable& var2 = *variable2; + pass = var1.name == var2.name; + pass = pass && var1.dim == var2.dim; + pass = pass && var1.type == var2.type; + pass = pass && var1.data.length() == var2.data.length(); + if ( pass ) { + for (size_t m=0; m& meshData, UnitTest& ut ) { + PROFILE_SCOPED( path, 0, timer ); + Utilities::MPI comm( 
MPI_COMM_WORLD ); int nprocs = comm.getSize(); comm.barrier(); + + // Set the path for the writer + std::string path = "test_" + format; + + // Get the format std::string format2 = format; auto precision = IO::DataType::Double; @@ -49,6 +133,7 @@ void testWriter( const std::string& format, std::vector& mes precision = IO::DataType::Float; } + // Set the precision for the variables for ( auto& data : meshData ) { data.precision = precision; @@ -57,123 +142,59 @@ void testWriter( const std::string& format, std::vector& mes } // Write the data - PROFILE_START(format+"-write"); - IO::initialize( "test_"+format, format2, false ); + IO::initialize( path, format2, false ); IO::writeData( 0, meshData, comm ); IO::writeData( 3, meshData, comm ); comm.barrier(); - PROFILE_STOP(format+"-write"); - // Get the summary name for reading - std::string path = "test_" + format; - std::string summary_name; - if ( format=="old" || format=="new" ) - summary_name = "summary.LBM"; - else if ( format=="silo-float" || format=="silo-double" ) - summary_name = "LBM.visit"; - else - ERROR("Unknown format"); - // Get direct access to the meshes used to test the reader - const auto pointmesh = dynamic_cast( meshData[0].mesh.get() ); - const auto trimesh = dynamic_cast( meshData[1].mesh.get() ); - const auto trilist = dynamic_cast( meshData[2].mesh.get() ); - const auto domain = dynamic_cast( meshData[3].mesh.get() ); - const size_t N_tri = trimesh->A.size(); - - // Get a list of the timesteps - PROFILE_START(format+"-read-timesteps"); - auto timesteps = IO::readTimesteps( path + "/" + summary_name ); - PROFILE_STOP(format+"-read-timesteps"); + // Get a list of the timesteps + auto timesteps = IO::readTimesteps( path, format2 ); if ( timesteps.size()==2 ) ut.passes(format+": Corrent number of timesteps"); else ut.failure(format+": Incorrent number of timesteps"); - // Check the mesh lists + + // Test the simple read interface + bool pass = true; + for ( const auto& timestep : timesteps ) { + 
auto data = IO::readData( path, timestep ); + pass = pass && data.size() == meshData.size(); + for ( size_t i=0; ipoints.size() != pointmesh->points.size() ) { - pass = false; - break; - } - } - if ( database.name=="trimesh" || database.name=="trilist" ) { - // Check the trimesh/trilist - auto mesh1 = IO::getTriMesh(mesh); - auto mesh2 = IO::getTriList(mesh); - if ( mesh1.get()==NULL || mesh2.get()==NULL ) { - pass = false; - break; - } - if ( mesh1->A.size()!=N_tri || mesh1->B.size()!=N_tri || mesh1->C.size()!=N_tri || - mesh2->A.size()!=N_tri || mesh2->B.size()!=N_tri || mesh2->C.size()!=N_tri ) - { - pass = false; - break; - } - const std::vector& P1 = mesh1->vertices->points; - const std::vector& A1 = mesh1->A; - const std::vector& B1 = mesh1->B; - const std::vector& C1 = mesh1->C; - const std::vector& A2 = mesh2->A; - const std::vector& B2 = mesh2->B; - const std::vector& C2 = mesh2->C; - const std::vector& A = trilist->A; - const std::vector& B = trilist->B; - const std::vector& C = trilist->C; - for (size_t i=0; i(mesh); - if ( mesh1.nprocx!=domain->nprocx || mesh1.nprocy!=domain->nprocy || mesh1.nprocz!=domain->nprocz ) - pass = false; - if ( mesh1.nx!=domain->nx || mesh1.ny!=domain->ny || mesh1.nz!=domain->nz ) - pass = false; - if ( mesh1.Lx!=domain->Lx || mesh1.Ly!=domain->Ly || mesh1.Lz!=domain->Lz ) - pass = false; + } else { + pass = pass && checkMesh( meshData, format, mesh ); } } if ( pass ) { @@ -185,31 +206,19 @@ void testWriter( const std::string& format, std::vector& mes // Load the variables and check their data if ( format=="old" ) continue; // Old format does not support variables - const IO::MeshDataStruct* mesh0 = NULL; + const IO::MeshDataStruct* mesh0 = nullptr; for (size_t k=0; kvars.size(); v++) { PROFILE_START(format+"-read-getVariable"); auto variable = IO::getVariable(path,timestep,database,k,mesh0->vars[v]->name); - if ( format=="new" ) - IO::reformatVariable( *mesh, *variable ); - PROFILE_STOP(format+"-read-getVariable"); - const 
IO::Variable& var1 = *mesh0->vars[v]; - const IO::Variable& var2 = *variable; - pass = var1.name == var2.name; - pass = pass && var1.dim == var2.dim; - pass = pass && var1.type == var2.type; - pass = pass && var1.data.length() == var2.data.length(); - if ( pass ) { - for (size_t m=0; mvars[v], variable ); if ( pass ) { ut.passes(format+": Variable \"" + variable->name + "\" matched"); } else { diff --git a/tests/convertIO.cpp b/tests/convertIO.cpp index a6745263..4eb726b0 100644 --- a/tests/convertIO.cpp +++ b/tests/convertIO.cpp @@ -38,7 +38,7 @@ int main(int argc, char **argv) std::string path = IO::getPath( filename ); // Read the timesteps - auto timesteps = IO::readTimesteps( filename ); + auto timesteps = IO::readTimesteps( filename, "old" ); // Loop through the timesteps, reading/writing the data IO::initialize( "", format, false ); From de743e75b72c01fd3538976de81cc0f93e453974 Mon Sep 17 00:00:00 2001 From: JamesEMcclure Date: Wed, 17 Mar 2021 05:15:17 -0400 Subject: [PATCH 187/205] include convert IO --- tests/CMakeLists.txt | 4 ++-- tests/convertIO.cpp | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8df4e6bd..2c4b8431 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -35,8 +35,8 @@ ADD_LBPM_EXECUTABLE( GenerateSphereTest ) #ADD_LBPM_EXECUTABLE( BlobAnalysis ) #ADD_LBPM_EXECUTABLE( BlobIdentify ) #ADD_LBPM_EXECUTABLE( BlobIdentifyParallel ) -#ADD_LBPM_EXECUTABLE( convertIO ) -#ADD_LBPM_EXECUTABLE( DataAggregator ) +ADD_LBPM_EXECUTABLE( convertIO ) +ADD_LBPM_EXECUTABLE( DataAggregator ) #ADD_LBPM_EXECUTABLE( BlobAnalyzeParallel )( ADD_LBPM_EXECUTABLE( lbpm_minkowski_scalar ) ADD_LBPM_EXECUTABLE( TestPoissonSolver ) diff --git a/tests/convertIO.cpp b/tests/convertIO.cpp index 4eb726b0..c53ffe4f 100644 --- a/tests/convertIO.cpp +++ b/tests/convertIO.cpp @@ -5,7 +5,7 @@ #include #include -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include 
"common/Communication.h" #include "common/Utilities.h" #include "IO/Mesh.h" @@ -13,7 +13,6 @@ #include "IO/Writer.h" #include "ProfilerApp.h" - int main(int argc, char **argv) { // Initialize MPI @@ -38,7 +37,7 @@ int main(int argc, char **argv) std::string path = IO::getPath( filename ); // Read the timesteps - auto timesteps = IO::readTimesteps( filename, "old" ); + auto timesteps = IO::readTimesteps( filename, "silo" ); // Loop through the timesteps, reading/writing the data IO::initialize( "", format, false ); @@ -63,19 +62,20 @@ int main(int argc, char **argv) // Read the variables for ( auto var : database.variables ) { auto varData = IO::getVariable( path, timestep, database, rank, var.name ); + printf("reformat %s \n",var.name); IO::reformatVariable( *meshData[i].mesh, *varData ); meshData[i].vars.push_back( varData ); } i++; } - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("Read"); // Save the mesh data to a new file PROFILE_START("Write"); IO::writeData( timestep, meshData, MPI_COMM_WORLD ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("Write"); } From cb32a4236bc00e1c7cecfe25a6a52ef39821eb76 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 17 Mar 2021 06:26:15 -0400 Subject: [PATCH 188/205] update lee model tests --- tests/TestMixedGrad.cpp | 89 ++++++++++++++++++++++++++++++++- tests/testGlobalMassFreeLee.cpp | 2 + 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/tests/TestMixedGrad.cpp b/tests/TestMixedGrad.cpp index bbf686ea..61fa6443 100644 --- a/tests/TestMixedGrad.cpp +++ b/tests/TestMixedGrad.cpp @@ -29,6 +29,7 @@ inline void Initialize_Mask(ScaLBL_FreeLeeModel &LeeModel){ } } + inline void Initialize_DummyPhaseField(ScaLBL_FreeLeeModel &LeeModel, double ax, double ay, double az){ // initialize a bubble int i,j,k,n; @@ -57,12 +58,94 @@ inline void Initialize_DummyPhaseField(ScaLBL_FreeLeeModel &LeeModel, double ax, LeeModel.MGTest(); } +inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ + int 
i,j,k,iq,stride,nread; + int Nxh = LeeModel.Nxh; + int Nyh = LeeModel.Nyh; + int Np = LeeModel.Np; + + int *TmpMap; + TmpMap = new int[Np]; + ScaLBL_CopyToHost(TmpMap, LeeModel.dvcMap, Np*sizeof(int)); + + int *neighborList; + neighborList = new int[18*Np]; + ScaLBL_CopyToHost(neighborList, LeeModel.NeighborList, 18*Np*sizeof(int)); + printf("Check stride for interior neighbors \n"); + int count = 0; + for (int n=LeeModel.ScaLBL_Comm->LastInterior(); nLastInterior(); n++){ + // q=0 + int idx = TmpMap[n]; + k = idx/Nxh/Nyh; + j = (idx-k*Nxh*Nyh)/Nxh; + i = (idx-k*Nxh*Nyh -j*Nxh); + + // q=1 + nread = neighborList[n]; + iq = TmpMap[nread]; + stride = idx - iq; + if (stride != 1){ + printf(" %i, %i, %i q = 1 stride=%i \n ",i,j,k,stride); + count++; + } + + // q=2 + nread = neighborList[n+Np]; + iq = TmpMap[nread]; + stride = iq - idx; + if (stride != 1){ + printf(" %i, %i, %i q = 2 stride=%i \n ",i,j,k,stride); + count++; + } + + + // q=3 + nread = neighborList[n+2*Np]; + iq = TmpMap[nread]; + stride = idx - iq; + if (stride != Nxh){ + printf(" %i, %i, %i q = 3 stride=%i \n ",i,j,k,stride); + count++; + } + + // q = 4 + nread = neighborList[n+3*Np]; + iq = TmpMap[nread]; + stride = iq-idx; + if (stride != Nxh){ + printf(" %i, %i, %i q = 4 stride=%i \n ",i,j,k,stride); + count++; + } + + + // q=5 + nread = neighborList[n+4*Np]; + iq = TmpMap[nread]; + stride = idx - iq; + if (stride != Nxh*Nyh){ + count++; + printf(" %i, %i, %i q = 5 stride=%i \n ",i,j,k,stride); + } + + // q = 6 + nread = neighborList[n+5*Np]; + iq = TmpMap[nread]; + stride = iq - idx; + if (stride != Nxh*Nyh){ + count++; + printf(" %i, %i, %i q = 6 stride=%i \n ",i,j,k,stride); + } + + } + return count; +} + int main( int argc, char **argv ) { // Initialize Utilities::startup( argc, argv ); - + int errors = 0; // Load the input database auto db = std::make_shared( argv[1] ); @@ -98,6 +181,8 @@ int main( int argc, char **argv ) //LeeModel.Create_DummyPhase_MGTest(); LeeModel.Create_TwoFluid(); + 
errors=MultiHaloNeighborCheck(LeeModel); + Initialize_DummyPhaseField(LeeModel,1.0, 2.0, 3.0); LeeModel.WriteDebug_TwoFluid(); @@ -109,6 +194,6 @@ int main( int argc, char **argv ) } // Limit scope so variables that contain communicators will free before MPI_Finialize Utilities::shutdown(); - return 0; + return errors; } diff --git a/tests/testGlobalMassFreeLee.cpp b/tests/testGlobalMassFreeLee.cpp index bd54f106..c9073b9a 100644 --- a/tests/testGlobalMassFreeLee.cpp +++ b/tests/testGlobalMassFreeLee.cpp @@ -53,6 +53,8 @@ int main( int argc, char **argv ) LeeModel.ReadInput(); LeeModel.Create_TwoFluid(); LeeModel.Initialize_TwoFluid(); + /* check neighbors */ + /* Copy the initial density to test that global mass is conserved */ int Nx = LeeModel.Dm->Nx; From 8c026a2f6d7f6aaeec4fcb7de54140ea197dcc49 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 17 Mar 2021 06:35:19 -0400 Subject: [PATCH 189/205] test neighborlist --- tests/TestMixedGrad.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/TestMixedGrad.cpp b/tests/TestMixedGrad.cpp index 61fa6443..6baede7c 100644 --- a/tests/TestMixedGrad.cpp +++ b/tests/TestMixedGrad.cpp @@ -73,7 +73,7 @@ inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ ScaLBL_CopyToHost(neighborList, LeeModel.NeighborList, 18*Np*sizeof(int)); printf("Check stride for interior neighbors \n"); int count = 0; - for (int n=LeeModel.ScaLBL_Comm->LastInterior(); nLastInterior(); n++){ + for (int n=LeeModel.ScaLBL_Comm->FirstInterior(); nLastInterior(); n++){ // q=0 int idx = TmpMap[n]; k = idx/Nxh/Nyh; @@ -82,7 +82,7 @@ inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ // q=1 nread = neighborList[n]; - iq = TmpMap[nread]; + iq = TmpMap[nread%Np]; stride = idx - iq; if (stride != 1){ printf(" %i, %i, %i q = 1 stride=%i \n ",i,j,k,stride); @@ -91,7 +91,7 @@ inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ // q=2 nread = neighborList[n+Np]; - iq = 
TmpMap[nread]; + iq = TmpMap[nread%Np]; stride = iq - idx; if (stride != 1){ printf(" %i, %i, %i q = 2 stride=%i \n ",i,j,k,stride); @@ -101,7 +101,7 @@ inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ // q=3 nread = neighborList[n+2*Np]; - iq = TmpMap[nread]; + iq = TmpMap[nread%Np]; stride = idx - iq; if (stride != Nxh){ printf(" %i, %i, %i q = 3 stride=%i \n ",i,j,k,stride); @@ -110,7 +110,7 @@ inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ // q = 4 nread = neighborList[n+3*Np]; - iq = TmpMap[nread]; + iq = TmpMap[nread%Np]; stride = iq-idx; if (stride != Nxh){ printf(" %i, %i, %i q = 4 stride=%i \n ",i,j,k,stride); @@ -120,7 +120,7 @@ inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ // q=5 nread = neighborList[n+4*Np]; - iq = TmpMap[nread]; + iq = TmpMap[nread%Np]; stride = idx - iq; if (stride != Nxh*Nyh){ count++; @@ -129,7 +129,7 @@ inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ // q = 6 nread = neighborList[n+5*Np]; - iq = TmpMap[nread]; + iq = TmpMap[nread%Np]; stride = iq - idx; if (stride != Nxh*Nyh){ count++; From 89704cbb107bc060a762ef854b09bb420eb2044d Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Wed, 17 Mar 2021 10:24:14 -0400 Subject: [PATCH 190/205] Updating convertIO --- IO/IOHelpers.h | 32 ++- IO/Mesh.cpp | 569 ++++++++++++++++++++++--------------------- IO/Mesh.h | 116 +++++---- IO/MeshDatabase.cpp | 511 +++++++++++++++++++------------------- IO/MeshDatabase.h | 68 +++--- IO/PIO.cpp | 131 +++++----- IO/PIO.h | 47 ++-- IO/PIO.hpp | 12 +- IO/PackData.cpp | 77 +++--- IO/PackData.h | 35 ++- IO/PackData.hpp | 150 ++++++------ IO/Reader.cpp | 456 ++++++++++++++++++---------------- IO/Reader.h | 50 ++-- IO/Writer.cpp | 484 ++++++++++++++++++++---------------- IO/Writer.h | 25 +- IO/netcdf.cpp | 385 +++++++++++++++-------------- IO/netcdf.h | 55 +++-- IO/silo.cpp | 81 +++--- IO/silo.h | 121 ++++----- tests/CMakeLists.txt | 2 +- tests/TestWriter.cpp | 2 +- tests/convertIO.cpp | 
113 ++++----- 22 files changed, 1843 insertions(+), 1679 deletions(-) diff --git a/IO/IOHelpers.h b/IO/IOHelpers.h index 2e9b06e0..4389c619 100644 --- a/IO/IOHelpers.h +++ b/IO/IOHelpers.h @@ -10,9 +10,9 @@ namespace IO { // Find a character in a line inline size_t find( const char *line, char token ) { - size_t i=0; + size_t i = 0; while ( 1 ) { - if ( line[i]==token || line[i]<32 || line[i]==0 ) + if ( line[i] == token || line[i] < 32 || line[i] == 0 ) break; ++i; } @@ -21,17 +21,17 @@ inline size_t find( const char *line, char token ) // Remove preceeding/trailing whitespace -inline std::string deblank( const std::string& str ) +inline std::string deblank( const std::string &str ) { size_t i1 = str.size(); size_t i2 = 0; - for (size_t i=0; i=32 ) { - i1 = std::min(i1,i); - i2 = std::max(i2,i); + for ( size_t i = 0; i < str.size(); i++ ) { + if ( str[i] != ' ' && str[i] >= 32 ) { + i1 = std::min( i1, i ); + i2 = std::max( i2, i ); } } - return str.substr(i1,i2-i1+1); + return str.substr( i1, i2 - i1 + 1 ); } @@ -42,14 +42,14 @@ inline std::vector splitList( const char *line, const char token ) size_t i1 = 0; size_t i2 = 0; while ( 1 ) { - if ( line[i2]==token || line[i2]<32 ) { - std::string tmp(&line[i1],i2-i1); - tmp = deblank(tmp); + if ( line[i2] == token || line[i2] < 32 ) { + std::string tmp( &line[i1], i2 - i1 ); + tmp = deblank( tmp ); if ( !tmp.empty() ) - list.push_back(tmp); - i1 = i2+1; + list.push_back( tmp ); + i1 = i2 + 1; } - if ( line[i2]==0 ) + if ( line[i2] == 0 ) break; i2++; } @@ -57,8 +57,6 @@ inline std::vector splitList( const char *line, const char token ) } - -}; +}; // namespace IO #endif - diff --git a/IO/Mesh.cpp b/IO/Mesh.cpp index eb712296..91c78e03 100644 --- a/IO/Mesh.cpp +++ b/IO/Mesh.cpp @@ -19,104 +19,98 @@ inline Point nullPoint() /**************************************************** -* Mesh * -****************************************************/ -Mesh::Mesh( ) -{ -} -Mesh::~Mesh( ) -{ -} + * Mesh * + 
****************************************************/ +Mesh::Mesh() {} +Mesh::~Mesh() {} /**************************************************** -* MeshDataStruct * -****************************************************/ + * MeshDataStruct * + ****************************************************/ bool MeshDataStruct::check() const { - enum VariableType { NodeVariable=1, EdgeVariable=2, SurfaceVariable=2, VolumeVariable=3, NullVariable=0 }; bool pass = mesh != nullptr; - for ( const auto& var : vars ) { - pass = pass && static_cast(var->type)>=1 && static_cast(var->type)<=3; + for ( const auto &var : vars ) { + pass = pass && static_cast( var->type ) >= 1 && static_cast( var->type ) <= 3; pass = pass && !var->data.empty(); } - if ( !pass ) + if ( !pass ) { + std::cerr << "Invalid variable detected\n"; return false; - const std::string& meshClass = mesh->className(); + } + const std::string &meshClass = mesh->className(); if ( meshClass == "PointList" ) { - const auto mesh2 = dynamic_cast( mesh.get() ); - if ( mesh2 == nullptr ) - return false; - for ( const auto& var : vars ) { + auto mesh2 = dynamic_cast( mesh.get() ); + ASSERT( mesh2 ); + for ( const auto &var : vars ) { if ( var->type == IO::VariableType::NodeVariable ) { - pass = pass && var->data.size(0)==mesh2->points.size() && var->data.size(1)==var->dim; + pass = pass && var->data.size() == ArraySize( mesh2->points.size(), var->dim ); } else if ( var->type == IO::VariableType::EdgeVariable ) { - ERROR("Invalid type for PointList"); + ERROR( "Invalid type for PointList" ); } else if ( var->type == IO::VariableType::SurfaceVariable ) { - ERROR("Invalid type for PointList"); + ERROR( "Invalid type for PointList" ); } else if ( var->type == IO::VariableType::VolumeVariable ) { - ERROR("Invalid type for PointList"); + ERROR( "Invalid type for PointList" ); } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } } } else if ( meshClass == "TriMesh" || meshClass == "TriList" ) { - const auto 
mesh2 = getTriMesh( mesh ); - if ( mesh2 == nullptr ) - return false; - for ( const auto& var : vars ) { + auto mesh2 = getTriMesh( mesh ); + ASSERT( mesh2 ); + for ( const auto &var : vars ) { if ( var->type == IO::VariableType::NodeVariable ) { - pass = pass && var->data.size(0)==mesh2->vertices->points.size() && var->data.size(1)==var->dim; + pass = pass && + var->data.size() == ArraySize( mesh2->vertices->points.size(), var->dim ); } else if ( var->type == IO::VariableType::EdgeVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::SurfaceVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::VolumeVariable ) { - pass = pass && var->data.size(0)==mesh2->A.size() && var->data.size(1)==var->dim; + pass = pass && var->data.size( 0 ) == mesh2->A.size() && + var->data.size( 1 ) == var->dim; } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } } } else if ( meshClass == "DomainMesh" ) { - const auto mesh2 = dynamic_cast( mesh.get() ); - if ( mesh2 == nullptr ) - return false; - for ( const auto& var : vars ) { + auto mesh2 = dynamic_cast( mesh.get() ); + ASSERT( mesh2 ); + for ( const auto &var : vars ) { + ArraySize varSize; if ( var->type == IO::VariableType::NodeVariable ) { - pass = pass && (int) var->data.size(0)==(mesh2->nx+1) && (int) var->data.size(1)==(mesh2->ny+1) - && (int) var->data.size(2)==(mesh2->nz+1) && var->data.size(3)==var->dim; + varSize = ArraySize( mesh2->nx + 1, mesh2->ny + 1, mesh2->nz + 1, var->dim ); } else if ( var->type == IO::VariableType::EdgeVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::SurfaceVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::VolumeVariable ) { - pass = pass && (int) var->data.size(0)==mesh2->nx && (int) var->data.size(1)==mesh2->ny - && (int) 
var->data.size(2)==mesh2->nz && var->data.size(3)==var->dim; + varSize = ArraySize( mesh2->nx, mesh2->ny, mesh2->nz, var->dim ); } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } + if ( var->data.size() == ArraySize( varSize[0] * varSize[1] * varSize[2], varSize[3] ) ) + var->data.resize( varSize ); + pass = pass && var->data.size() == varSize; } } else { - ERROR("Unknown mesh class: "+mesh->className()); + ERROR( "Unknown mesh class: " + mesh->className() ); } return pass; } /**************************************************** -* PointList * -****************************************************/ -PointList::PointList( ) -{ -} + * PointList * + ****************************************************/ +PointList::PointList() {} PointList::PointList( size_t N ) { Point tmp = nullPoint(); - points.resize(N,tmp); -} -PointList::~PointList( ) -{ + points.resize( N, tmp ); } +PointList::~PointList() {} size_t PointList::numberPointsVar( VariableType type ) const { size_t N = 0; @@ -124,174 +118,168 @@ size_t PointList::numberPointsVar( VariableType type ) const N = points.size(); return N; } -std::pair PointList::pack( int level ) const +std::pair PointList::pack( int level ) const { - std::pair data_out(0,NULL); - if ( level==0 ) { - data_out.first = (2+3*points.size())*sizeof(double); - double *data_ptr = new double[2+3*points.size()]; - data_out.second = data_ptr; - uint64_t *data_int = reinterpret_cast(data_ptr); - data_int[0] = level; - data_int[1] = points.size(); - double *data = &data_ptr[2]; - for (size_t i=0; i data_out( 0, NULL ); + if ( level == 0 ) { + data_out.first = ( 2 + 3 * points.size() ) * sizeof( double ); + double *data_ptr = new double[2 + 3 * points.size()]; + data_out.second = data_ptr; + uint64_t *data_int = reinterpret_cast( data_ptr ); + data_int[0] = level; + data_int[1] = points.size(); + double *data = &data_ptr[2]; + for ( size_t i = 0; i < points.size(); i++ ) { + data[3 * i + 0] = points[i].x; + data[3 * i + 
1] = points[i].y; + data[3 * i + 2] = points[i].z; } } return data_out; } -void PointList::unpack( const std::pair& data_in ) +void PointList::unpack( const std::pair &data_in ) { - uint64_t *data_int = reinterpret_cast(data_in.second); - const double *data = reinterpret_cast(data_in.second); - int level = data_int[0]; - uint64_t N = data_int[1]; - data = &data[2]; - if ( level==0 ) { - ASSERT((2+3*N)*sizeof(double)==data_in.first); - points.resize(N); - for (size_t i=0; i( data_in.second ); + const double *data = reinterpret_cast( data_in.second ); + int level = data_int[0]; + uint64_t N = data_int[1]; + data = &data[2]; + if ( level == 0 ) { + ASSERT( ( 2 + 3 * N ) * sizeof( double ) == data_in.first ); + points.resize( N ); + for ( size_t i = 0; i < points.size(); i++ ) { + points[i].x = data[3 * i + 0]; + points[i].y = data[3 * i + 1]; + points[i].z = data[3 * i + 2]; } } } /**************************************************** -* TriList * -****************************************************/ -TriList::TriList( ) -{ -} + * TriList * + ****************************************************/ +TriList::TriList() {} TriList::TriList( size_t N_tri ) { Point tmp = nullPoint(); - A.resize(N_tri,tmp); - B.resize(N_tri,tmp); - C.resize(N_tri,tmp); + A.resize( N_tri, tmp ); + B.resize( N_tri, tmp ); + C.resize( N_tri, tmp ); } -TriList::TriList( const TriMesh& mesh ) +TriList::TriList( const TriMesh &mesh ) { Point tmp = nullPoint(); - A.resize(mesh.A.size(),tmp); - B.resize(mesh.B.size(),tmp); - C.resize(mesh.C.size(),tmp); - ASSERT(mesh.vertices.get()!=NULL); - const std::vector& P = mesh.vertices->points; - for (size_t i=0; i &P = mesh.vertices->points; + for ( size_t i = 0; i < A.size(); i++ ) A[i] = P[mesh.A[i]]; - for (size_t i=0; i TriList::pack( int level ) const +std::pair TriList::pack( int level ) const { - std::pair data_out(0,NULL); - if ( level==0 ) { - data_out.first = (2+9*A.size())*sizeof(double); - double *data_ptr = new double[2+9*A.size()]; - 
data_out.second = data_ptr; - uint64_t *data_int = reinterpret_cast(data_ptr); - data_int[0] = level; - data_int[1] = A.size(); - double *data = &data_ptr[2]; - for (size_t i=0; i data_out( 0, NULL ); + if ( level == 0 ) { + data_out.first = ( 2 + 9 * A.size() ) * sizeof( double ); + double *data_ptr = new double[2 + 9 * A.size()]; + data_out.second = data_ptr; + uint64_t *data_int = reinterpret_cast( data_ptr ); + data_int[0] = level; + data_int[1] = A.size(); + double *data = &data_ptr[2]; + for ( size_t i = 0; i < A.size(); i++ ) { + data[9 * i + 0] = A[i].x; + data[9 * i + 1] = A[i].y; + data[9 * i + 2] = A[i].z; + data[9 * i + 3] = B[i].x; + data[9 * i + 4] = B[i].y; + data[9 * i + 5] = B[i].z; + data[9 * i + 6] = C[i].x; + data[9 * i + 7] = C[i].y; + data[9 * i + 8] = C[i].z; } } return data_out; } -void TriList::unpack( const std::pair& data_in ) +void TriList::unpack( const std::pair &data_in ) { - uint64_t *data_int = reinterpret_cast(data_in.second); - const double *data = reinterpret_cast(data_in.second); - int level = data_int[0]; - uint64_t N = data_int[1]; - data = &data[2]; - if ( level==0 ) { - ASSERT((2+9*N)*sizeof(double)==data_in.first); - A.resize(N); - B.resize(N); - C.resize(N); - for (size_t i=0; i( data_in.second ); + const double *data = reinterpret_cast( data_in.second ); + int level = data_int[0]; + uint64_t N = data_int[1]; + data = &data[2]; + if ( level == 0 ) { + ASSERT( ( 2 + 9 * N ) * sizeof( double ) == data_in.first ); + A.resize( N ); + B.resize( N ); + C.resize( N ); + for ( size_t i = 0; i < A.size(); i++ ) { + A[i].x = data[9 * i + 0]; + A[i].y = data[9 * i + 1]; + A[i].z = data[9 * i + 2]; + B[i].x = data[9 * i + 3]; + B[i].y = data[9 * i + 4]; + B[i].z = data[9 * i + 5]; + C[i].x = data[9 * i + 6]; + C[i].y = data[9 * i + 7]; + C[i].z = data[9 * i + 8]; } } } /**************************************************** -* TriMesh * -****************************************************/ -TriMesh::TriMesh( ) -{ -} + * TriMesh * + 
****************************************************/ +TriMesh::TriMesh() {} TriMesh::TriMesh( size_t N_tri, size_t N_point ) { - vertices.reset( new PointList(N_point) ); - A.resize(N_tri,-1); - B.resize(N_tri,-1); - C.resize(N_tri,-1); + vertices.reset( new PointList( N_point ) ); + A.resize( N_tri, -1 ); + B.resize( N_tri, -1 ); + C.resize( N_tri, -1 ); } TriMesh::TriMesh( size_t N_tri, std::shared_ptr points ) { vertices = points; - A.resize(N_tri,-1); - B.resize(N_tri,-1); - C.resize(N_tri,-1); + A.resize( N_tri, -1 ); + B.resize( N_tri, -1 ); + C.resize( N_tri, -1 ); } -TriMesh::TriMesh( const TriList& mesh ) +TriMesh::TriMesh( const TriList &mesh ) { // For simlicity we will just create a mesh with ~3x the verticies for now - ASSERT(mesh.A.size()==mesh.B.size()&&mesh.A.size()==mesh.C.size()); - A.resize(mesh.A.size()); - B.resize(mesh.B.size()); - C.resize(mesh.C.size()); - vertices.reset( new PointList(3*mesh.A.size()) ); - for (size_t i=0; ipoints[A[i]] = mesh.A[i]; vertices->points[B[i]] = mesh.B[i]; vertices->points[C[i]] = mesh.C[i]; } } -TriMesh::~TriMesh( ) +TriMesh::~TriMesh() { vertices.reset(); A.clear(); @@ -301,181 +289,194 @@ TriMesh::~TriMesh( ) size_t TriMesh::numberPointsVar( VariableType type ) const { size_t N = 0; - if ( type==VariableType::NodeVariable ) + if ( type == VariableType::NodeVariable ) N = vertices->points.size(); - else if ( type==VariableType::SurfaceVariable || type==VariableType::VolumeVariable ) + else if ( type == VariableType::SurfaceVariable || type == VariableType::VolumeVariable ) N = A.size(); return N; } -std::pair TriMesh::pack( int level ) const +std::pair TriMesh::pack( int level ) const { - std::pair data_out(0,NULL); - if ( level==0 ) { - const std::vector& points = vertices->points; - data_out.first = (3+3*points.size())*sizeof(double) + 3*A.size()*sizeof(int); - double *data_ptr = new double[4+3*points.size()+(3*A.size()*sizeof(int))/sizeof(double)]; - data_out.second = data_ptr; - uint64_t *data_int64 = 
reinterpret_cast(data_ptr); - data_int64[0] = level; - data_int64[1] = points.size(); - data_int64[2] = A.size(); - double *data = &data_ptr[3]; - for (size_t i=0; i data_out( 0, NULL ); + if ( level == 0 ) { + const std::vector &points = vertices->points; + data_out.first = + ( 3 + 3 * points.size() ) * sizeof( double ) + 3 * A.size() * sizeof( int ); + double *data_ptr = + new double[4 + 3 * points.size() + ( 3 * A.size() * sizeof( int ) ) / sizeof( double )]; + data_out.second = data_ptr; + uint64_t *data_int64 = reinterpret_cast( data_ptr ); + data_int64[0] = level; + data_int64[1] = points.size(); + data_int64[2] = A.size(); + double *data = &data_ptr[3]; + for ( size_t i = 0; i < points.size(); i++ ) { + data[3 * i + 0] = points[i].x; + data[3 * i + 1] = points[i].y; + data[3 * i + 2] = points[i].z; } - int *data_int = reinterpret_cast(&data[3*points.size()]); - for (size_t i=0; i( &data[3 * points.size()] ); + for ( size_t i = 0; i < A.size(); i++ ) { + data_int[3 * i + 0] = A[i]; + data_int[3 * i + 1] = B[i]; + data_int[3 * i + 2] = C[i]; } } return data_out; } -void TriMesh::unpack( const std::pair& data_in ) +void TriMesh::unpack( const std::pair &data_in ) { - uint64_t *data_int64 = reinterpret_cast(data_in.second); - const double *data = reinterpret_cast(data_in.second); - int level = data_int64[0]; - uint64_t N_P = data_int64[1]; - uint64_t N_A = data_int64[2]; - data = &data[3]; - if ( level==0 ) { - size_t size = (3+3*N_P)*sizeof(double)+3*N_A*sizeof(int); - ASSERT(size==data_in.first); - vertices.reset( new PointList(N_P) ); - std::vector& points = vertices->points; - for (size_t i=0; i( data_in.second ); + const double *data = reinterpret_cast( data_in.second ); + int level = data_int64[0]; + uint64_t N_P = data_int64[1]; + uint64_t N_A = data_int64[2]; + data = &data[3]; + if ( level == 0 ) { + size_t size = ( 3 + 3 * N_P ) * sizeof( double ) + 3 * N_A * sizeof( int ); + ASSERT( size == data_in.first ); + vertices.reset( new PointList( N_P ) ); + 
std::vector &points = vertices->points; + for ( size_t i = 0; i < points.size(); i++ ) { + points[i].x = data[3 * i + 0]; + points[i].y = data[3 * i + 1]; + points[i].z = data[3 * i + 2]; } - const int *data_int = reinterpret_cast(&data[3*N_P]); - A.resize(N_A); - B.resize(N_A); - C.resize(N_A); - for (size_t i=0; i( &data[3 * N_P] ); + A.resize( N_A ); + B.resize( N_A ); + C.resize( N_A ); + for ( size_t i = 0; i < A.size(); i++ ) { + A[i] = data_int[3 * i + 0]; + B[i] = data_int[3 * i + 1]; + C[i] = data_int[3 * i + 2]; } } } /**************************************************** -* Domain mesh * -****************************************************/ -DomainMesh::DomainMesh(): - nprocx(0), nprocy(0), nprocz(0), rank(0), - nx(0), ny(0), nz(0), - Lx(0), Ly(0), Lz(0) + * Domain mesh * + ****************************************************/ +DomainMesh::DomainMesh() + : nprocx( 0 ), + nprocy( 0 ), + nprocz( 0 ), + rank( 0 ), + nx( 0 ), + ny( 0 ), + nz( 0 ), + Lx( 0 ), + Ly( 0 ), + Lz( 0 ) { } -DomainMesh::DomainMesh( RankInfoStruct data, - int nx2, int ny2, int nz2, double Lx2, double Ly2, double Lz2 ): - nprocx(data.nx), nprocy(data.ny), nprocz(data.nz), rank(data.rank[1][1][1]), - nx(nx2), ny(ny2), nz(nz2), - Lx(Lx2), Ly(Ly2), Lz(Lz2) -{ -} -DomainMesh::~DomainMesh() +DomainMesh::DomainMesh( + RankInfoStruct data, int nx2, int ny2, int nz2, double Lx2, double Ly2, double Lz2 ) + : nprocx( data.nx ), + nprocy( data.ny ), + nprocz( data.nz ), + rank( data.rank[1][1][1] ), + nx( nx2 ), + ny( ny2 ), + nz( nz2 ), + Lx( Lx2 ), + Ly( Ly2 ), + Lz( Lz2 ) { } +DomainMesh::~DomainMesh() {} size_t DomainMesh::numberPointsVar( VariableType type ) const { size_t N = 0; - if ( type==VariableType::NodeVariable ) - N = (nx+1)*(ny+1)*(nz+1); - else if ( type==VariableType::SurfaceVariable ) - N = (nx+1)*ny*nz + nx*(ny+1)*nz + nx*ny*(nz+1); - else if ( type==VariableType::VolumeVariable ) - N = nx*ny*nz; + if ( type == VariableType::NodeVariable ) + N = ( nx + 1 ) * ( ny + 1 ) * ( nz 
+ 1 ); + else if ( type == VariableType::SurfaceVariable ) + N = ( nx + 1 ) * ny * nz + nx * ( ny + 1 ) * nz + nx * ny * ( nz + 1 ); + else if ( type == VariableType::VolumeVariable ) + N = nx * ny * nz; return N; } -std::pair DomainMesh::pack( int level ) const +std::pair DomainMesh::pack( int level ) const { - std::pair data(0,NULL); - data.first = 7*sizeof(double); + std::pair data( 0, NULL ); + data.first = 7 * sizeof( double ); data.second = new double[7]; - memset(data.second,0,7*sizeof(double)); - int *data_int = reinterpret_cast(data.second); - double *data_double = &reinterpret_cast(data.second)[4]; - data_int[0] = nprocx; - data_int[1] = nprocy; - data_int[2] = nprocz; - data_int[3] = rank; - data_int[4] = nx; - data_int[5] = ny; - data_int[6] = nz; - data_double[0] = Lx; - data_double[1] = Ly; - data_double[2] = Lz; + memset( data.second, 0, 7 * sizeof( double ) ); + int *data_int = reinterpret_cast( data.second ); + double *data_double = &reinterpret_cast( data.second )[4]; + data_int[0] = nprocx; + data_int[1] = nprocy; + data_int[2] = nprocz; + data_int[3] = rank; + data_int[4] = nx; + data_int[5] = ny; + data_int[6] = nz; + data_double[0] = Lx; + data_double[1] = Ly; + data_double[2] = Lz; return data; } -void DomainMesh::unpack( const std::pair& data ) +void DomainMesh::unpack( const std::pair &data ) { - const int *data_int = reinterpret_cast(data.second); - const double *data_double = &reinterpret_cast(data.second)[4]; - nprocx = data_int[0]; - nprocy = data_int[1]; - nprocz = data_int[2]; - rank = data_int[3]; - nx = data_int[4]; - ny = data_int[5]; - nz = data_int[6]; - Lx = data_double[0]; - Ly = data_double[1]; - Lz = data_double[2]; + const int *data_int = reinterpret_cast( data.second ); + const double *data_double = &reinterpret_cast( data.second )[4]; + nprocx = data_int[0]; + nprocy = data_int[1]; + nprocz = data_int[2]; + rank = data_int[3]; + nx = data_int[4]; + ny = data_int[5]; + nz = data_int[6]; + Lx = data_double[0]; + Ly = 
data_double[1]; + Lz = data_double[2]; } /**************************************************** -* Converters * -****************************************************/ + * Converters * + ****************************************************/ std::shared_ptr getPointList( std::shared_ptr mesh ) { - return std::dynamic_pointer_cast(mesh); + return std::dynamic_pointer_cast( mesh ); } std::shared_ptr getTriMesh( std::shared_ptr mesh ) { std::shared_ptr mesh2; - if ( std::dynamic_pointer_cast(mesh).get() != NULL ) { - mesh2 = std::dynamic_pointer_cast(mesh); - } else if ( std::dynamic_pointer_cast(mesh).get() != NULL ) { - std::shared_ptr trilist = std::dynamic_pointer_cast(mesh); - ASSERT(trilist.get()!=NULL); - mesh2.reset( new TriMesh(*trilist) ); + if ( std::dynamic_pointer_cast( mesh ).get() != NULL ) { + mesh2 = std::dynamic_pointer_cast( mesh ); + } else if ( std::dynamic_pointer_cast( mesh ).get() != NULL ) { + std::shared_ptr trilist = std::dynamic_pointer_cast( mesh ); + ASSERT( trilist.get() != NULL ); + mesh2.reset( new TriMesh( *trilist ) ); } return mesh2; } std::shared_ptr getTriList( std::shared_ptr mesh ) { std::shared_ptr mesh2; - if ( std::dynamic_pointer_cast(mesh).get() != NULL ) { - mesh2 = std::dynamic_pointer_cast(mesh); - } else if ( std::dynamic_pointer_cast(mesh).get() != NULL ) { - std::shared_ptr trimesh = std::dynamic_pointer_cast(mesh); - ASSERT(trimesh.get()!=NULL); - mesh2.reset( new TriList(*trimesh) ); + if ( std::dynamic_pointer_cast( mesh ).get() != NULL ) { + mesh2 = std::dynamic_pointer_cast( mesh ); + } else if ( std::dynamic_pointer_cast( mesh ).get() != NULL ) { + std::shared_ptr trimesh = std::dynamic_pointer_cast( mesh ); + ASSERT( trimesh.get() != NULL ); + mesh2.reset( new TriList( *trimesh ) ); } return mesh2; } std::shared_ptr getPointList( std::shared_ptr mesh ) { - return getPointList( std::const_pointer_cast(mesh) ); + return getPointList( std::const_pointer_cast( mesh ) ); } std::shared_ptr getTriMesh( std::shared_ptr 
mesh ) { - return getTriMesh( std::const_pointer_cast(mesh) ); + return getTriMesh( std::const_pointer_cast( mesh ) ); } std::shared_ptr getTriList( std::shared_ptr mesh ) { - return getTriList( std::const_pointer_cast(mesh) ); + return getTriList( std::const_pointer_cast( mesh ) ); } -} // IO namespace - +} // namespace IO diff --git a/IO/Mesh.h b/IO/Mesh.h index b204675a..a60e14c9 100644 --- a/IO/Mesh.h +++ b/IO/Mesh.h @@ -6,17 +6,23 @@ #include #include +#include "analysis/PointList.h" #include "common/Array.h" #include "common/Communication.h" -#include "analysis/PointList.h" namespace IO { //! Possible variable types -enum class VariableType: unsigned char { NodeVariable=1, EdgeVariable=2, SurfaceVariable=2, VolumeVariable=3, NullVariable=0 }; -enum class DataType: unsigned char { Double=1, Float=2, Int=2, Null=0 }; +enum class VariableType : unsigned char { + NodeVariable = 1, + EdgeVariable = 2, + SurfaceVariable = 3, + VolumeVariable = 4, + NullVariable = 0 +}; +enum class DataType : unsigned char { Double = 1, Float = 2, Int = 2, Null = 0 }; /*! \class Mesh @@ -32,21 +38,22 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const = 0; //! Pack the data - virtual std::pair pack( int level ) const = 0; + virtual std::pair pack( int level ) const = 0; //! Unpack the data - virtual void unpack( const std::pair& data ) = 0; + virtual void unpack( const std::pair &data ) = 0; + protected: //! Empty constructor Mesh(); - Mesh(const Mesh&); - Mesh& operator=(const Mesh&); + Mesh( const Mesh & ); + Mesh &operator=( const Mesh & ); }; /*! \class PointList \brief A class used to hold a list of verticies */ -class PointList: public Mesh +class PointList : public Mesh { public: //! Empty constructor @@ -60,13 +67,14 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const; //! 
Pack the data - virtual std::pair pack( int level ) const; + virtual std::pair pack( int level ) const; //! Unpack the data - virtual void unpack( const std::pair& data ); + virtual void unpack( const std::pair &data ); //! Access the points - const std::vector& getPoints() const { return points; } + const std::vector &getPoints() const { return points; } + public: - std::vector points; //!< List of points vertex + std::vector points; //!< List of points vertex }; @@ -74,7 +82,7 @@ public: \brief A class used to hold a list of triangles specified by their vertex coordinates */ class TriMesh; -class TriList: public Mesh +class TriList : public Mesh { public: //! Empty constructor @@ -82,7 +90,7 @@ public: //! Constructor for N triangles TriList( size_t N_tri ); //! Constructor from TriMesh - TriList( const TriMesh& ); + TriList( const TriMesh & ); //! Destructor virtual ~TriList(); //! Mesh class name @@ -90,20 +98,22 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const; //! Pack the data - virtual std::pair pack( int level ) const; + virtual std::pair pack( int level ) const; //! Unpack the data - virtual void unpack( const std::pair& data ); + virtual void unpack( const std::pair &data ); + public: - std::vector A; //!< First vertex - std::vector B; //!< Second vertex - std::vector C; //!< Third vertex + std::vector A; //!< First vertex + std::vector B; //!< Second vertex + std::vector C; //!< Third vertex }; /*! \class TriMesh - \brief A class used to hold a list of trianges specified by their vertex number and list of coordiantes + \brief A class used to hold a list of trianges specified by their vertex number and list of + coordiantes */ -class TriMesh: public Mesh +class TriMesh : public Mesh { public: //! TriMesh constructor @@ -113,7 +123,7 @@ public: //! Constructor for Nt triangles and the given points TriMesh( size_t N_tri, std::shared_ptr points ); //! 
Constructor from TriList - TriMesh( const TriList& ); + TriMesh( const TriList & ); //! Destructor virtual ~TriMesh(); //! Mesh class name @@ -121,21 +131,22 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const; //! Pack the data - virtual std::pair pack( int level ) const; + virtual std::pair pack( int level ) const; //! Unpack the data - virtual void unpack( const std::pair& data ); + virtual void unpack( const std::pair &data ); + public: - std::shared_ptr vertices; //!< List of verticies - std::vector A; //!< First vertex - std::vector B; //!< Second vertex - std::vector C; //!< Third vertex + std::shared_ptr vertices; //!< List of verticies + std::vector A; //!< First vertex + std::vector B; //!< Second vertex + std::vector C; //!< Third vertex }; /*! \class Domain \brief A class used to hold the domain */ -class DomainMesh: public Mesh +class DomainMesh : public Mesh { public: //! Empty constructor @@ -149,9 +160,10 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const; //! Pack the data - virtual std::pair pack( int level ) const; + virtual std::pair pack( int level ) const; //! Unpack the data - virtual void unpack( const std::pair& data ); + virtual void unpack( const std::pair &data ); + public: int nprocx, nprocy, nprocz, rank; int nx, ny, nz; @@ -159,37 +171,40 @@ public: }; - /*! \class Variable \brief A base class for variables */ -struct Variable -{ +struct Variable { public: // Internal variables - unsigned char dim; //!< Number of points per grid point (1: scalar, 3: vector, ...) - VariableType type; //!< Variable type - DataType precision; //!< Variable precision to use for IO - std::string name; //!< Variable name - Array data; //!< Variable data + unsigned char dim; //!< Number of points per grid point (1: scalar, 3: vector, ...) 
+ VariableType type; //!< Variable type + DataType precision; //!< Variable precision to use for IO + std::string name; //!< Variable name + Array data; //!< Variable data //! Empty constructor - Variable(): dim(0), type(VariableType::NullVariable), precision(DataType::Double) {} + Variable() : dim( 0 ), type( VariableType::NullVariable ), precision( DataType::Double ) {} //! Constructor - Variable( int dim_, IO::VariableType type_, const std::string& name_ ): - dim(dim_), type(type_), precision(DataType::Double), name(name_) {} + Variable( int dim_, IO::VariableType type_, const std::string &name_ ) + : dim( dim_ ), type( type_ ), precision( DataType::Double ), name( name_ ) + { + } //! Constructor - Variable( int dim_, IO::VariableType type_, const std::string& name_, const Array& data_ ): - dim(dim_), type(type_), precision(DataType::Double), name(name_), data(data_) {} + Variable( + int dim_, IO::VariableType type_, const std::string &name_, const Array &data_ ) + : dim( dim_ ), type( type_ ), precision( DataType::Double ), name( name_ ), data( data_ ) + { + } //! Destructor virtual ~Variable() {} + protected: //! Empty constructor - Variable(const Variable&); - Variable& operator=(const Variable&); + Variable( const Variable & ); + Variable &operator=( const Variable & ); }; - /*! \class MeshDataStruct \brief A class used to hold database info for saving a mesh */ @@ -197,9 +212,9 @@ struct MeshDataStruct { DataType precision; //!< Precision to use for IO (mesh) std::string meshName; //!< Mesh name std::shared_ptr mesh; //!< Mesh data - std::vector > vars; + std::vector> vars; //! Empty constructor - MeshDataStruct(): precision(DataType::Double) {} + MeshDataStruct() : precision( DataType::Double ) {} //! 
Check the data bool check() const; }; @@ -214,7 +229,6 @@ std::shared_ptr getTriMesh( std::shared_ptr mesh ); std::shared_ptr getTriList( std::shared_ptr mesh ); -} // IO namespace +} // namespace IO #endif - diff --git a/IO/MeshDatabase.cpp b/IO/MeshDatabase.cpp index 2c03ddde..70b9acc3 100644 --- a/IO/MeshDatabase.cpp +++ b/IO/MeshDatabase.cpp @@ -1,130 +1,151 @@ #include "IO/MeshDatabase.h" +#include "IO/IOHelpers.h" #include "IO/Mesh.h" #include "IO/PackData.h" -#include "IO/IOHelpers.h" #include "common/MPI.h" #include "common/Utilities.h" -#include +#include #include #include -#include +#include #include - // MeshType template<> -size_t packsize( const IO::MeshType& rhs ) +size_t packsize( const IO::MeshType &rhs ) { - return sizeof(IO::MeshType); + return sizeof( IO::MeshType ); } template<> -void pack( const IO::MeshType& rhs, char *buffer ) +void pack( const IO::MeshType &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(IO::MeshType)); + memcpy( buffer, &rhs, sizeof( IO::MeshType ) ); } template<> -void unpack( IO::MeshType& data, const char *buffer ) +void unpack( IO::MeshType &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(IO::MeshType)); + memcpy( &data, buffer, sizeof( IO::MeshType ) ); } // Variable::VariableType template<> -size_t packsize( const IO::VariableType& rhs ) +size_t packsize( const IO::VariableType &rhs ) { - return sizeof(IO::VariableType); + return sizeof( IO::VariableType ); } template<> -void pack( const IO::VariableType& rhs, char *buffer ) +void pack( const IO::VariableType &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(IO::VariableType)); + memcpy( buffer, &rhs, sizeof( IO::VariableType ) ); } template<> -void unpack( IO::VariableType& data, const char *buffer ) +void unpack( IO::VariableType &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(IO::VariableType)); + memcpy( &data, buffer, sizeof( IO::VariableType ) ); } // DatabaseEntry template<> -size_t packsize( const IO::DatabaseEntry& rhs ) +size_t 
packsize( const IO::DatabaseEntry &rhs ) { - return packsize(rhs.name)+packsize(rhs.file)+packsize(rhs.offset); + return packsize( rhs.name ) + packsize( rhs.file ) + packsize( rhs.offset ); } template<> -void pack( const IO::DatabaseEntry& rhs, char *buffer ) +void pack( const IO::DatabaseEntry &rhs, char *buffer ) { - size_t i=0; - pack(rhs.name,&buffer[i]); i+=packsize(rhs.name); - pack(rhs.file,&buffer[i]); i+=packsize(rhs.file); - pack(rhs.offset,&buffer[i]); i+=packsize(rhs.offset); + size_t i = 0; + pack( rhs.name, &buffer[i] ); + i += packsize( rhs.name ); + pack( rhs.file, &buffer[i] ); + i += packsize( rhs.file ); + pack( rhs.offset, &buffer[i] ); + i += packsize( rhs.offset ); } template<> -void unpack( IO::DatabaseEntry& data, const char *buffer ) +void unpack( IO::DatabaseEntry &data, const char *buffer ) { - size_t i=0; - unpack(data.name,&buffer[i]); i+=packsize(data.name); - unpack(data.file,&buffer[i]); i+=packsize(data.file); - unpack(data.offset,&buffer[i]); i+=packsize(data.offset); + size_t i = 0; + unpack( data.name, &buffer[i] ); + i += packsize( data.name ); + unpack( data.file, &buffer[i] ); + i += packsize( data.file ); + unpack( data.offset, &buffer[i] ); + i += packsize( data.offset ); } // VariableDatabase template<> -size_t packsize( const IO::VariableDatabase& rhs ) +size_t packsize( const IO::VariableDatabase &rhs ) { - return packsize(rhs.name)+packsize(rhs.type)+packsize(rhs.dim); + return packsize( rhs.name ) + packsize( rhs.type ) + packsize( rhs.dim ); } template<> -void pack( const IO::VariableDatabase& rhs, char *buffer ) +void pack( const IO::VariableDatabase &rhs, char *buffer ) { - size_t i=0; - pack(rhs.name,&buffer[i]); i+=packsize(rhs.name); - pack(rhs.type,&buffer[i]); i+=packsize(rhs.type); - pack(rhs.dim,&buffer[i]); i+=packsize(rhs.dim); + size_t i = 0; + pack( rhs.name, &buffer[i] ); + i += packsize( rhs.name ); + pack( rhs.type, &buffer[i] ); + i += packsize( rhs.type ); + pack( rhs.dim, &buffer[i] ); + i += 
packsize( rhs.dim ); } template<> -void unpack( IO::VariableDatabase& data, const char *buffer ) +void unpack( IO::VariableDatabase &data, const char *buffer ) { - size_t i=0; - unpack(data.name,&buffer[i]); i+=packsize(data.name); - unpack(data.type,&buffer[i]); i+=packsize(data.type); - unpack(data.dim,&buffer[i]); i+=packsize(data.dim); + size_t i = 0; + unpack( data.name, &buffer[i] ); + i += packsize( data.name ); + unpack( data.type, &buffer[i] ); + i += packsize( data.type ); + unpack( data.dim, &buffer[i] ); + i += packsize( data.dim ); } // MeshDatabase template<> -size_t packsize( const IO::MeshDatabase& data ) +size_t packsize( const IO::MeshDatabase &data ) { - return packsize(data.name) - + packsize(data.type) - + packsize(data.meshClass) - + packsize(data.format) - + packsize(data.domains) - + packsize(data.variables) - + packsize(data.variable_data); + return packsize( data.name ) + packsize( data.type ) + packsize( data.meshClass ) + + packsize( data.format ) + packsize( data.domains ) + packsize( data.variables ) + + packsize( data.variable_data ); } template<> -void pack( const IO::MeshDatabase& rhs, char *buffer ) +void pack( const IO::MeshDatabase &rhs, char *buffer ) { size_t i = 0; - pack(rhs.name,&buffer[i]); i+=packsize(rhs.name); - pack(rhs.type,&buffer[i]); i+=packsize(rhs.type); - pack(rhs.meshClass,&buffer[i]); i+=packsize(rhs.meshClass); - pack(rhs.format,&buffer[i]); i+=packsize(rhs.format); - pack(rhs.domains,&buffer[i]); i+=packsize(rhs.domains); - pack(rhs.variables,&buffer[i]); i+=packsize(rhs.variables); - pack(rhs.variable_data,&buffer[i]); i+=packsize(rhs.variable_data); + pack( rhs.name, &buffer[i] ); + i += packsize( rhs.name ); + pack( rhs.type, &buffer[i] ); + i += packsize( rhs.type ); + pack( rhs.meshClass, &buffer[i] ); + i += packsize( rhs.meshClass ); + pack( rhs.format, &buffer[i] ); + i += packsize( rhs.format ); + pack( rhs.domains, &buffer[i] ); + i += packsize( rhs.domains ); + pack( rhs.variables, &buffer[i] ); + 
i += packsize( rhs.variables ); + pack( rhs.variable_data, &buffer[i] ); + i += packsize( rhs.variable_data ); } template<> -void unpack( IO::MeshDatabase& data, const char *buffer ) +void unpack( IO::MeshDatabase &data, const char *buffer ) { - size_t i=0; - unpack(data.name,&buffer[i]); i+=packsize(data.name); - unpack(data.type,&buffer[i]); i+=packsize(data.type); - unpack(data.meshClass,&buffer[i]); i+=packsize(data.meshClass); - unpack(data.format,&buffer[i]); i+=packsize(data.format); - unpack(data.domains,&buffer[i]); i+=packsize(data.domains); - unpack(data.variables,&buffer[i]); i+=packsize(data.variables); - unpack(data.variable_data,&buffer[i]); i+=packsize(data.variable_data); + size_t i = 0; + unpack( data.name, &buffer[i] ); + i += packsize( data.name ); + unpack( data.type, &buffer[i] ); + i += packsize( data.type ); + unpack( data.meshClass, &buffer[i] ); + i += packsize( data.meshClass ); + unpack( data.format, &buffer[i] ); + i += packsize( data.format ); + unpack( data.domains, &buffer[i] ); + i += packsize( data.domains ); + unpack( data.variables, &buffer[i] ); + i += packsize( data.variables ); + unpack( data.variable_data, &buffer[i] ); + i += packsize( data.variable_data ); } @@ -132,79 +153,72 @@ namespace IO { /**************************************************** -* VariableDatabase * -****************************************************/ -bool VariableDatabase::operator==(const VariableDatabase& rhs ) const + * VariableDatabase * + ****************************************************/ +bool VariableDatabase::operator==( const VariableDatabase &rhs ) const { - return type==rhs.type && dim==rhs.dim && name==rhs.name; + return type == rhs.type && dim == rhs.dim && name == rhs.name; } -bool VariableDatabase::operator!=(const VariableDatabase& rhs ) const +bool VariableDatabase::operator!=( const VariableDatabase &rhs ) const { - return type!=rhs.type || dim!=rhs.dim || name!=rhs.name; + return type != rhs.type || dim != rhs.dim || name != 
rhs.name; } -bool VariableDatabase::operator>=(const VariableDatabase& rhs ) const +bool VariableDatabase::operator>=( const VariableDatabase &rhs ) const { - return operator>(rhs) || operator==(rhs); + return operator>( rhs ) || operator==( rhs ); } -bool VariableDatabase::operator<=(const VariableDatabase& rhs ) const +bool VariableDatabase::operator<=( const VariableDatabase &rhs ) const { return !operator>( rhs ); } +bool VariableDatabase::operator>( const VariableDatabase &rhs ) const { - return !operator>(rhs); -} -bool VariableDatabase::operator>(const VariableDatabase& rhs ) const -{ - if ( name>rhs.name ) + if ( name > rhs.name ) return true; - else if ( namerhs.type ) + if ( type > rhs.type ) return true; - else if ( typerhs.dim ) + if ( dim > rhs.dim ) return true; - else if ( dim(rhs) && operator!=(rhs); + return !operator>( rhs ) && operator!=( rhs ); } /**************************************************** -* MeshDatabase * -****************************************************/ -MeshDatabase::MeshDatabase() + * MeshDatabase * + ****************************************************/ +MeshDatabase::MeshDatabase() {} +MeshDatabase::~MeshDatabase() {} +MeshDatabase::MeshDatabase( const MeshDatabase &rhs ) { -} -MeshDatabase::~MeshDatabase() -{ -} -MeshDatabase::MeshDatabase(const MeshDatabase& rhs) -{ - name = rhs.name; - type = rhs.type; - meshClass = rhs.meshClass; - format = rhs.format; - domains = rhs.domains; - variables = rhs.variables; + name = rhs.name; + type = rhs.type; + meshClass = rhs.meshClass; + format = rhs.format; + domains = rhs.domains; + variables = rhs.variables; variable_data = rhs.variable_data; } -MeshDatabase& MeshDatabase::operator=(const MeshDatabase& rhs) +MeshDatabase &MeshDatabase::operator=( const MeshDatabase &rhs ) { - this->name = rhs.name; - this->type = rhs.type; - this->meshClass = rhs.meshClass; - this->format = rhs.format; - this->domains = rhs.domains; - this->variables = rhs.variables; + this->name = rhs.name; + 
this->type = rhs.type; + this->meshClass = rhs.meshClass; + this->format = rhs.format; + this->domains = rhs.domains; + this->variables = rhs.variables; this->variable_data = rhs.variable_data; return *this; } -VariableDatabase MeshDatabase::getVariableDatabase( const std::string& varname ) const +VariableDatabase MeshDatabase::getVariableDatabase( const std::string &varname ) const { - for (size_t i=0; i list = splitList(line,';'); - name = list[0]; - file = list[1]; - offset = atol(list[2].c_str()); + auto list = splitList( line, ';' ); + name = list[0]; + file = list[1]; + offset = atol( list[2].c_str() ); } -void DatabaseEntry::read( const char* line ) +void DatabaseEntry::read( const char *line ) { - std::vector list = splitList(line,';'); - name = list[0]; - file = list[1]; - offset = atol(list[2].c_str()); + auto list = splitList( line, ';' ); + name = list[0]; + file = list[1]; + offset = atol( list[2].c_str() ); } -void DatabaseEntry::read( const std::string& line ) +void DatabaseEntry::read( const std::string &line ) { - std::vector list = splitList(line.c_str(),';'); - name = list[0]; - file = list[1]; - offset = atol(list[2].c_str()); + auto list = splitList( line.c_str(), ';' ); + name = list[0]; + file = list[1]; + offset = atol( list[2].c_str() ); } // Gather the mesh databases from all processors -inline int tod( int N ) { return (N+7)/sizeof(double); } -std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ) +inline int tod( int N ) { return ( N + 7 ) / sizeof( double ); } +std::vector gatherAll( + const std::vector &meshes, const Utilities::MPI &comm ) { if ( comm.getSize() == 1 ) return meshes; - PROFILE_START("gatherAll"); - PROFILE_START("gatherAll-pack",2); + PROFILE_START( "gatherAll" ); + PROFILE_START( "gatherAll-pack", 2 ); int size = comm.getSize(); // First pack the mesh data to local buffers int localsize = 0; - for (size_t i=0; i data; + PROFILE_START( "gatherAll-unpack", 2 ); + std::map data; pos = 0; while ( 
pos < globalsize ) { MeshDatabase tmp; - unpack(tmp,(char*)&globalbuf[pos]); - pos += tod(packsize(tmp)); - std::map::iterator it = data.find(tmp.name); - if ( it==data.end() ) { + unpack( tmp, (char *) &globalbuf[pos] ); + pos += tod( packsize( tmp ) ); + std::map::iterator it = data.find( tmp.name ); + if ( it == data.end() ) { data[tmp.name] = tmp; } else { - for (size_t i=0; isecond.domains.push_back(tmp.domains[i]); - for (size_t i=0; isecond.variables.push_back(tmp.variables[i]); - it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end()); + for ( size_t i = 0; i < tmp.domains.size(); i++ ) + it->second.domains.push_back( tmp.domains[i] ); + for ( size_t i = 0; i < tmp.variables.size(); i++ ) + it->second.variables.push_back( tmp.variables[i] ); + it->second.variable_data.insert( tmp.variable_data.begin(), tmp.variable_data.end() ); } } - for (auto it=data.begin(); it!=data.end(); ++it) { + for ( auto it = data.begin(); it != data.end(); ++it ) { // Get the unique variables - std::set data2(it->second.variables.begin(),it->second.variables.end()); - it->second.variables = std::vector(data2.begin(),data2.end()); + std::set data2( + it->second.variables.begin(), it->second.variables.end() ); + it->second.variables = std::vector( data2.begin(), data2.end() ); } // Free temporary memory - delete [] localbuf; - delete [] disp; - delete [] globalbuf; + delete[] localbuf; + delete[] disp; + delete[] globalbuf; // Return the results - std::vector data2(data.size()); - size_t i=0; - for (std::map::iterator it=data.begin(); it!=data.end(); ++it, ++i) + std::vector data2( data.size() ); + size_t i = 0; + for ( std::map::iterator it = data.begin(); it != data.end(); + ++it, ++i ) data2[i] = it->second; - PROFILE_STOP("gatherAll-unpack",2); - PROFILE_STOP("gatherAll"); + PROFILE_STOP( "gatherAll-unpack", 2 ); + PROFILE_STOP( "gatherAll" ); return data2; } //! 
Write the mesh databases to a file -void write( const std::vector& meshes, const std::string& filename ) +void write( const std::vector &meshes, const std::string &filename ) { - PROFILE_START("write"); - FILE *fid = fopen(filename.c_str(),"wb"); - for (size_t i=0; i(meshes[i].type)); - fprintf(fid," meshClass: %s\n",meshes[i].meshClass.c_str()); - fprintf(fid," format: %i\n",static_cast(meshes[i].format)); - for (size_t j=0; j(var.type),var.dim); + PROFILE_START( "write" ); + FILE *fid = fopen( filename.c_str(), "wb" ); + for ( size_t i = 0; i < meshes.size(); i++ ) { + fprintf( fid, "%s\n", meshes[i].name.c_str() ); + fprintf( fid, " type: %i\n", static_cast( meshes[i].type ) ); + fprintf( fid, " meshClass: %s\n", meshes[i].meshClass.c_str() ); + fprintf( fid, " format: %i\n", static_cast( meshes[i].format ) ); + for ( size_t j = 0; j < meshes[i].domains.size(); j++ ) + fprintf( fid, " domain: %s\n", meshes[i].domains[j].write().c_str() ); + fprintf( fid, " variables: " ); + for ( size_t j = 0; j < meshes[i].variables.size(); j++ ) { + const VariableDatabase &var = meshes[i].variables[j]; + fprintf( fid, "%s|%i|%i; ", var.name.c_str(), static_cast( var.type ), var.dim ); } - fprintf(fid,"\n"); - std::map,DatabaseEntry>::const_iterator it; - for (it=meshes[i].variable_data.begin(); it!=meshes[i].variable_data.end(); ++it) { - const char* domain = it->first.first.c_str(); - const char* variable = it->first.second.c_str(); - fprintf(fid," variable(%s,%s): %s\n",domain,variable,it->second.write().c_str()); + fprintf( fid, "\n" ); + std::map, DatabaseEntry>::const_iterator it; + for ( it = meshes[i].variable_data.begin(); it != meshes[i].variable_data.end(); ++it ) { + const char *domain = it->first.first.c_str(); + const char *variable = it->first.second.c_str(); + fprintf( + fid, " variable(%s,%s): %s\n", domain, variable, it->second.write().c_str() ); } } - fclose(fid); - PROFILE_STOP("write"); + fclose( fid ); + PROFILE_STOP( "write" ); } //! 
Read the mesh databases from a file -std::vector read( const std::string& filename ) +std::vector read( const std::string &filename ) { std::vector meshes; - PROFILE_START("read"); - FILE *fid = fopen(filename.c_str(),"rb"); - if ( fid==NULL ) - ERROR("Error opening file"); + PROFILE_START( "read" ); + FILE *fid = fopen( filename.c_str(), "rb" ); + if ( fid == NULL ) + ERROR( "Error opening file" ); char *line = new char[10000]; - while ( std::fgets(line,1000,fid) != NULL ) { - if ( line[0]<32 ) { + while ( std::fgets( line, 1000, fid ) != NULL ) { + if ( line[0] < 32 ) { // Empty line continue; } else if ( line[0] != ' ' ) { - meshes.resize(meshes.size()+1); - std::string name(line); - name.resize(name.size()-1); + meshes.resize( meshes.size() + 1 ); + std::string name( line ); + name.resize( name.size() - 1 ); meshes.back().name = name; - } else if ( strncmp(line," format:",10)==0 ) { - meshes.back().format = static_cast(atoi(&line[10])); - } else if ( strncmp(line," type:",8)==0 ) { - meshes.back().type = static_cast(atoi(&line[8])); - } else if ( strncmp(line," meshClass:",13)==0 ) { - meshes.back().meshClass = deblank(std::string(&line[13])); - } else if ( strncmp(line," domain:",10)==0 ) { - DatabaseEntry data(&line[10]); - meshes.back().domains.push_back(data); - } else if ( strncmp(line," variables:",13)==0 ) { - MeshDatabase& mesh = meshes.back(); - std::vector variables = splitList(&line[13],';'); - mesh.variables.resize(variables.size()); - for (size_t i=0; i tmp = splitList(variables[i].c_str(),'|'); - ASSERT(tmp.size()==3); + } else if ( strncmp( line, " format:", 10 ) == 0 ) { + meshes.back().format = static_cast( atoi( &line[10] ) ); + } else if ( strncmp( line, " type:", 8 ) == 0 ) { + meshes.back().type = static_cast( atoi( &line[8] ) ); + } else if ( strncmp( line, " meshClass:", 13 ) == 0 ) { + meshes.back().meshClass = deblank( std::string( &line[13] ) ); + } else if ( strncmp( line, " domain:", 10 ) == 0 ) { + DatabaseEntry data( &line[10] ); + 
meshes.back().domains.push_back( data ); + } else if ( strncmp( line, " variables:", 13 ) == 0 ) { + MeshDatabase &mesh = meshes.back(); + std::vector variables = splitList( &line[13], ';' ); + mesh.variables.resize( variables.size() ); + for ( size_t i = 0; i < variables.size(); i++ ) { + std::vector tmp = splitList( variables[i].c_str(), '|' ); + ASSERT( tmp.size() == 3 ); mesh.variables[i].name = tmp[0]; - mesh.variables[i].type = static_cast(atoi(tmp[1].c_str())); - mesh.variables[i].dim = atoi(tmp[2].c_str()); + mesh.variables[i].type = static_cast( atoi( tmp[1].c_str() ) ); + mesh.variables[i].dim = atoi( tmp[2].c_str() ); } - } else if ( strncmp(line," variable(",12)==0 ) { - size_t i1 = find(line,','); - size_t i2 = find(line,':'); - std::string domain = deblank(std::string(line,12,i1-12)); - std::string variable = deblank(std::string(line,i1+1,i2-i1-2)); - std::pair key(domain,variable); - DatabaseEntry data(&line[i2+1]); - meshes.back().variable_data.insert( - std::pair,DatabaseEntry>(key,data) ); + } else if ( strncmp( line, " variable(", 12 ) == 0 ) { + size_t i1 = find( line, ',' ); + size_t i2 = find( line, ':' ); + std::string domain = deblank( std::string( line, 12, i1 - 12 ) ); + std::string variable = deblank( std::string( line, i1 + 1, i2 - i1 - 2 ) ); + std::pair key( domain, variable ); + DatabaseEntry data( &line[i2 + 1] ); + meshes.back().variable_data.insert( + std::pair, DatabaseEntry>( key, data ) ); } else { - ERROR("Error reading line"); + ERROR( "Error reading line" ); } } - fclose(fid); - delete [] line; - PROFILE_STOP("read"); + fclose( fid ); + delete[] line; + PROFILE_STOP( "read" ); return meshes; } // Return the mesh type -IO::MeshType meshType( const IO::Mesh& mesh ) +IO::MeshType meshType( const IO::Mesh &mesh ) { - IO::MeshType type = IO::Unknown; + IO::MeshType type = IO::MeshType::Unknown; const std::string meshClass = mesh.className(); - if ( meshClass=="PointList" ) { - type = IO::PointMesh; - } else if ( 
meshClass=="TriList" || meshClass=="TriMesh" ) { - type = IO::SurfaceMesh; - } else if ( meshClass=="DomainMesh" ) { - type = IO::VolumeMesh; + if ( meshClass == "PointList" ) { + type = IO::MeshType::PointMesh; + } else if ( meshClass == "TriList" || meshClass == "TriMesh" ) { + type = IO::MeshType::SurfaceMesh; + } else if ( meshClass == "DomainMesh" ) { + type = IO::MeshType::VolumeMesh; } else { - ERROR("Unknown mesh"); + ERROR( "Unknown mesh" ); } return type; } -} // IO namespace - +} // namespace IO diff --git a/IO/MeshDatabase.h b/IO/MeshDatabase.h index 8e501624..0dfd968c 100644 --- a/IO/MeshDatabase.h +++ b/IO/MeshDatabase.h @@ -1,14 +1,14 @@ #ifndef MeshDatabase_INC #define MeshDatabase_INC -#include "IO/Mesh.h" +#include "IO/Mesh.h" #include "common/MPI.h" #include +#include #include #include #include -#include namespace IO { @@ -17,74 +17,76 @@ class Mesh; //! Enum to identify mesh type -//enum class MeshType : char { PointMesh=1, SurfaceMesh=2, VolumeMesh=3, Unknown=-1 }; -enum MeshType { PointMesh=1, SurfaceMesh=2, VolumeMesh=3, Unknown=-1 }; +// enum class MeshType : char { PointMesh=1, SurfaceMesh=2, VolumeMesh=3, Unknown=-1 }; +enum class MeshType { PointMesh = 1, SurfaceMesh = 2, VolumeMesh = 3, Unknown = -1 }; //! 
Helper struct for containing offsets for the mesh info struct DatabaseEntry { - std::string name; //!< Name of the entry - std::string file; //!< Name of the file containing the entry - size_t offset; //!< Offset in the file to start reading - std::string write( ) const; //!< Convert the data to a string - void read( const char* line ); //!< Convert the string to data - void read( const std::string& line ); //!< Convert the string to data - DatabaseEntry( ) {} //!< Empty constructor - DatabaseEntry( const char* line ); //!< Convert the string to data - ~DatabaseEntry() {} //!< Destructor + std::string name; //!< Name of the entry + std::string file; //!< Name of the file containing the entry + size_t offset; //!< Offset in the file to start reading + std::string write() const; //!< Convert the data to a string + void read( const char *line ); //!< Convert the string to data + void read( const std::string &line ); //!< Convert the string to data + DatabaseEntry() {} //!< Empty constructor + DatabaseEntry( const char *line ); //!< Convert the string to data + ~DatabaseEntry() {} //!< Destructor }; //! Structure to hold the info about the variables struct VariableDatabase { - std::string name; //!< Name of the variable - IO::VariableType type; //!< Variable - unsigned int dim; //!< Number of points per grid point (1: scalar, 3: vector, ...) + std::string name; //!< Name of the variable + IO::VariableType type; //!< Variable + unsigned int dim; //!< Number of points per grid point (1: scalar, 3: vector, ...) 
// Overload key operators - bool operator==(const VariableDatabase& rhs ) const; - bool operator!=(const VariableDatabase& rhs ) const; - bool operator>=(const VariableDatabase& rhs ) const; - bool operator<=(const VariableDatabase& rhs ) const; - bool operator> (const VariableDatabase& rhs ) const; - bool operator< (const VariableDatabase& rhs ) const; + bool operator==( const VariableDatabase &rhs ) const; + bool operator!=( const VariableDatabase &rhs ) const; + bool operator>=( const VariableDatabase &rhs ) const; + bool operator<=( const VariableDatabase &rhs ) const; + bool operator>( const VariableDatabase &rhs ) const; + bool operator<( const VariableDatabase &rhs ) const; }; //! Structure to hold the info about the meshes struct MeshDatabase { - typedef std::pair variable_id; + typedef std::pair variable_id; std::string name; //!< Name of the mesh MeshType type; //!< Mesh type std::string meshClass; //!< Mesh class unsigned char format; //!< Data format (1: old, 2: new, 3: new (single), 4: silo) std::vector domains; //!< List of the domains - std::vector variables; //!< List of the variables - std::map variable_data; //!< Data for the variables - VariableDatabase getVariableDatabase( const std::string& varname ) const; + std::vector variables; //!< List of the variables + std::map variable_data; //!< Data for the variables + VariableDatabase getVariableDatabase( const std::string &varname ) const; + public: MeshDatabase(); ~MeshDatabase(); - MeshDatabase(const MeshDatabase&); - MeshDatabase& operator=(const MeshDatabase&); + MeshDatabase( const MeshDatabase & ); + MeshDatabase &operator=( const MeshDatabase & ); }; //! Gather the mesh databases from all processors -std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ); +std::vector gatherAll( + const std::vector &meshes, const Utilities::MPI &comm ); //! 
Write the mesh databases to a file -void write( const std::vector& meshes, const std::string& filename ); +void write( const std::vector &meshes, const std::string &filename ); //! Read the mesh databases from a file -std::vector read( const std::string& filename ); +std::vector read( const std::string &filename ); //! Return the mesh type -IO::MeshType meshType( const IO::Mesh& mesh ); +IO::MeshType meshType( const IO::Mesh &mesh ); -} // IO namespace +} // namespace IO #endif diff --git a/IO/PIO.cpp b/IO/PIO.cpp index fe0f7db4..f959cb49 100644 --- a/IO/PIO.cpp +++ b/IO/PIO.cpp @@ -1,10 +1,10 @@ #include "IO/PIO.h" -#include "common/Utilities.h" #include "common/MPI.h" +#include "common/Utilities.h" +#include #include #include -#include namespace IO { @@ -15,19 +15,18 @@ static ParallelStreamBuffer perr_buffer; static ParallelStreamBuffer plog_buffer; -std::ostream pout(&pout_buffer); -std::ostream perr(&perr_buffer); -std::ostream plog(&plog_buffer); - +std::ostream pout( &pout_buffer ); +std::ostream perr( &perr_buffer ); +std::ostream plog( &plog_buffer ); /**************************************************************************** -* Functions to control logging * -****************************************************************************/ -std::ofstream *global_filestream=NULL; -static void shutdownFilestream( ) + * Functions to control logging * + ****************************************************************************/ +std::ofstream *global_filestream = NULL; +static void shutdownFilestream() { - if ( global_filestream!=NULL ) { + if ( global_filestream != NULL ) { global_filestream->flush(); global_filestream->close(); delete global_filestream; @@ -37,16 +36,16 @@ static void shutdownFilestream( ) void Utilities::logOnlyNodeZero( const std::string &filename ) { int rank = 0; - #ifdef USE_MPI - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - #endif +#ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +#endif if ( rank == 0 ) - 
logAllNodes(filename,true); + logAllNodes( filename, true ); } void Utilities::logAllNodes( const std::string &filename, bool singleStream ) { if ( singleStream ) - ERROR("Not implimented yet"); + ERROR( "Not implimented yet" ); // If the filestream was open, then close it and reset streams shutdownFilestream(); @@ -55,33 +54,33 @@ void Utilities::logAllNodes( const std::string &filename, bool singleStream ) std::string full_filename = filename; if ( !singleStream ) { int rank = 0; - #ifdef USE_MPI - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - #endif +#ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +#endif char tmp[100]; - sprintf(tmp,".%04i",rank); - full_filename += std::string(tmp); + sprintf( tmp, ".%04i", rank ); + full_filename += std::string( tmp ); } - global_filestream = new std::ofstream(full_filename.c_str()); + global_filestream = new std::ofstream( full_filename.c_str() ); - if ( !(*global_filestream) ) { + if ( !( *global_filestream ) ) { delete global_filestream; global_filestream = NULL; perr << "PIO: Could not open log file ``" << full_filename << "''\n"; } else { - pout_buffer.setOutputStream(global_filestream); - pout_buffer.setOutputStream(&std::cout); - perr_buffer.setOutputStream(global_filestream); - perr_buffer.setOutputStream(&std::cerr); - plog_buffer.setOutputStream(global_filestream); + pout_buffer.setOutputStream( global_filestream ); + pout_buffer.setOutputStream( &std::cout ); + perr_buffer.setOutputStream( global_filestream ); + perr_buffer.setOutputStream( &std::cerr ); + plog_buffer.setOutputStream( global_filestream ); } } /**************************************************************************** -* ParallelStreamBuffer class * -****************************************************************************/ -void Utilities::stopLogging( ) + * ParallelStreamBuffer class * + ****************************************************************************/ +void Utilities::stopLogging() { pout_buffer.reset(); perr_buffer.reset(); 
@@ -93,77 +92,71 @@ void Utilities::stopLogging( ) /**************************************************************************** -* ParallelStreamBuffer class * -****************************************************************************/ -ParallelStreamBuffer::ParallelStreamBuffer( ): - d_rank(0), d_size(0), d_buffer_size(0), d_buffer(NULL) + * ParallelStreamBuffer class * + ****************************************************************************/ +ParallelStreamBuffer::ParallelStreamBuffer() + : d_rank( 0 ), d_size( 0 ), d_buffer_size( 0 ), d_buffer( NULL ) { } -ParallelStreamBuffer:: ~ParallelStreamBuffer() -{ - delete [] d_buffer; -} -void ParallelStreamBuffer::setOutputStream( std::ostream *stream ) -{ - d_stream.push_back( stream ); -} +ParallelStreamBuffer::~ParallelStreamBuffer() { delete[] d_buffer; } +void ParallelStreamBuffer::setOutputStream( std::ostream *stream ) { d_stream.push_back( stream ); } int ParallelStreamBuffer::sync() { - for (size_t i=0; i d_buffer_size ) { - if ( d_buffer_size==0 ) { + if ( d_buffer_size == 0 ) { d_buffer_size = 1024; - d_buffer = new char[d_buffer_size]; - memset(d_buffer,0,d_buffer_size); + d_buffer = new char[d_buffer_size]; + memset( d_buffer, 0, d_buffer_size ); } while ( size > d_buffer_size ) { char *tmp = d_buffer; d_buffer_size *= 2; d_buffer = new char[d_buffer_size]; - memset(d_buffer,0,d_buffer_size); - memcpy(d_buffer,tmp,d_size); - delete [] tmp; + memset( d_buffer, 0, d_buffer_size ); + memcpy( d_buffer, tmp, d_size ); + delete[] tmp; } } } -std::streamsize ParallelStreamBuffer::xsputn( const char* text, std::streamsize n ) +std::streamsize ParallelStreamBuffer::xsputn( const char *text, std::streamsize n ) { - reserve(d_size+n); - memcpy(&d_buffer[d_size],text,n); + reserve( d_size + n ); + memcpy( &d_buffer[d_size], text, n ); d_size += n; - if ( text[n-1]==0 || text[n-1]==10 ) { sync(); } + if ( text[n - 1] == 0 || text[n - 1] == 10 ) { + sync(); + } return n; } -int 
ParallelStreamBuffer::overflow(int ch) +int ParallelStreamBuffer::overflow( int ch ) { - reserve(d_size+1); + reserve( d_size + 1 ); d_buffer[d_size] = ch; d_size++; - if ( ch==0 || ch==10 ) { sync(); } - return std::char_traits::to_int_type(ch); + if ( ch == 0 || ch == 10 ) { + sync(); + } + return std::char_traits::to_int_type( ch ); } -int ParallelStreamBuffer::underflow() -{ - return -1; -} -void ParallelStreamBuffer::reset() +int ParallelStreamBuffer::underflow() { return -1; } +void ParallelStreamBuffer::reset() { sync(); d_stream.clear(); - delete [] d_buffer; - d_buffer = NULL; + delete[] d_buffer; + d_buffer = NULL; d_buffer_size = 0; } -} // IO namespace - +} // namespace IO diff --git a/IO/PIO.h b/IO/PIO.h index b6d8b103..9b8aeb89 100644 --- a/IO/PIO.h +++ b/IO/PIO.h @@ -17,7 +17,7 @@ extern std::ostream pout; /*! * Parallel output stream perr writes to the standard error from all nodes. - * Output is prepended with the processor number. + * Output is prepended with the processor number. */ extern std::ostream perr; @@ -45,12 +45,11 @@ inline int printp( const char *format, ... ); class ParallelStreamBuffer : public std::streambuf { public: - /*! * Create a parallel buffer class. The object will require further * initialization to set up the I/O streams and prefix string. */ - ParallelStreamBuffer( ); + ParallelStreamBuffer(); /*! * Set the output file stream (multiple output streams are supported) @@ -60,26 +59,26 @@ public: /*! * The destructor simply deallocates any internal data - * buffers. It does not modify the output streams. + * buffers. It does not modify the output streams. */ virtual ~ParallelStreamBuffer(); /*! * Synchronize the parallel buffer (called from streambuf). */ - virtual int sync(); + virtual int sync(); /** * Write the specified number of characters into the output stream (called * from streambuf). 
- */ - virtual std::streamsize xsputn(const char* text, std::streamsize n); + */ + virtual std::streamsize xsputn( const char *text, std::streamsize n ); /*! * Write an overflow character into the parallel buffer (called from * streambuf). */ - virtual int overflow(int ch); + virtual int overflow( int ch ); /*! * Read an overflow character from the parallel buffer (called from @@ -98,30 +97,30 @@ private: size_t d_size; size_t d_buffer_size; char *d_buffer; - std::vector d_stream; + std::vector d_stream; inline void reserve( size_t size ); }; namespace Utilities { - /*! - * Log messages for node zero only to the specified filename. All output - * to pout, perr, and plog on node zero will go to the log file. - */ - void logOnlyNodeZero( const std::string &filename ); +/*! + * Log messages for node zero only to the specified filename. All output + * to pout, perr, and plog on node zero will go to the log file. + */ +void logOnlyNodeZero( const std::string &filename ); - /*! - * Log messages from all nodes. The diagnostic data for processor XXXXX - * will be sent to a file with the name filename.XXXXX, where filename is - * the function argument. - */ - void logAllNodes( const std::string &filename, bool singleStream=false ); +/*! + * Log messages from all nodes. The diagnostic data for processor XXXXX + * will be sent to a file with the name filename.XXXXX, where filename is + * the function argument. + */ +void logAllNodes( const std::string &filename, bool singleStream = false ); - /*! - * Stop logging messages, flush buffers, and reset memory. - */ - void stopLogging( ); +/*! + * Stop logging messages, flush buffers, and reset memory. + */ +void stopLogging(); } // namespace Utilities diff --git a/IO/PIO.hpp b/IO/PIO.hpp index 67b32cdb..748bf32b 100644 --- a/IO/PIO.hpp +++ b/IO/PIO.hpp @@ -3,9 +3,9 @@ #include "IO/PIO.h" +#include #include #include -#include namespace IO { @@ -13,17 +13,17 @@ namespace IO { inline int printp( const char *format, ... 
) { - va_list ap; - va_start(ap,format); + va_list ap; + va_start( ap, format ); char tmp[1024]; - int n = vsprintf(tmp,format,ap); - va_end(ap); + int n = vsprintf( tmp, format, ap ); + va_end( ap ); pout << tmp; pout.flush(); return n; } -} // IO namespace +} // namespace IO #endif diff --git a/IO/PackData.cpp b/IO/PackData.cpp index f10d9ca7..3782914c 100644 --- a/IO/PackData.cpp +++ b/IO/PackData.cpp @@ -4,102 +4,101 @@ /******************************************************** -* Concrete implimentations for packing/unpacking * -********************************************************/ + * Concrete implimentations for packing/unpacking * + ********************************************************/ // unsigned char template<> -size_t packsize( const unsigned char& rhs ) +size_t packsize( const unsigned char &rhs ) { - return sizeof(unsigned char); + return sizeof( unsigned char ); } template<> -void pack( const unsigned char& rhs, char *buffer ) +void pack( const unsigned char &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(unsigned char)); + memcpy( buffer, &rhs, sizeof( unsigned char ) ); } template<> -void unpack( unsigned char& data, const char *buffer ) +void unpack( unsigned char &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(unsigned char)); + memcpy( &data, buffer, sizeof( unsigned char ) ); } // char template<> -size_t packsize( const char& rhs ) +size_t packsize( const char &rhs ) { - return sizeof(char); + return sizeof( char ); } template<> -void pack( const char& rhs, char *buffer ) +void pack( const char &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(char)); + memcpy( buffer, &rhs, sizeof( char ) ); } template<> -void unpack( char& data, const char *buffer ) +void unpack( char &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(char)); + memcpy( &data, buffer, sizeof( char ) ); } // int template<> -size_t packsize( const int& rhs ) +size_t packsize( const int &rhs ) { - return sizeof(int); + return sizeof( int ); } 
template<> -void pack( const int& rhs, char *buffer ) +void pack( const int &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(int)); + memcpy( buffer, &rhs, sizeof( int ) ); } template<> -void unpack( int& data, const char *buffer ) +void unpack( int &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(int)); + memcpy( &data, buffer, sizeof( int ) ); } // unsigned int template<> -size_t packsize( const unsigned int& rhs ) +size_t packsize( const unsigned int &rhs ) { - return sizeof(unsigned int); + return sizeof( unsigned int ); } template<> -void pack( const unsigned int& rhs, char *buffer ) +void pack( const unsigned int &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(int)); + memcpy( buffer, &rhs, sizeof( int ) ); } template<> -void unpack( unsigned int& data, const char *buffer ) +void unpack( unsigned int &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(int)); + memcpy( &data, buffer, sizeof( int ) ); } // size_t template<> -size_t packsize( const size_t& rhs ) +size_t packsize( const size_t &rhs ) { - return sizeof(size_t); + return sizeof( size_t ); } template<> -void pack( const size_t& rhs, char *buffer ) +void pack( const size_t &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(size_t)); + memcpy( buffer, &rhs, sizeof( size_t ) ); } template<> -void unpack( size_t& data, const char *buffer ) +void unpack( size_t &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(size_t)); + memcpy( &data, buffer, sizeof( size_t ) ); } // std::string template<> -size_t packsize( const std::string& rhs ) +size_t packsize( const std::string &rhs ) { - return rhs.size()+1; + return rhs.size() + 1; } template<> -void pack( const std::string& rhs, char *buffer ) +void pack( const std::string &rhs, char *buffer ) { - memcpy(buffer,rhs.c_str(),rhs.size()+1); + memcpy( buffer, rhs.c_str(), rhs.size() + 1 ); } template<> -void unpack( std::string& data, const char *buffer ) +void unpack( std::string &data, const char *buffer ) { - data = 
std::string(buffer); + data = std::string( buffer ); } - diff --git a/IO/PackData.h b/IO/PackData.h index 85326c0b..f7c1d748 100644 --- a/IO/PackData.h +++ b/IO/PackData.h @@ -2,77 +2,76 @@ #ifndef included_PackData #define included_PackData -#include -#include #include +#include +#include //! Template function to return the buffer size required to pack a class template -size_t packsize( const TYPE& rhs ); +size_t packsize( const TYPE &rhs ); //! Template function to pack a class to a buffer template -void pack( const TYPE& rhs, char *buffer ); +void pack( const TYPE &rhs, char *buffer ); //! Template function to unpack a class from a buffer template -void unpack( TYPE& data, const char *buffer ); +void unpack( TYPE &data, const char *buffer ); //! Template function to return the buffer size required to pack a std::vector template -size_t packsize( const std::vector& rhs ); +size_t packsize( const std::vector &rhs ); //! Template function to pack a class to a buffer template -void pack( const std::vector& rhs, char *buffer ); +void pack( const std::vector &rhs, char *buffer ); //! Template function to pack a class to a buffer template -void unpack( std::vector& data, const char *buffer ); +void unpack( std::vector &data, const char *buffer ); //! Template function to return the buffer size required to pack a std::pair template -size_t packsize( const std::pair& rhs ); +size_t packsize( const std::pair &rhs ); //! Template function to pack a class to a buffer template -void pack( const std::pair& rhs, char *buffer ); +void pack( const std::pair &rhs, char *buffer ); //! Template function to pack a class to a buffer template -void unpack( std::pair& data, const char *buffer ); +void unpack( std::pair &data, const char *buffer ); //! Template function to return the buffer size required to pack a std::map template -size_t packsize( const std::map& rhs ); +size_t packsize( const std::map &rhs ); //! 
Template function to pack a class to a buffer template -void pack( const std::map& rhs, char *buffer ); +void pack( const std::map &rhs, char *buffer ); //! Template function to pack a class to a buffer template -void unpack( std::map& data, const char *buffer ); +void unpack( std::map &data, const char *buffer ); //! Template function to return the buffer size required to pack a std::set template -size_t packsize( const std::set& rhs ); +size_t packsize( const std::set &rhs ); //! Template function to pack a class to a buffer template -void pack( const std::set& rhs, char *buffer ); +void pack( const std::set &rhs, char *buffer ); //! Template function to pack a class to a buffer template -void unpack( std::set& data, const char *buffer ); +void unpack( std::set &data, const char *buffer ); #include "IO/PackData.hpp" #endif - diff --git a/IO/PackData.hpp b/IO/PackData.hpp index 006cdf73..fd74aa64 100644 --- a/IO/PackData.hpp +++ b/IO/PackData.hpp @@ -4,152 +4,156 @@ #include "IO/PackData.h" +#include +#include #include #include -#include -#include - /******************************************************** -* Default instantiations for std::vector * -********************************************************/ + * Default instantiations for std::vector * + ********************************************************/ template -size_t packsize( const std::vector& rhs ) +size_t packsize( const std::vector &rhs ) { - size_t bytes = sizeof(size_t); - for (size_t i=0; i -void pack( const std::vector& rhs, char *buffer ) +void pack( const std::vector &rhs, char *buffer ) { size_t size = rhs.size(); - memcpy(buffer,&size,sizeof(size_t)); - size_t pos = sizeof(size_t); - for (size_t i=0; i -void unpack( std::vector& data, const char *buffer ) +void unpack( std::vector &data, const char *buffer ) { size_t size; - memcpy(&size,buffer,sizeof(size_t)); + memcpy( &size, buffer, sizeof( size_t ) ); data.clear(); - data.resize(size); - size_t pos = sizeof(size_t); - for (size_t i=0; i 
-size_t packsize( const std::pair& rhs ) +size_t packsize( const std::pair &rhs ) { - return packsize(rhs.first)+packsize(rhs.second); + return packsize( rhs.first ) + packsize( rhs.second ); } template -void pack( const std::pair& rhs, char *buffer ) +void pack( const std::pair &rhs, char *buffer ) { - pack(rhs.first,buffer); - pack(rhs.second,&buffer[packsize(rhs.first)]); + pack( rhs.first, buffer ); + pack( rhs.second, &buffer[packsize( rhs.first )] ); } template -void unpack( std::pair& data, const char *buffer ) +void unpack( std::pair &data, const char *buffer ) { - unpack(data.first,buffer); - unpack(data.second,&buffer[packsize(data.first)]); + unpack( data.first, buffer ); + unpack( data.second, &buffer[packsize( data.first )] ); } /******************************************************** -* Default instantiations for std::map * -********************************************************/ + * Default instantiations for std::map * + ********************************************************/ template -size_t packsize( const std::map& rhs ) +size_t packsize( const std::map &rhs ) { - size_t bytes = sizeof(size_t); - typename std::map::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - bytes += packsize(it->first); - bytes += packsize(it->second); + size_t bytes = sizeof( size_t ); + typename std::map::const_iterator it; + for ( it = rhs.begin(); it != rhs.end(); ++it ) { + bytes += packsize( it->first ); + bytes += packsize( it->second ); } return bytes; } template -void pack( const std::map& rhs, char *buffer ) +void pack( const std::map &rhs, char *buffer ) { size_t N = rhs.size(); - pack(N,buffer); - size_t pos = sizeof(size_t); - typename std::map::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - pack(it->first,&buffer[pos]); pos+=packsize(it->first); - pack(it->second,&buffer[pos]); pos+=packsize(it->second); + pack( N, buffer ); + size_t pos = sizeof( size_t ); + typename std::map::const_iterator it; + for ( it = 
rhs.begin(); it != rhs.end(); ++it ) { + pack( it->first, &buffer[pos] ); + pos += packsize( it->first ); + pack( it->second, &buffer[pos] ); + pos += packsize( it->second ); } } template -void unpack( std::map& data, const char *buffer ) +void unpack( std::map &data, const char *buffer ) { size_t N = 0; - unpack(N,buffer); - size_t pos = sizeof(size_t); + unpack( N, buffer ); + size_t pos = sizeof( size_t ); data.clear(); - for (size_t i=0; i tmp; - unpack(tmp.first,&buffer[pos]); pos+=packsize(tmp.first); - unpack(tmp.second,&buffer[pos]); pos+=packsize(tmp.second); - data.insert(tmp); + for ( size_t i = 0; i < N; i++ ) { + std::pair tmp; + unpack( tmp.first, &buffer[pos] ); + pos += packsize( tmp.first ); + unpack( tmp.second, &buffer[pos] ); + pos += packsize( tmp.second ); + data.insert( tmp ); } } /******************************************************** -* Default instantiations for std::set * -********************************************************/ + * Default instantiations for std::set * + ********************************************************/ template -size_t packsize( const std::set& rhs ) +size_t packsize( const std::set &rhs ) { - size_t bytes = sizeof(size_t); + size_t bytes = sizeof( size_t ); typename std::set::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - bytes += packsize(*it); + for ( it = rhs.begin(); it != rhs.end(); ++it ) { + bytes += packsize( *it ); } return bytes; } template -void pack( const std::set& rhs, char *buffer ) +void pack( const std::set &rhs, char *buffer ) { size_t N = rhs.size(); - pack(N,buffer); - size_t pos = sizeof(size_t); + pack( N, buffer ); + size_t pos = sizeof( size_t ); typename std::set::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - pack(*it); pos+=packsize(*it); + for ( it = rhs.begin(); it != rhs.end(); ++it ) { + pack( *it ); + pos += packsize( *it ); } } template -void unpack( std::set& data, const char *buffer ) +void unpack( std::set &data, const char *buffer ) 
{ size_t N = 0; - unpack(N,buffer); - size_t pos = sizeof(size_t); + unpack( N, buffer ); + size_t pos = sizeof( size_t ); data.clear(); - for (size_t i=0; i +#include +#include #include +#include #include #include -#include -#include // Inline function to read line without a return argument -static inline void fgetl( char * str, int num, FILE * stream ) +static inline void fgetl( char *str, int num, FILE *stream ) { - char* ptr = fgets( str, num, stream ); - if ( 0 ) {char *temp = (char *)&ptr; temp++;} + char *ptr = fgets( str, num, stream ); + if ( 0 ) { + char *temp = (char *) &ptr; + temp++; + } +} + + +// Check if the file exists +bool fileExists( const std::string &filename ) +{ + std::ifstream ifile( filename.c_str() ); + return ifile.good(); } // Get the path to a file -std::string IO::getPath( const std::string& filename ) +std::string IO::getPath( const std::string &filename ) { - std::string file(filename); - size_t k1 = file.rfind(47); - size_t k2 = file.rfind(92); - if ( k1==std::string::npos ) { k1=0; } - if ( k2==std::string::npos ) { k2=0; } - return file.substr(0,std::max(k1,k2)); + std::string file( filename ); + size_t k1 = file.rfind( 47 ); + size_t k2 = file.rfind( 92 ); + if ( k1 == std::string::npos ) { + k1 = 0; + } + if ( k2 == std::string::npos ) { + k2 = 0; + } + return file.substr( 0, std::max( k1, k2 ) ); } // List the timesteps in the given directory (dumps.LBPM) -std::vector IO::readTimesteps( const std::string& path, const std::string& format ) +std::vector IO::readTimesteps( const std::string &path, const std::string &format ) { // Get the name of the summary filename std::string filename = path + "/"; - if ( format=="old" || format=="new" ) + if ( format == "old" || format == "new" ) { filename += "summary.LBM"; - else if ( format=="silo" ) + } else if ( format == "silo" ) { filename += "LBM.visit"; - else + } else if ( format == "auto" ) { + bool test_old = fileExists( path + "/summary.LBM" ); + bool test_silo = fileExists( path 
+ "/LBM.visit" ); + if ( test_old && test_silo ) { + ERROR( "Unable to determine format (both summary.LBM and LBM.visit exist)" ); + } else if ( test_old ) { + filename += "summary.LBM"; + } else if ( test_silo ) { + filename += "LBM.visit"; + } else { + ERROR( "Unable to determine format (neither summary.LBM or LBM.visit exist)" ); + } + } else { ERROR( "Unknown format: " + format ); - PROFILE_START("readTimesteps"); + } + PROFILE_START( "readTimesteps" ); // Read the data - FILE *fid= fopen(filename.c_str(),"rb"); - if ( fid==NULL ) - ERROR("Error opening file"); + FILE *fid = fopen( filename.c_str(), "rb" ); + if ( fid == NULL ) + ERROR( "Error opening file" ); std::vector timesteps; char buf[1000]; - while (fgets(buf,sizeof(buf),fid) != NULL) { - std::string line(buf); - line.resize(line.size()-1); + while ( fgets( buf, sizeof( buf ), fid ) != NULL ) { + std::string line( buf ); + line.resize( line.size() - 1 ); auto pos = line.find( "summary.silo" ); if ( pos != std::string::npos ) - line.resize(pos); + line.resize( pos ); if ( line.empty() ) continue; - timesteps.push_back(line); + timesteps.push_back( line ); } - fclose(fid); - PROFILE_STOP("readTimesteps"); + fclose( fid ); + PROFILE_STOP( "readTimesteps" ); return timesteps; return timesteps; } +// Get the maximum number of domains +int IO::maxDomains( const std::string &path, const std::string &format, const Utilities::MPI &comm ) +{ + int rank = comm.getRank(); + int n_domains = 0; + if ( rank == 0 ) { + // Get the timesteps + auto timesteps = IO::readTimesteps( path, format ); + ASSERT( !timesteps.empty() ); + // Get the database for the first domain + auto db = IO::getMeshList( path, timesteps[0] ); + for ( size_t i = 0; i < db.size(); i++ ) + n_domains = std::max( n_domains, db[i].domains.size() ); + } + return comm.bcast( n_domains, 0 ); +} + + // Read the data for the given timestep -std::vector IO::readData( const std::string& path, const std::string& timestep, const Utilities::MPI &comm ) 
+std::vector IO::readData( + const std::string &path, const std::string ×tep, int rank ) { // Get the mesh databases auto db = IO::getMeshList( path, timestep ); // Create the data - std::vector data( db .size() ); - for ( size_t i=0; i data( db.size() ); + for ( size_t i = 0; i < data.size(); i++ ) { data[i].precision = IO::DataType::Double; - data[i].meshName = db [i].name; - data[i].mesh = getMesh( path, timestep, db [i], domain ); + data[i].meshName = db[i].name; + data[i].mesh = getMesh( path, timestep, db[i], rank ); data[i].vars.resize( db[i].variables.size() ); - for ( size_t j=0; j IO::getMeshList( const std::string& path, const std::string& timestep ) +std::vector IO::getMeshList( + const std::string &path, const std::string ×tep ) { std::string filename = path + "/" + timestep + "/LBM.summary"; return IO::read( filename ); @@ -103,270 +150,271 @@ std::vector IO::getMeshList( const std::string& path, const st // Read the given mesh domain -std::shared_ptr IO::getMesh( const std::string& path, const std::string& timestep, - const IO::MeshDatabase& meshDatabase, int domain ) +std::shared_ptr IO::getMesh( const std::string &path, const std::string ×tep, + const IO::MeshDatabase &meshDatabase, int domain ) { - PROFILE_START("getMesh"); + PROFILE_START( "getMesh" ); std::shared_ptr mesh; - if ( meshDatabase.format==1 ) { + if ( meshDatabase.format == 1 ) { // Old format (binary doubles) std::string filename = path + "/" + timestep + "/" + meshDatabase.domains[domain].file; - FILE *fid = fopen(filename.c_str(),"rb"); - INSIST(fid!=NULL,"Error opening file"); + FILE *fid = fopen( filename.c_str(), "rb" ); + INSIST( fid != NULL, "Error opening file" ); fseek( fid, 0, SEEK_END ); - size_t bytes = ftell(fid); - size_t N_max = bytes/sizeof(double)+1000; + size_t bytes = ftell( fid ); + size_t N_max = bytes / sizeof( double ) + 1000; double *data = new double[N_max]; - fseek(fid,0,SEEK_SET); - size_t count = fread(data,sizeof(double),N_max,fid); - fclose(fid); - if ( 
count%3 != 0 ) - ERROR("Error reading file"); - if ( meshDatabase.type==IO::PointMesh ) { - size_t N = count/3; - std::shared_ptr pointlist( new PointList(N) ); - std::vector& P = pointlist->points; - for (size_t i=0; i pointlist( new PointList( N ) ); + std::vector &P = pointlist->points; + for ( size_t i = 0; i < N; i++ ) { + P[i].x = data[3 * i + 0]; + P[i].y = data[3 * i + 1]; + P[i].z = data[3 * i + 2]; } mesh = pointlist; - } else if ( meshDatabase.type==IO::SurfaceMesh ) { - if ( count%9 != 0 ) - ERROR("Error reading file (2)"); - size_t N_tri = count/9; - std::shared_ptr trilist( new TriList(N_tri) ); - std::vector& A = trilist->A; - std::vector& B = trilist->B; - std::vector& C = trilist->C; - for (size_t i=0; i trilist( new TriList( N_tri ) ); + std::vector &A = trilist->A; + std::vector &B = trilist->B; + std::vector &C = trilist->C; + for ( size_t i = 0; i < N_tri; i++ ) { + A[i].x = data[9 * i + 0]; + A[i].y = data[9 * i + 1]; + A[i].z = data[9 * i + 2]; + B[i].x = data[9 * i + 3]; + B[i].y = data[9 * i + 4]; + B[i].z = data[9 * i + 5]; + C[i].x = data[9 * i + 6]; + C[i].y = data[9 * i + 7]; + C[i].z = data[9 * i + 8]; } mesh = trilist; - } else if ( meshDatabase.type==IO::VolumeMesh ) { + } else if ( meshDatabase.type == IO::MeshType::VolumeMesh ) { // this was never supported in the old format mesh = std::shared_ptr( new DomainMesh() ); } else { - ERROR("Unknown mesh type"); + ERROR( "Unknown mesh type" ); } - delete [] data; - } else if ( meshDatabase.format==2 ) { - const DatabaseEntry& database = meshDatabase.domains[domain]; - std::string filename = path + "/" + timestep + "/" + database.file; - FILE *fid = fopen(filename.c_str(),"rb"); - fseek(fid,database.offset,SEEK_SET); + delete[] data; + } else if ( meshDatabase.format == 2 ) { + const DatabaseEntry &database = meshDatabase.domains[domain]; + std::string filename = path + "/" + timestep + "/" + database.file; + FILE *fid = fopen( filename.c_str(), "rb" ); + fseek( fid, database.offset, 
SEEK_SET ); char line[1000]; - fgetl(line,1000,fid); - size_t i1 = find(line,':'); - size_t i2 = find(&line[i1+1],':')+i1+1; - size_t bytes = atol(&line[i2+1]); - char *data = new char[bytes]; - size_t count = fread(data,1,bytes,fid); - fclose(fid); - ASSERT(count==bytes); - if ( meshDatabase.meshClass=="PointList" ) { + fgetl( line, 1000, fid ); + size_t i1 = find( line, ':' ); + size_t i2 = find( &line[i1 + 1], ':' ) + i1 + 1; + size_t bytes = atol( &line[i2 + 1] ); + char *data = new char[bytes]; + size_t count = fread( data, 1, bytes, fid ); + fclose( fid ); + ASSERT( count == bytes ); + if ( meshDatabase.meshClass == "PointList" ) { mesh.reset( new IO::PointList() ); - } else if ( meshDatabase.meshClass=="TriMesh" ) { + } else if ( meshDatabase.meshClass == "TriMesh" ) { mesh.reset( new IO::TriMesh() ); - } else if ( meshDatabase.meshClass=="TriList" ) { + } else if ( meshDatabase.meshClass == "TriList" ) { mesh.reset( new IO::TriList() ); - } else if ( meshDatabase.meshClass=="DomainMesh" ) { + } else if ( meshDatabase.meshClass == "DomainMesh" ) { mesh.reset( new IO::DomainMesh() ); } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } - mesh->unpack( std::pair(bytes,data) ); - delete [] data; - } else if ( meshDatabase.format==4 ) { + mesh->unpack( std::pair( bytes, data ) ); + delete[] data; + } else if ( meshDatabase.format == 4 ) { // Reading a silo file #ifdef USE_SILO - const DatabaseEntry& database = meshDatabase.domains[domain]; - std::string filename = path + "/" + timestep + "/" + database.file; - auto fid = silo::open( filename, silo::READ ); - if ( meshDatabase.meshClass=="PointList" ) { + const DatabaseEntry &database = meshDatabase.domains[domain]; + std::string filename = path + "/" + timestep + "/" + database.file; + auto fid = silo::open( filename, silo::READ ); + if ( meshDatabase.meshClass == "PointList" ) { Array coords = silo::readPointMesh( fid, database.name ); - ASSERT(coords.size(1)==3); - std::shared_ptr mesh2( 
new IO::PointList( coords.size(0) ) ); - for (size_t i=0; ipoints[i].x = coords(i,0); - mesh2->points[i].y = coords(i,1); - mesh2->points[i].z = coords(i,2); + ASSERT( coords.size( 1 ) == 3 ); + std::shared_ptr mesh2( new IO::PointList( coords.size( 0 ) ) ); + for ( size_t i = 0; i < coords.size( 1 ); i++ ) { + mesh2->points[i].x = coords( i, 0 ); + mesh2->points[i].y = coords( i, 1 ); + mesh2->points[i].z = coords( i, 2 ); } mesh = mesh2; - } else if ( meshDatabase.meshClass=="TriMesh" || meshDatabase.meshClass=="TriList" ) { + } else if ( meshDatabase.meshClass == "TriMesh" || meshDatabase.meshClass == "TriList" ) { Array coords; Array tri; silo::readTriMesh( fid, database.name, coords, tri ); - ASSERT( tri.size(1)==3 && coords.size(1)==3 ); - int N_tri = tri.size(0); - int N_point = coords.size(0); + ASSERT( tri.size( 1 ) == 3 && coords.size( 1 ) == 3 ); + int N_tri = tri.size( 0 ); + int N_point = coords.size( 0 ); std::shared_ptr mesh2( new IO::TriMesh( N_tri, N_point ) ); - for (int i=0; ivertices->points[i].x = coords(i,0); - mesh2->vertices->points[i].y = coords(i,1); - mesh2->vertices->points[i].z = coords(i,2); + for ( int i = 0; i < N_point; i++ ) { + mesh2->vertices->points[i].x = coords( i, 0 ); + mesh2->vertices->points[i].y = coords( i, 1 ); + mesh2->vertices->points[i].z = coords( i, 2 ); } - for (int i=0; iA[i] = tri(i,0); - mesh2->B[i] = tri(i,1); - mesh2->C[i] = tri(i,2); + for ( int i = 0; i < N_tri; i++ ) { + mesh2->A[i] = tri( i, 0 ); + mesh2->B[i] = tri( i, 1 ); + mesh2->C[i] = tri( i, 2 ); } - if ( meshDatabase.meshClass=="TriMesh" ) { + if ( meshDatabase.meshClass == "TriMesh" ) { mesh = mesh2; - } else if ( meshDatabase.meshClass=="TriList" ) { + } else if ( meshDatabase.meshClass == "TriList" ) { auto trilist = IO::getTriList( std::dynamic_pointer_cast( mesh2 ) ); - mesh = trilist; + mesh = trilist; } - } else if ( meshDatabase.meshClass=="DomainMesh" ) { + } else if ( meshDatabase.meshClass == "DomainMesh" ) { std::vector range; 
std::vector N; silo::readUniformMesh( fid, database.name, range, N ); - auto rankinfo = silo::read( fid, database.name+"_rankinfo" ); + auto rankinfo = silo::read( fid, database.name + "_rankinfo" ); RankInfoStruct rank_data( rankinfo[0], rankinfo[1], rankinfo[2], rankinfo[3] ); - mesh.reset( new IO::DomainMesh( rank_data, N[0], N[1], N[2], range[1]-range[0], range[3]-range[2], range[5]-range[4] ) ); + mesh.reset( new IO::DomainMesh( rank_data, N[0], N[1], N[2], range[1] - range[0], + range[3] - range[2], range[5] - range[4] ) ); } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } silo::close( fid ); #else - ERROR("Build without silo support"); + ERROR( "Build without silo support" ); #endif } else { - ERROR("Unknown format"); + ERROR( "Unknown format" ); } - PROFILE_STOP("getMesh"); + PROFILE_STOP( "getMesh" ); return mesh; } // Read the given variable for the given mesh domain -std::shared_ptr IO::getVariable( const std::string& path, const std::string& timestep, - const MeshDatabase& meshDatabase, int domain, const std::string& variable ) +std::shared_ptr IO::getVariable( const std::string &path, const std::string ×tep, + const MeshDatabase &meshDatabase, int domain, const std::string &variable ) { - std::pair key(meshDatabase.domains[domain].name,variable); - std::map,DatabaseEntry>::const_iterator it; - it = meshDatabase.variable_data.find(key); - if ( it==meshDatabase.variable_data.end() ) + std::pair key( meshDatabase.domains[domain].name, variable ); + std::map, DatabaseEntry>::const_iterator it; + it = meshDatabase.variable_data.find( key ); + if ( it == meshDatabase.variable_data.end() ) return std::shared_ptr(); std::shared_ptr var; if ( meshDatabase.format == 2 ) { - const DatabaseEntry& database = it->second; - std::string filename = path + "/" + timestep + "/" + database.file; - FILE *fid = fopen(filename.c_str(),"rb"); - fseek(fid,database.offset,SEEK_SET); + const DatabaseEntry &database = it->second; + std::string filename = 
path + "/" + timestep + "/" + database.file; + FILE *fid = fopen( filename.c_str(), "rb" ); + fseek( fid, database.offset, SEEK_SET ); char line[1000]; - fgetl(line,1000,fid); - size_t i1 = find(line,':'); - size_t i2 = find(&line[i1+1],':')+i1+1; - std::vector values = splitList(&line[i2+1],','); - ASSERT(values.size()==5); - int dim = atoi(values[0].c_str()); - int type = atoi(values[1].c_str()); - size_t N = atol(values[2].c_str()); - size_t bytes = atol(values[3].c_str()); - std::string precision = values[4]; - var = std::shared_ptr( new IO::Variable() ); - var->dim = dim; - var->type = static_cast(type); - var->name = variable; - var->data.resize(N*dim); - if ( precision=="double" ) { - size_t count = fread(var->data.data(),sizeof(double),N*dim,fid); - ASSERT(count*sizeof(double)==bytes); + fgetl( line, 1000, fid ); + size_t i1 = find( line, ':' ); + size_t i2 = find( &line[i1 + 1], ':' ) + i1 + 1; + std::vector values = splitList( &line[i2 + 1], ',' ); + ASSERT( values.size() == 5 ); + int dim = atoi( values[0].c_str() ); + int type = atoi( values[1].c_str() ); + size_t N = atol( values[2].c_str() ); + size_t bytes = atol( values[3].c_str() ); + std::string precision = values[4]; + var = std::shared_ptr( new IO::Variable() ); + var->dim = dim; + var->type = static_cast( type ); + var->name = variable; + var->data.resize( N, dim ); + if ( precision == "double" ) { + size_t count = fread( var->data.data(), sizeof( double ), N * dim, fid ); + ASSERT( count * sizeof( double ) == bytes ); } else { - ERROR("Format not implimented"); + ERROR( "Format not implimented" ); } - fclose(fid); + fclose( fid ); } else if ( meshDatabase.format == 4 ) { // Reading a silo file #ifdef USE_SILO - const auto& database = meshDatabase.domains[domain]; + const auto &database = meshDatabase.domains[domain]; auto variableDatabase = meshDatabase.getVariableDatabase( variable ); - std::string filename = path + "/" + timestep + "/" + database.file; - auto fid = silo::open( filename, 
silo::READ ); + std::string filename = path + "/" + timestep + "/" + database.file; + auto fid = silo::open( filename, silo::READ ); var.reset( new Variable( variableDatabase.dim, variableDatabase.type, variable ) ); - if ( meshDatabase.meshClass=="PointList" ) { + if ( meshDatabase.meshClass == "PointList" ) { var->data = silo::readPointMeshVariable( fid, variable ); - } else if ( meshDatabase.meshClass=="TriMesh" || meshDatabase.meshClass=="TriList" ) { + } else if ( meshDatabase.meshClass == "TriMesh" || meshDatabase.meshClass == "TriList" ) { var->data = silo::readTriMeshVariable( fid, variable ); - } else if ( meshDatabase.meshClass=="DomainMesh" ) { + } else if ( meshDatabase.meshClass == "DomainMesh" ) { var->data = silo::readUniformMeshVariable( fid, variable ); } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } silo::close( fid ); #else - ERROR("Build without silo support"); + ERROR( "Build without silo support" ); #endif } else { - ERROR("Unknown format"); + ERROR( "Unknown format" ); } return var; } /**************************************************** -* Reformat the variable to match the mesh * -****************************************************/ -void IO::reformatVariable( const IO::Mesh& mesh, IO::Variable& var ) + * Reformat the variable to match the mesh * + ****************************************************/ +void IO::reformatVariable( const IO::Mesh &mesh, IO::Variable &var ) { if ( mesh.className() == "DomainMesh" ) { - const IO::DomainMesh& mesh2 = dynamic_cast( mesh ); + const IO::DomainMesh &mesh2 = dynamic_cast( mesh ); if ( var.type == VariableType::NodeVariable ) { - size_t N2 = var.data.length() / ((mesh2.nx+1)*(mesh2.ny+1)*(mesh2.nz+1)); - ASSERT( (mesh2.nx+1)*(mesh2.ny+1)*(mesh2.nz+1)*N2 == var.data.length() ); - var.data.reshape( { (size_t) mesh2.nx+1, (size_t) mesh2.ny+1, (size_t) mesh2.nz+1, N2 } ); + size_t N2 = + var.data.length() / ( ( mesh2.nx + 1 ) * ( mesh2.ny + 1 ) * ( mesh2.nz + 1 ) ); + ASSERT( + 
( mesh2.nx + 1 ) * ( mesh2.ny + 1 ) * ( mesh2.nz + 1 ) * N2 == var.data.length() ); + var.data.reshape( + { (size_t) mesh2.nx + 1, (size_t) mesh2.ny + 1, (size_t) mesh2.nz + 1, N2 } ); } else if ( var.type == VariableType::EdgeVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var.type == VariableType::SurfaceVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var.type == VariableType::VolumeVariable ) { - size_t N2 = var.data.length() / (mesh2.nx*mesh2.ny*mesh2.nz); - ASSERT( mesh2.nx*mesh2.ny*mesh2.nz*N2 == var.data.length() ); + size_t N2 = var.data.length() / ( mesh2.nx * mesh2.ny * mesh2.nz ); + ASSERT( mesh2.nx * mesh2.ny * mesh2.nz * N2 == var.data.length() ); var.data.reshape( { (size_t) mesh2.nx, (size_t) mesh2.ny, (size_t) mesh2.nz, N2 } ); } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } } else if ( mesh.className() == "PointList" ) { - const IO::PointList& mesh2 = dynamic_cast( mesh ); - size_t N = mesh2.points.size(); - size_t N_var = var.data.length()/N; - ASSERT( N*N_var == var.data.length() ); + const IO::PointList &mesh2 = dynamic_cast( mesh ); + size_t N = mesh2.points.size(); + size_t N_var = var.data.length() / N; + ASSERT( N * N_var == var.data.length() ); var.data.reshape( { N, N_var } ); - } else if ( mesh.className()=="TriMesh" || mesh.className() == "TriList" ) { - std::shared_ptr mesh_ptr( const_cast(&mesh), []( void* ) {} ); + } else if ( mesh.className() == "TriMesh" || mesh.className() == "TriList" ) { + std::shared_ptr mesh_ptr( const_cast( &mesh ), []( void * ) {} ); std::shared_ptr mesh2 = getTriMesh( mesh_ptr ); if ( var.type == VariableType::NodeVariable ) { - size_t N = mesh2->vertices->points.size(); - size_t N_var = var.data.length()/N; - ASSERT( N*N_var == var.data.length() ); + size_t N = mesh2->vertices->points.size(); + size_t N_var = var.data.length() / N; + ASSERT( N * N_var == var.data.length() ); var.data.reshape( { N, N_var } ); } else if 
( var.type == VariableType::EdgeVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var.type == VariableType::SurfaceVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var.type == VariableType::VolumeVariable ) { - size_t N = mesh2->A.size(); - size_t N_var = var.data.length()/N; - ASSERT( N*N_var == var.data.length() ); + size_t N = mesh2->A.size(); + size_t N_var = var.data.length() / N; + ASSERT( N * N_var == var.data.length() ); var.data.reshape( { N, N_var } ); } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } } else { - ERROR("Unknown mesh type"); + ERROR( "Unknown mesh type" ); } } - - - diff --git a/IO/Reader.h b/IO/Reader.h index 7cc96ab7..6542a2ea 100644 --- a/IO/Reader.h +++ b/IO/Reader.h @@ -14,7 +14,24 @@ namespace IO { //! Get the path to a file -std::string getPath( const std::string& filename ); +std::string getPath( const std::string &filename ); + + +/*! + * @brief Get the maximum number of domains written + * @details This function reads the summary files to determine the maximum + * number of domains in the output. + * @param[in] path The path to use for reading + * @param[in] format The data format to use: + * old - Old mesh format (provided for backward compatibility) + * new - New format, 1 file/process + * silo - Silo + * auto - Auto-determin the format + * @param[in] comm Optional comm to use to reduce IO load by + * reading on rank 0 and then communicating the result + */ +int maxDomains( const std::string &path, const std::string &format = "auto", + const Utilities::MPI &comm = MPI_COMM_SELF ); /*! @@ -22,31 +39,34 @@ std::string getPath( const std::string& filename ); * @details This function reads the timestep list from the summary file. 
* @param[in] path The path to use for reading * @param[in] format The data format to use: - * old - Old mesh format (provided for backward compatibility) - * new - New format, 1 file/process - * silo - Silo - * @return append Append any existing data (default is false) + * old - Old mesh format (provided for backward compatibility) + * new - New format, 1 file/process + * silo - Silo + * auto - Auto-determin the format + * @return append Append any existing data (default is false) */ -std::vector readTimesteps( const std::string& path, const std::string& format ); +std::vector readTimesteps( + const std::string &path, const std::string &format = "auto" ); /*! * @brief Read the data for the timestep * @details This function reads the mesh and variable data provided for the given timestep. - * Note: this function requires that the number of ranks of the comm match the number of ranks in the meshes * @param[in] path The path to use for reading * @param[in] timestep The timestep iteration + * @param[in] domain The desired domain to read */ -std::vector readData( const std::string& path, const std::string& timestep, const Utilities::MPI &comm = MPI_COMM_WORLD ); +std::vector readData( + const std::string &path, const std::string ×tep, int domain ); //! Read the list of mesh databases for the given timestep -std::vector getMeshList( const std::string& path, const std::string& timestep ); +std::vector getMeshList( const std::string &path, const std::string ×tep ); //! Read the given mesh domain -std::shared_ptr getMesh( const std::string& path, const std::string& timestep, - const MeshDatabase& meshDatabase, int domain ); +std::shared_ptr getMesh( const std::string &path, const std::string ×tep, + const MeshDatabase &meshDatabase, int domain ); /*! 
@@ -59,8 +79,8 @@ std::shared_ptr getMesh( const std::string& path, const std::string& t * @param[in] variable The variable name to read * @return Returns the variable data as a linear array */ -std::shared_ptr getVariable( const std::string& path, const std::string& timestep, - const MeshDatabase& meshDatabase, int domain, const std::string& variable ); +std::shared_ptr getVariable( const std::string &path, const std::string ×tep, + const MeshDatabase &meshDatabase, int domain, const std::string &variable ); /*! @@ -69,9 +89,9 @@ std::shared_ptr getVariable( const std::string& path, const std::s * @param[in] mesh The underlying mesh * @param[in/out] variable The variable name to read */ -void reformatVariable( const IO::Mesh& mesh, IO::Variable& var ); +void reformatVariable( const IO::Mesh &mesh, IO::Variable &var ); -} // IO namespace +} // namespace IO #endif diff --git a/IO/Writer.cpp b/IO/Writer.cpp index 7414d5a1..d3f9d991 100644 --- a/IO/Writer.cpp +++ b/IO/Writer.cpp @@ -1,28 +1,69 @@ #include "IO/Writer.h" -#include "IO/MeshDatabase.h" #include "IO/IOHelpers.h" +#include "IO/MeshDatabase.h" #include "IO/silo.h" #include "common/MPI.h" #include "common/Utilities.h" -#include #include -#include -#include #include +#include +#include +#include - -enum class Format { OLD, NEW, SILO, UNKNOWN }; - +enum class Format { OLD, NEW, SILO, UNKNOWN }; /**************************************************** -* Initialize the writer * -****************************************************/ + * Recursively create the subdirectory * + ****************************************************/ +static void recursiveMkdir( const std::string &path, mode_t mode ) +{ + // Iterate through the root directories until we create the desired path + for ( size_t pos = 0; pos < path.size(); ) { + // slide backwards in string until next slash found + pos++; + for ( ; pos < path.size(); pos++ ) { + if ( path[pos] == '/' || path[pos] == 92 ) + break; + } + // Create the temporary path + auto 
path2 = path.substr( 0, pos ); + // Check if the temporary path exists + struct stat status; + int result = stat( path2.data(), &status ); + if ( result == 0 ) { + // if there is a part of the path that already exists make sure it is really a directory + if ( !S_ISDIR( status.st_mode ) ) { + ERROR( + "Error in recursiveMkdir...\n" + " Cannot create directories in path = " + + path + + "\n because some intermediate item in path exists and is NOT a directory" ); + } + continue; + } + // Create the directory and test the result + result = mkdir( path2.data(), mode ); + if ( result != 0 ) { + // Maybe another rank created the directory, check + int result = stat( path2.data(), &status ); + if ( result != 0 && !S_ISDIR( status.st_mode ) ) + ERROR( "Error in Utilities::recursiveMkdir...\n" + " Cannot create directory = " + + path2 ); + } + } +} + + +/**************************************************** + * Initialize the writer * + ****************************************************/ static std::string global_IO_path; static Format global_IO_format = Format::UNKNOWN; -void IO::initialize( const std::string& path, const std::string& format, bool append ) +void IO::initialize( const std::string &path, const std::string &format, bool append ) { if ( path.empty() ) global_IO_path = "."; @@ -35,161 +76,170 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap else if ( format == "silo" ) global_IO_format = Format::SILO; else - ERROR("Unknown format"); - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); - if ( !append && rank==0 ) { - mkdir(path.c_str(),S_IRWXU|S_IRGRP); + ERROR( "Unknown format" ); + int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); + if ( !append && rank == 0 ) { + recursiveMkdir( path, S_IRWXU | S_IRGRP ); std::string filename; - if ( global_IO_format==Format::OLD || global_IO_format==Format::NEW ) + if ( global_IO_format == Format::OLD || global_IO_format == Format::NEW ) filename = global_IO_path + "/summary.LBM"; - 
else if ( global_IO_format==Format::SILO ) + else if ( global_IO_format == Format::SILO ) filename = global_IO_path + "/LBM.visit"; else - ERROR("Unknown format"); - auto fid = fopen(filename.c_str(),"wb"); - fclose(fid); + ERROR( "Unknown format" ); + auto fid = fopen( filename.c_str(), "wb" ); + fclose( fid ); } } // Write the mesh data in the original format -static std::vector writeMeshesOrigFormat( const std::vector& meshData, const std::string& path ) +static std::vector writeMeshesOrigFormat( + const std::vector &meshData, const std::string &path, int rank ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; - for (size_t i=0; i mesh = meshData[i].mesh; IO::MeshDatabase mesh_entry; - mesh_entry.name = meshData[i].meshName; - mesh_entry.type = meshType(*mesh); + mesh_entry.name = meshData[i].meshName; + mesh_entry.type = meshType( *mesh ); mesh_entry.meshClass = meshData[i].mesh->className(); - mesh_entry.format = 1; + mesh_entry.format = 1; IO::DatabaseEntry domain; - domain.name = domainname; - domain.file = filename; + domain.name = domainname; + domain.file = filename; domain.offset = 0; - mesh_entry.domains.push_back(domain); + mesh_entry.domains.push_back( domain ); if ( !meshData[i].vars.empty() ) { - printf("Warning: variables are not supported with this format (original)\n"); - //for (size_t j=0; jname ); } const std::string meshClass = mesh->className(); - if ( meshClass=="PointList" ) { + if ( meshClass == "PointList" ) { // List of points - std::shared_ptr pointlist = std::dynamic_pointer_cast(mesh); - const std::vector& P = pointlist->points; - for (size_t i=0; i pointlist = + std::dynamic_pointer_cast( mesh ); + const std::vector &P = pointlist->points; + for ( size_t i = 0; i < P.size(); i++ ) { double x[3]; - x[0] = P[i].x; x[1] = P[i].y; x[2] = P[i].z; - fwrite(x,sizeof(double),3,fid); + x[0] = P[i].x; + x[1] = P[i].y; + x[2] = P[i].z; + fwrite( x, sizeof( double ), 3, fid ); } - } else if ( 
meshClass=="TriList" || meshClass=="TriMesh" ) { + } else if ( meshClass == "TriList" || meshClass == "TriMesh" ) { // Triangle mesh - std::shared_ptr trilist = IO::getTriList(mesh); - const std::vector& A = trilist->A; - const std::vector& B = trilist->B; - const std::vector& C = trilist->C; - for (size_t i=0; i trilist = IO::getTriList( mesh ); + const std::vector &A = trilist->A; + const std::vector &B = trilist->B; + const std::vector &C = trilist->C; + for ( size_t i = 0; i < A.size(); i++ ) { double tri[9]; - tri[0] = A[i].x; tri[1] = A[i].y; tri[2] = A[i].z; - tri[3] = B[i].x; tri[4] = B[i].y; tri[5] = B[i].z; - tri[6] = C[i].x; tri[7] = C[i].y; tri[8] = C[i].z; - fwrite(tri,sizeof(double),9,fid); + tri[0] = A[i].x; + tri[1] = A[i].y; + tri[2] = A[i].z; + tri[3] = B[i].x; + tri[4] = B[i].y; + tri[5] = B[i].z; + tri[6] = C[i].x; + tri[7] = C[i].y; + tri[8] = C[i].z; + fwrite( tri, sizeof( double ), 9, fid ); } - } else if ( meshClass=="DomainMesh" ) { + } else if ( meshClass == "DomainMesh" ) { // This format was never supported with the old format } else { - ERROR("Unknown mesh"); + ERROR( "Unknown mesh" ); } - fclose(fid); + fclose( fid ); std::sort( mesh_entry.variables.begin(), mesh_entry.variables.end() ); - mesh_entry.variables.erase( std::unique( mesh_entry.variables.begin(), mesh_entry.variables.end() ), mesh_entry.variables.end() ); - meshes_written.push_back(mesh_entry); + mesh_entry.variables.erase( + std::unique( mesh_entry.variables.begin(), mesh_entry.variables.end() ), + mesh_entry.variables.end() ); + meshes_written.push_back( mesh_entry ); } return meshes_written; } // Create the database entry for the mesh data -static IO::MeshDatabase getDatabase( const std::string& filename, const IO::MeshDataStruct& mesh, int format ) +static IO::MeshDatabase getDatabase( + const std::string &filename, const IO::MeshDataStruct &mesh, int format, int rank ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); char domainname[100]; - 
sprintf(domainname,"%s_%05i",mesh.meshName.c_str(),rank); + sprintf( domainname, "%s_%05i", mesh.meshName.c_str(), rank ); // Create the MeshDatabase IO::MeshDatabase database; - database.name = mesh.meshName; - database.type = meshType(*(mesh.mesh)); + database.name = mesh.meshName; + database.type = meshType( *( mesh.mesh ) ); database.meshClass = mesh.mesh->className(); - database.format = format; + database.format = format; // Write the mesh IO::DatabaseEntry domain; - domain.name = domainname; - domain.file = filename; + domain.name = domainname; + domain.file = filename; domain.offset = -1; - database.domains.push_back(domain); + database.domains.push_back( domain ); // Write the variables - for (size_t i=0; iname; info.type = mesh.vars[i]->type; - info.dim = mesh.vars[i]->dim; - database.variables.push_back(info); + info.dim = mesh.vars[i]->dim; + database.variables.push_back( info ); // Add domain variable info IO::DatabaseEntry variable; - variable.name = mesh.vars[i]->name; - variable.file = filename; + variable.name = mesh.vars[i]->name; + variable.file = filename; variable.offset = -1; - std::pair key(domain.name,mesh.vars[i]->name); - database.variable_data.insert( - std::pair,IO::DatabaseEntry>(key,variable) ); + std::pair key( domain.name, mesh.vars[i]->name ); + database.variable_data.insert( + std::pair, IO::DatabaseEntry>( key, variable ) ); } return database; } // Write a mesh (and variables) to a file -static IO::MeshDatabase write_domain( FILE *fid, const std::string& filename, - const IO::MeshDataStruct& mesh, int format ) +static IO::MeshDatabase write_domain( + FILE *fid, const std::string &filename, const IO::MeshDataStruct &mesh, int format, int rank ) { const int level = 0; - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); // Create the MeshDatabase - IO::MeshDatabase database = getDatabase( filename, mesh, format ); + IO::MeshDatabase database = getDatabase( filename, mesh, format, rank ); // Write the mesh - IO::DatabaseEntry& 
domain = database.domains[0]; - domain.offset = ftell(fid); - std::pair data = mesh.mesh->pack(level); - fprintf(fid,"Mesh: %s-%05i: %lu\n",mesh.meshName.c_str(),rank,data.first); - fwrite(data.second,1,data.first,fid); - fprintf(fid,"\n"); - delete [] (char*) data.second; + IO::DatabaseEntry &domain = database.domains[0]; + domain.offset = ftell( fid ); + std::pair data = mesh.mesh->pack( level ); + fprintf( fid, "Mesh: %s-%05i: %lu\n", mesh.meshName.c_str(), rank, data.first ); + fwrite( data.second, 1, data.first, fid ); + fprintf( fid, "\n" ); + delete[]( char * ) data.second; // Write the variables - for (size_t i=0; i key(domain.name,mesh.vars[i]->name); - IO::DatabaseEntry& variable = database.variable_data[key]; - variable.offset = ftell(fid); - int dim = mesh.vars[i]->dim; - int type = static_cast(mesh.vars[i]->type); - size_t N = mesh.vars[i]->data.length(); - if ( type == static_cast(IO::VariableType::NullVariable) ) { - ERROR("Variable type not set"); + for ( size_t i = 0; i < mesh.vars.size(); i++ ) { + std::pair key( domain.name, mesh.vars[i]->name ); + IO::DatabaseEntry &variable = database.variable_data[key]; + variable.offset = ftell( fid ); + int dim = mesh.vars[i]->dim; + int type = static_cast( mesh.vars[i]->type ); + size_t N = mesh.vars[i]->data.length(); + if ( type == static_cast( IO::VariableType::NullVariable ) ) { + ERROR( "Variable type not set" ); } - size_t N_mesh = mesh.mesh->numberPointsVar(mesh.vars[i]->type); - ASSERT(N==dim*N_mesh); - fprintf(fid,"Var: %s-%05i-%s: %i, %i, %lu, %lu, double\n", - database.name.c_str(), rank, variable.name.c_str(), - dim, type, N_mesh, N*sizeof(double) ); - fwrite(mesh.vars[i]->data.data(),sizeof(double),N,fid); - fprintf(fid,"\n"); + size_t N_mesh = mesh.mesh->numberPointsVar( mesh.vars[i]->type ); + ASSERT( N == dim * N_mesh ); + fprintf( fid, "Var: %s-%05i-%s: %i, %i, %lu, %lu, double\n", database.name.c_str(), rank, + variable.name.c_str(), dim, type, N_mesh, N * sizeof( double ) ); + fwrite( 
mesh.vars[i]->data.data(), sizeof( double ), N, fid ); + fprintf( fid, "\n" ); } return database; } @@ -198,11 +248,12 @@ static IO::MeshDatabase write_domain( FILE *fid, const std::string& filename, #ifdef USE_SILO // Write a PointList mesh (and variables) to a file template -static void writeSiloPointMesh( DBfile *fid, const IO::PointList& mesh, const std::string& meshname ) +static void writeSiloPointMesh( + DBfile *fid, const IO::PointList &mesh, const std::string &meshname ) { - const auto& points = mesh.getPoints(); - std::vector x(points.size()), y(points.size()), z(points.size()); - for (size_t i=0; i x( points.size() ), y( points.size() ), z( points.size() ); + for ( size_t i = 0; i < x.size(); i++ ) { x[i] = points[i].x; y[i] = points[i].y; z[i] = points[i].z; @@ -210,28 +261,29 @@ static void writeSiloPointMesh( DBfile *fid, const IO::PointList& mesh, const st const TYPE *coords[] = { x.data(), y.data(), z.data() }; silo::writePointMesh( fid, meshname, 3, points.size(), coords ); } -static void writeSiloPointList( DBfile *fid, const IO::MeshDataStruct& meshData, IO::MeshDatabase database ) +static void writeSiloPointList( + DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) { - const IO::PointList& mesh = dynamic_cast( *meshData.mesh ); + const IO::PointList &mesh = dynamic_cast( *meshData.mesh ); const std::string meshname = database.domains[0].name; if ( meshData.precision == IO::DataType::Double ) { writeSiloPointMesh( fid, mesh, meshname ); } else if ( meshData.precision == IO::DataType::Float ) { writeSiloPointMesh( fid, mesh, meshname ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } - const auto& points = mesh.getPoints(); - std::vector x(points.size()), y(points.size()), z(points.size()); - for (size_t i=0; i x( points.size() ), y( points.size() ), z( points.size() ); + for ( size_t i = 0; i < x.size(); i++ ) { x[i] = points[i].x; y[i] = points[i].y; z[i] = points[i].z; } const double 
*coords[] = { x.data(), y.data(), z.data() }; silo::writePointMesh( fid, meshname, 3, points.size(), coords ); - for (size_t i=0; i -static void writeSiloTriMesh( DBfile *fid, const IO::TriMesh& mesh, const std::string& meshname ) +static void writeSiloTriMesh( DBfile *fid, const IO::TriMesh &mesh, const std::string &meshname ) { - const auto& points = mesh.vertices->getPoints(); - std::vector x(points.size()), y(points.size()), z(points.size()); - for (size_t i=0; igetPoints(); + std::vector x( points.size() ), y( points.size() ), z( points.size() ); + for ( size_t i = 0; i < x.size(); i++ ) { x[i] = points[i].x; y[i] = points[i].y; z[i] = points[i].z; } const TYPE *coords[] = { x.data(), y.data(), z.data() }; - const int *tri[] = { mesh.A.data(), mesh.B.data(), mesh.C.data() }; + const int *tri[] = { mesh.A.data(), mesh.B.data(), mesh.C.data() }; silo::writeTriMesh( fid, meshname, 3, 2, points.size(), coords, mesh.A.size(), tri ); } -static void writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct& meshData, - const IO::TriMesh& mesh, IO::MeshDatabase database ) +static void writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct &meshData, + const IO::TriMesh &mesh, IO::MeshDatabase database ) { const std::string meshname = database.domains[0].name; if ( meshData.precision == IO::DataType::Double ) { @@ -271,11 +323,11 @@ static void writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct& meshData, } else if ( meshData.precision == IO::DataType::Float ) { writeSiloTriMesh( fid, mesh, meshname ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } - for (size_t i=0; i( var.type ); + for ( size_t i = 0; i < meshData.vars.size(); i++ ) { + const IO::Variable &var = *meshData.vars[i]; + auto type = static_cast( var.type ); if ( var.precision == IO::DataType::Double ) { silo::writeTriMeshVariable( fid, 3, meshname, var.name, var.data, type ); } else if ( var.precision == IO::DataType::Float ) { @@ -287,35 +339,40 @@ static void 
writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct& meshData, data2.copy( var.data ); silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, type ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } } } -static void writeSiloTriMesh( DBfile *fid, const IO::MeshDataStruct& meshData, IO::MeshDatabase database ) +static void writeSiloTriMesh( + DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) { - const IO::TriMesh& mesh = dynamic_cast( *meshData.mesh ); + const IO::TriMesh &mesh = dynamic_cast( *meshData.mesh ); writeSiloTriMesh2( fid, meshData, mesh, database ); } -static void writeSiloTriList( DBfile *fid, const IO::MeshDataStruct& meshData, IO::MeshDatabase database ) +static void writeSiloTriList( + DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) { auto mesh = getTriMesh( meshData.mesh ); writeSiloTriMesh2( fid, meshData, *mesh, database ); } // Write a DomainMesh mesh (and variables) to a file -static void writeSiloDomainMesh( DBfile *fid, const IO::MeshDataStruct& meshData, IO::MeshDatabase database ) +static void writeSiloDomainMesh( + DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) { - const IO::DomainMesh& mesh = dynamic_cast( *meshData.mesh ); + const IO::DomainMesh &mesh = dynamic_cast( *meshData.mesh ); RankInfoStruct info( mesh.rank, mesh.nprocx, mesh.nprocy, mesh.nprocz ); - std::array range = { info.ix*mesh.Lx/info.nx, (info.ix+1)*mesh.Lx/info.nx, - info.jy*mesh.Ly/info.ny, (info.jy+1)*mesh.Ly/info.ny, - info.kz*mesh.Lz/info.nz, (info.kz+1)*mesh.Lz/info.nz }; - std::array N = { mesh.nx, mesh.ny, mesh.nz }; - auto meshname = database.domains[0].name; + std::array range = { info.ix * mesh.Lx / info.nx, + ( info.ix + 1 ) * mesh.Lx / info.nx, info.jy * mesh.Ly / info.ny, + ( info.jy + 1 ) * mesh.Ly / info.ny, info.kz * mesh.Lz / info.nz, + ( info.kz + 1 ) * mesh.Lz / info.nz }; + std::array N = { mesh.nx, mesh.ny, mesh.nz }; + auto 
meshname = database.domains[0].name; silo::writeUniformMesh<3>( fid, meshname, range, N ); - silo::write( fid, meshname+"_rankinfo", { mesh.rank, mesh.nprocx, mesh.nprocy, mesh.nprocz } ); - for (size_t i=0; i( var.type ); + silo::write( + fid, meshname + "_rankinfo", { mesh.rank, mesh.nprocx, mesh.nprocy, mesh.nprocz } ); + for ( size_t i = 0; i < meshData.vars.size(); i++ ) { + const auto &var = *meshData.vars[i]; + auto type = static_cast( var.type ); if ( var.precision == IO::DataType::Double ) { silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, var.data, type ); } else if ( var.precision == IO::DataType::Float ) { @@ -327,65 +384,66 @@ static void writeSiloDomainMesh( DBfile *fid, const IO::MeshDataStruct& meshData data2.copy( var.data ); silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, type ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } } } // Write a mesh (and variables) to a file -static IO::MeshDatabase write_domain_silo( DBfile *fid, const std::string& filename, - const IO::MeshDataStruct& mesh, int format ) +static IO::MeshDatabase write_domain_silo( + DBfile *fid, const std::string &filename, const IO::MeshDataStruct &mesh, int format, int rank ) { // Create the MeshDatabase - auto database = getDatabase( filename, mesh, format ); - if ( database.meshClass=="PointList" ) { + auto database = getDatabase( filename, mesh, format, rank ); + if ( database.meshClass == "PointList" ) { writeSiloPointList( fid, mesh, database ); - } else if ( database.meshClass=="TriMesh" ) { + } else if ( database.meshClass == "TriMesh" ) { writeSiloTriMesh( fid, mesh, database ); - } else if ( database.meshClass=="TriList" ) { + } else if ( database.meshClass == "TriList" ) { writeSiloTriList( fid, mesh, database ); - } else if ( database.meshClass=="DomainMesh" ) { + } else if ( database.meshClass == "DomainMesh" ) { writeSiloDomainMesh( fid, mesh, database ); } else { - ERROR("Unknown mesh class"); + ERROR( 
"Unknown mesh class" ); } return database; } // Write the summary file for silo -std::pair getSiloMeshType( const std::string& meshClass ) +std::pair getSiloMeshType( const std::string &meshClass ) { int meshType = 0; - int varType = 0; - if ( meshClass=="PointList" ) { + int varType = 0; + if ( meshClass == "PointList" ) { meshType = DB_POINTMESH; varType = DB_POINTVAR; - } else if ( meshClass=="TriMesh" ) { + } else if ( meshClass == "TriMesh" ) { meshType = DB_UCDMESH; varType = DB_UCDVAR; - } else if ( meshClass=="TriList" ) { + } else if ( meshClass == "TriList" ) { meshType = DB_UCDMESH; varType = DB_UCDVAR; - } else if ( meshClass=="DomainMesh" ) { + } else if ( meshClass == "DomainMesh" ) { meshType = DB_QUAD_RECT; varType = DB_QUADVAR; } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } return std::make_pair( meshType, varType ); } -void writeSiloSummary( const std::vector& meshes_written, const std::string& filename ) +void writeSiloSummary( + const std::vector &meshes_written, const std::string &filename ) { auto fid = silo::open( filename, silo::CREATE ); - for ( const auto& data : meshes_written ) { + for ( const auto &data : meshes_written ) { auto type = getSiloMeshType( data.meshClass ); std::vector meshTypes( data.domains.size(), type.first ); std::vector varTypes( data.domains.size(), type.second ); std::vector meshNames; - for ( const auto& tmp : data.domains ) + for ( const auto &tmp : data.domains ) meshNames.push_back( tmp.file + ":" + tmp.name ); silo::writeMultiMesh( fid, data.name, meshNames, meshTypes ); - for (const auto& variable : data.variables ) { + for ( const auto &variable : data.variables ) { std::vector varnames; - for ( const auto& tmp : data.domains ) + for ( const auto &tmp : data.domains ) varnames.push_back( tmp.file + ":" + variable.name ); silo::writeMultiVar( fid, variable.name, varnames, varTypes ); } @@ -396,113 +454,111 @@ void writeSiloSummary( const std::vector& meshes_written, cons // Write 
the mesh data in the new format -static std::vector writeMeshesNewFormat( - const std::vector& meshData, const std::string& path, int format ) +static std::vector writeMeshesNewFormat( + const std::vector &meshData, const std::string &path, int format, int rank ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; char filename[100], fullpath[200]; - sprintf(filename,"%05i",rank); - sprintf(fullpath,"%s/%s",path.c_str(),filename); - FILE *fid = fopen(fullpath,"wb"); - for (size_t i=0; i mesh = meshData[i].mesh; - meshes_written.push_back( write_domain(fid,filename,meshData[i],format) ); + meshes_written.push_back( write_domain( fid, filename, meshData[i], format, rank ) ); } - fclose(fid); + fclose( fid ); return meshes_written; } // Write the mesh data to silo -static std::vector writeMeshesSilo( - const std::vector& meshData, const std::string& path, int format ) +static std::vector writeMeshesSilo( + const std::vector &meshData, const std::string &path, int format, int rank ) { #ifdef USE_SILO - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; char filename[100], fullpath[200]; - sprintf(filename,"%05i.silo",rank); - sprintf(fullpath,"%s/%s",path.c_str(),filename); + sprintf( filename, "%05i.silo", rank ); + sprintf( fullpath, "%s/%s", path.c_str(), filename ); auto fid = silo::open( fullpath, silo::CREATE ); - for (size_t i=0; i(); #endif -} +} /**************************************************** -* Write the mesh data * -****************************************************/ -void IO::writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ) + * Write the mesh data * + ****************************************************/ +void IO::writeData( const std::string &subdir, const std::vector &meshData, + const Utilities::MPI &comm ) { if ( global_IO_path.empty() ) - IO::initialize( ); - PROFILE_START("writeData"); - int rank = 
Utilities::MPI(MPI_COMM_WORLD).getRank(); + IO::initialize(); + PROFILE_START( "writeData" ); + int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); // Check the meshData before writing - for ( const auto& data : meshData ) { + for ( const auto &data : meshData ) { if ( !data.check() ) - ERROR("Error in meshData"); + ERROR( "Error in meshData" ); } // Create the output directory std::string path = global_IO_path + "/" + subdir; - if ( rank == 0 ) { - mkdir(path.c_str(),S_IRWXU|S_IRGRP); - } - comm.barrier(); + recursiveMkdir( path, S_IRWXU | S_IRGRP ); // Write the mesh files std::vector meshes_written; if ( global_IO_format == Format::OLD ) { // Write the original triangle format - meshes_written = writeMeshesOrigFormat( meshData, path ); + meshes_written = writeMeshesOrigFormat( meshData, path, rank ); } else if ( global_IO_format == Format::NEW ) { // Write the new format (double precision) - meshes_written = writeMeshesNewFormat( meshData, path, 2 ); + meshes_written = writeMeshesNewFormat( meshData, path, 2, rank ); } else if ( global_IO_format == Format::SILO ) { // Write silo - meshes_written = writeMeshesSilo( meshData, path, 4 ); + meshes_written = writeMeshesSilo( meshData, path, 4, rank ); } else { - ERROR("Unknown format"); + ERROR( "Unknown format" ); } // Gather a complete list of files on rank 0 - meshes_written = gatherAll(meshes_written,comm); + meshes_written = gatherAll( meshes_written, comm ); // Write the summary files if ( rank == 0 ) { // Write the summary file for the current timestep char filename[200]; - sprintf(filename,"%s/LBM.summary",path.c_str()); - write(meshes_written,filename); - // Write summary silo file if needed - #ifdef USE_SILO + sprintf( filename, "%s/LBM.summary", path.c_str() ); + write( meshes_written, filename ); +// Write summary silo file if needed +#ifdef USE_SILO if ( global_IO_format == Format::SILO ) { - sprintf(filename,"%s/summary.silo",path.c_str()); - writeSiloSummary(meshes_written,filename); + sprintf( 
filename, "%s/summary.silo", path.c_str() ); + writeSiloSummary( meshes_written, filename ); } - #endif +#endif // Add the timestep to the global summary file if ( global_IO_format == Format::OLD || global_IO_format == Format::NEW ) { - auto filename = global_IO_path+"/summary.LBM"; - FILE *fid = fopen(filename.c_str(),"ab"); - fprintf(fid,"%s/\n",subdir.c_str()); - fclose(fid); + auto filename = global_IO_path + "/summary.LBM"; + FILE *fid = fopen( filename.c_str(), "ab" ); + fprintf( fid, "%s/\n", subdir.c_str() ); + fclose( fid ); } else if ( global_IO_format == Format::SILO ) { - auto filename = global_IO_path+"/LBM.visit"; - FILE *fid = fopen(filename.c_str(),"ab"); - fprintf(fid,"%s/summary.silo\n",subdir.c_str()); - fclose(fid); + auto filename = global_IO_path + "/LBM.visit"; + FILE *fid = fopen( filename.c_str(), "ab" ); + fprintf( fid, "%s/summary.silo\n", subdir.c_str() ); + fclose( fid ); } else { - ERROR("Unknown format"); + ERROR( "Unknown format" ); } } - PROFILE_STOP("writeData"); + PROFILE_STOP( "writeData" ); } - - diff --git a/IO/Writer.h b/IO/Writer.h index dfc22db8..c3d9d5bb 100644 --- a/IO/Writer.h +++ b/IO/Writer.h @@ -14,17 +14,18 @@ namespace IO { /*! * @brief Initialize the writer - * @details This function initializes the writer to the given path. All subsequent - * writes will occur in this directory. If this is not called, then it will default - * to the current path. + * @details This function initializes the writer to the given path. + * All subsequent writes will occur in this directory. + * If this is not called, then it will default to the current path. 
* @param[in] path The path to use for writes * @param[in] format The data format to use: - * old - Old mesh format (provided for backward compatibility, cannot write variables) - * new - New format, 1 file/process - * silo - Silo + * old - Old mesh format + * (provided for backward compatibility, cannot write variables) + * new - New format, 1 file/process silo - Silo * @param[in] append Append any existing data (default is false) */ -void initialize( const std::string& path="", const std::string& format="silo", bool append=false ); +void initialize( + const std::string &path = "", const std::string &format = "silo", bool append = false ); /*! @@ -34,7 +35,8 @@ void initialize( const std::string& path="", const std::string& format="silo", b * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -void writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ); +void writeData( const std::string &subdir, const std::vector &meshData, + const Utilities::MPI &comm ); /*! 
@@ -44,14 +46,15 @@ void writeData( const std::string& subdir, const std::vector * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -inline void writeData( int timestep, const std::vector& meshData, const Utilities::MPI& comm ) +inline void writeData( + int timestep, const std::vector &meshData, const Utilities::MPI &comm ) { char subdir[100]; - sprintf(subdir,"vis%03i",timestep); + sprintf( subdir, "vis%03i", timestep ); writeData( subdir, meshData, comm ); } -} // IO namespace +} // namespace IO #endif diff --git a/IO/netcdf.cpp b/IO/netcdf.cpp index 6c3773e3..06f41dba 100644 --- a/IO/netcdf.cpp +++ b/IO/netcdf.cpp @@ -1,6 +1,6 @@ #include "IO/netcdf.h" -#include "common/Utilities.h" #include "common/MPI.h" +#include "common/Utilities.h" #include "ProfilerApp.h" @@ -12,14 +12,14 @@ #include -#define CHECK_NC_ERR( ERR ) \ - do { \ - if ( ERR != NC_NOERR ) { \ +#define CHECK_NC_ERR( ERR ) \ + do { \ + if ( ERR != NC_NOERR ) { \ std::string msg = "Error calling netcdf routine: "; \ - msg += nc_strerror( ERR ); \ - ERROR( msg ); \ - } \ - } while (0) + msg += nc_strerror( ERR ); \ + ERROR( msg ); \ + } \ + } while ( 0 ) namespace netcdf { @@ -50,43 +50,64 @@ static inline VariableType convertType( nc_type type ) else if ( type == NC_DOUBLE ) type2 = DOUBLE; else - ERROR("Unknown type"); + ERROR( "Unknown type" ); return type2; } // Get nc_type from the template -template inline nc_type getType(); -template<> inline nc_type getType() { return NC_CHAR; } -template<> inline nc_type getType() { return NC_SHORT; } -template<> inline nc_type getType() { return NC_INT; } -template<> inline nc_type getType() { return NC_FLOAT; } -template<> inline nc_type getType() { return NC_DOUBLE; } +template +inline nc_type getType(); +template<> +inline nc_type getType() +{ + return NC_CHAR; +} +template<> +inline nc_type getType() +{ + return NC_SHORT; +} +template<> +inline nc_type getType() +{ + return 
NC_INT; +} +template<> +inline nc_type getType() +{ + return NC_FLOAT; +} +template<> +inline nc_type getType() +{ + return NC_DOUBLE; +} // Function to reverse an array template -inline std::vector reverse( const std::vector& x ) +inline std::vector reverse( const std::vector &x ) { - std::vector y(x.size()); - for (size_t i=0; i y( x.size() ); + for ( size_t i = 0; i < x.size(); i++ ) + y[i] = x[x.size() - i - 1]; return y; } // Function to reverse an array template -inline std::vector convert( const std::vector& x ) +inline std::vector convert( const std::vector &x ) { - std::vector y(x.size()); - for (size_t i=0; i(x[i]); + std::vector y( x.size() ); + for ( size_t i = 0; i < x.size(); i++ ) + y[i] = static_cast( x[i] ); return y; } /**************************************************** -* Convert the VariableType to a string * -****************************************************/ + * Convert the VariableType to a string * + ****************************************************/ std::string VariableTypeName( VariableType type ) { if ( type == BYTE ) @@ -114,9 +135,9 @@ std::string VariableTypeName( VariableType type ) /**************************************************** -* Open/close a file * -****************************************************/ -int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm ) + * Open/close a file * + ****************************************************/ +int open( const std::string &filename, FileMode mode, const Utilities::MPI &comm ) { int fid = 0; if ( comm.isNull() ) { @@ -127,23 +148,26 @@ int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm int err = nc_open( filename.c_str(), NC_WRITE, &fid ); CHECK_NC_ERR( err ); } else if ( mode == CREATE ) { - int err = nc_create( filename.c_str(), NC_SHARE|NC_64BIT_OFFSET, &fid ); + int err = nc_create( filename.c_str(), NC_SHARE | NC_64BIT_OFFSET, &fid ); CHECK_NC_ERR( err ); } else { - ERROR("Unknown file mode"); + ERROR( "Unknown 
file mode" ); } } else { if ( mode == READ ) { - int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_open_par( + filename.c_str(), NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else if ( mode == WRITE ) { - int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_open_par( filename.c_str(), NC_WRITE | NC_MPIPOSIX, comm.getCommunicator(), + MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else if ( mode == CREATE ) { - int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_create_par( filename.c_str(), NC_NETCDF4 | NC_MPIIO, + comm.getCommunicator(), MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else { - ERROR("Unknown file mode"); + ERROR( "Unknown file mode" ); } } return fid; @@ -152,42 +176,42 @@ void close( int fid ) { int err = nc_close( fid ); if ( err != NC_NOERR ) - ERROR("Error closing file"); + ERROR( "Error closing file" ); } /**************************************************** -* Query basic properties * -****************************************************/ + * Query basic properties * + ****************************************************/ static std::vector getDimVar( int fid, int varid ) { int ndim = 0; - int err = nc_inq_varndims( fid, varid, &ndim ); + int err = nc_inq_varndims( fid, varid, &ndim ); CHECK_NC_ERR( err ); - std::vector dims(ndim,0); - int dimid[64] = {-1}; - err = nc_inq_vardimid( fid, varid, dimid ); + std::vector dims( ndim, 0 ); + int dimid[64] = { -1 }; + err = nc_inq_vardimid( fid, varid, dimid ); CHECK_NC_ERR( err ); - for (int i=0; i getVarDim( int fid, const std::string& var ) +std::vector getVarDim( int fid, const std::string &var ) { return getDimVar( fid, getVarID( fid, var ) ); } -std::vector getAttDim( int fid, const std::string& att ) +std::vector getAttDim( int fid, const std::string 
&att ) { - std::vector dim(1,0); + std::vector dim( 1, 0 ); int err = nc_inq_attlen( fid, NC_GLOBAL, att.c_str(), dim.data() ); CHECK_NC_ERR( err ); return dim; @@ -197,9 +221,9 @@ std::vector getVarNames( int fid ) int nvar; int err = nc_inq( fid, NULL, &nvar, NULL, NULL ); CHECK_NC_ERR( err ); - std::vector vars(nvar); - for (int i=0; i vars( nvar ); + for ( int i = 0; i < nvar; i++ ) { + char name[NC_MAX_NAME + 1]; err = nc_inq_varname( fid, i, name ); CHECK_NC_ERR( err ); vars[i] = name; @@ -211,262 +235,269 @@ std::vector getAttNames( int fid ) int natt; int err = nc_inq( fid, NULL, NULL, &natt, NULL ); CHECK_NC_ERR( err ); - std::vector att(natt); - for (int i=0; i att( natt ); + for ( int i = 0; i < natt; i++ ) { + char name[NC_MAX_NAME + 1]; + err = nc_inq_attname( fid, NC_GLOBAL, i, name ); CHECK_NC_ERR( err ); att[i] = name; } return att; } -VariableType getVarType( int fid, const std::string& var ) +VariableType getVarType( int fid, const std::string &var ) { int varid = -1; - int err = nc_inq_varid( fid, var.c_str(), &varid ); + int err = nc_inq_varid( fid, var.c_str(), &varid ); CHECK_NC_ERR( err ); - nc_type type=0; - err = nc_inq_vartype( fid, varid, &type ); + nc_type type = 0; + err = nc_inq_vartype( fid, varid, &type ); CHECK_NC_ERR( err ); - return convertType(type); + return convertType( type ); } -VariableType getAttType( int fid, const std::string& att ) +VariableType getAttType( int fid, const std::string &att ) { - nc_type type=0; - int err = nc_inq_atttype( fid, NC_GLOBAL, att.c_str(), &type ); + nc_type type = 0; + int err = nc_inq_atttype( fid, NC_GLOBAL, att.c_str(), &type ); CHECK_NC_ERR( err ); - return convertType(type); + return convertType( type ); } - /**************************************************** -* Read a variable * -****************************************************/ + * Read a variable * + ****************************************************/ template<> -Array getVar( int fid, const std::string& var ) +Array getVar( 
int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_ushort( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_ushort( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_short( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_short( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_uint( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_uint( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_int( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_int( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - 
PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_float( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_float( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_double( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_double( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) -{ - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_text( fid, getVarID(fid,var), x.data() ); +Array getVar( int fid, const std::string &var ) +{ + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_text( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array tmp = getVar( fid, var ); - std::vector dim = {tmp.size(0), tmp.size(1), tmp.size(2) }; + PROFILE_START( "getVar" ); + Array tmp = getVar( fid, var ); + std::vector dim = { tmp.size( 0 ), tmp.size( 1 ), tmp.size( 2 ) }; if ( dim.size() == 1 ) dim[0] = 1; else dim.erase( dim.begin() ); - Array text(dim); - for (size_t i=0; i"); + Array text( dim ); + for ( size_t i = 0; i < text.length(); i++ ) + text( i ) = &( tmp( 0, i ) ); + PROFILE_STOP( "getVar" ); return text; } -static inline 
void get_stride_args( const std::vector& start, - const std::vector& count, const std::vector& stride, - size_t *startp, size_t *countp, ptrdiff_t *stridep ) +static inline void get_stride_args( const std::vector &start, const std::vector &count, + const std::vector &stride, size_t *startp, size_t *countp, ptrdiff_t *stridep ) { - for (size_t i=0; i -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], TYPE *ptr ); +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], TYPE *ptr ); template<> -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], short *ptr ) +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], short *ptr ) { return nc_get_vars_short( fid, varid, start, count, stride, ptr ); } template<> -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], int *ptr ) +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], int *ptr ) { return nc_get_vars_int( fid, varid, start, count, stride, ptr ); } template<> -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], float *ptr ) +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], float *ptr ) { return nc_get_vars_float( fid, varid, start, count, stride, ptr ); } template<> -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], double *ptr ) +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], double *ptr ) { return nc_get_vars_double( fid, varid, start, count, stride, ptr ); } template -Array getVar( int fid, const 
std::string& var, const std::vector& start, - const std::vector& count, const std::vector& stride ) +Array getVar( int fid, const std::string &var, const std::vector &start, + const std::vector &count, const std::vector &stride ) { - PROFILE_START("getVar<> (strided)"); + PROFILE_START( "getVar<> (strided)" ); std::vector var_size = getVarDim( fid, var ); - for (int d=0; d<(int)var_size.size(); d++) { - if ( start[d]<0 || start[d]+stride[d]*(count[d]-1)>(int)var_size[d] ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + for ( int d = 0; d < (int) var_size.size(); d++ ) { + if ( start[d] < 0 || start[d] + stride[d] * ( count[d] - 1 ) > (int) var_size[d] ) { + int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); char tmp[1000]; - sprintf(tmp,"%i: Range exceeded array dimension:\n" + sprintf( tmp, + "%i: Range exceeded array dimension:\n" " start[%i]=%i, count[%i]=%i, stride[%i]=%i, var_size[%i]=%i", - rank,d,start[d],d,count[d],d,stride[d],d,(int)var_size[d]); - ERROR(tmp); + rank, d, start[d], d, count[d], d, stride[d], d, (int) var_size[d] ); + ERROR( tmp ); } } - Array x( reverse(convert(count)) ); + Array x( reverse( convert( count ) ) ); size_t startp[10], countp[10]; ptrdiff_t stridep[10]; get_stride_args( start, count, stride, startp, countp, stridep ); - int err = nc_get_vars_TYPE( fid, getVarID(fid,var), startp, countp, stridep, x.data() ); + int err = + nc_get_vars_TYPE( fid, getVarID( fid, var ), startp, countp, stridep, x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar<> (strided)"); + PROFILE_STOP( "getVar<> (strided)" ); return x.reverseDim(); } -template Array getVar( int, const std::string&, const std::vector&, const std::vector&, const std::vector& ); -template Array getVar( int, const std::string&, const std::vector&, const std::vector&, const std::vector& ); -template Array getVar( int, const std::string&, const std::vector&, const std::vector&, const std::vector& ); -template Array getVar( int, const std::string&, const 
std::vector&, const std::vector&, const std::vector& ); +template Array getVar( int, const std::string &, const std::vector &, + const std::vector &, const std::vector & ); +template Array getVar( int, const std::string &, const std::vector &, + const std::vector &, const std::vector & ); +template Array getVar( int, const std::string &, const std::vector &, + const std::vector &, const std::vector & ); +template Array getVar( int, const std::string &, const std::vector &, + const std::vector &, const std::vector & ); /**************************************************** -* Read an attribute * -****************************************************/ + * Read an attribute * + ****************************************************/ template<> -Array getAtt( int fid, const std::string& att ) +Array getAtt( int fid, const std::string &att ) { - PROFILE_START("getAtt"); - Array x( getAttDim(fid,att) ); + PROFILE_START( "getAtt" ); + Array x( getAttDim( fid, att ) ); int err = nc_get_att_double( fid, NC_GLOBAL, att.c_str(), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getAtt"); + PROFILE_STOP( "getAtt" ); return x; } template<> -Array getAtt( int fid, const std::string& att ) +Array getAtt( int fid, const std::string &att ) { - PROFILE_START("getAtt"); - char *tmp = new char[getAttDim(fid,att)[0]]; - Array x(1); - x(0) = tmp; - delete [] tmp; - PROFILE_STOP("getAtt"); + PROFILE_START( "getAtt" ); + char *tmp = new char[getAttDim( fid, att )[0]]; + Array x( 1 ); + x( 0 ) = tmp; + delete[] tmp; + PROFILE_STOP( "getAtt" ); return x; } /**************************************************** -* Write an array to a file * -****************************************************/ -std::vector defDim( int fid, const std::vector& names, const std::vector& dims ) + * Write an array to a file * + ****************************************************/ +std::vector defDim( + int fid, const std::vector &names, const std::vector &dims ) { - std::vector dimid(names.size(),0); - for (size_t 
i=0; i dimid( names.size(), 0 ); + for ( size_t i = 0; i < names.size(); i++ ) { + int err = nc_def_dim( fid, names[i].c_str(), dims[i], &dimid[i] ); CHECK_NC_ERR( err ); } return dimid; } template -void write( int fid, const std::string& var, const std::vector& dimids, - const Array& data, const RankInfoStruct& info ) +void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ) { // Define the variable int varid = 0; - int err = nc_def_var( fid, var.c_str(), getType(), data.ndim(), dimids.data(), &varid ); + int err = nc_def_var( fid, var.c_str(), getType(), data.ndim(), dimids.data(), &varid ); CHECK_NC_ERR( err ); - // exit define mode + // exit define mode err = nc_enddef( fid ); CHECK_NC_ERR( err ); - // set the access method to use MPI/PnetCDF collective I/O + // set the access method to use MPI/PnetCDF collective I/O err = nc_var_par_access( fid, varid, NC_INDEPENDENT ); CHECK_NC_ERR( err ); // parallel write: each process writes its subarray to the file - auto x = data.reverseDim(); - std::vector count = { data.size(0), data.size(1), data.size(2) }; - std::vector start = { info.ix*data.size(0), info.jy*data.size(1), info.kz*data.size(2) }; + auto x = data.reverseDim(); + std::vector count = { data.size( 0 ), data.size( 1 ), data.size( 2 ) }; + std::vector start = { info.ix * data.size( 0 ), info.jy * data.size( 1 ), + info.kz * data.size( 2 ) }; nc_put_vara( fid, varid, start.data(), count.data(), x.data() ); } -template void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& info ); -template void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& info ); -template void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& info ); -template void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, 
const RankInfoStruct& info ); +template void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ); +template void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ); +template void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ); +template void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ); - -}; // netcdf namespace +}; // namespace netcdf #else #endif - - diff --git a/IO/netcdf.h b/IO/netcdf.h index e1f65e61..eb77784d 100644 --- a/IO/netcdf.h +++ b/IO/netcdf.h @@ -5,9 +5,8 @@ #include #include "common/Array.h" -#include "common/MPI.h" #include "common/Communication.h" - +#include "common/MPI.h" namespace netcdf { @@ -31,15 +30,15 @@ std::string VariableTypeName( VariableType type ); * @param filename File to open * @param mode Open the file for reading or writing * @param comm MPI communicator to use (MPI_COMM_WORLD: don't use parallel netcdf) -*/ -int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm=MPI_COMM_NULL ); + */ +int open( const std::string &filename, FileMode mode, const Utilities::MPI &comm = MPI_COMM_NULL ); /*! 
* @brief Close netcdf file * @details This function closes a netcdf file * @param fid Handle to the open file -*/ + */ void close( int fid ); @@ -47,7 +46,7 @@ void close( int fid ); * @brief Read the variable names * @details This function reads a list of the variable names in the file * @param fid Handle to the open file -*/ + */ std::vector getVarNames( int fid ); @@ -55,7 +54,7 @@ std::vector getVarNames( int fid ); * @brief Read the attribute names * @details This function reads a list of the attribute names in the file * @param fid Handle to the open file -*/ + */ std::vector getAttNames( int fid ); @@ -64,8 +63,8 @@ std::vector getAttNames( int fid ); * @details This function returns the type for a variable * @param fid Handle to the open file * @param var Variable to read -*/ -VariableType getVarType( int fid, const std::string& var ); + */ +VariableType getVarType( int fid, const std::string &var ); /*! @@ -73,8 +72,8 @@ VariableType getVarType( int fid, const std::string& var ); * @details This function returns the type for an attribute * @param fid Handle to the open file * @param att Attribute to read -*/ -VariableType getAttType( int fid, const std::string& att ); + */ +VariableType getAttType( int fid, const std::string &att ); /*! @@ -82,8 +81,8 @@ VariableType getAttType( int fid, const std::string& att ); * @details This function returns the die for a variable * @param fid Handle to the open file * @param var Variable to read -*/ -std::vector getVarDim( int fid, const std::string& var ); + */ +std::vector getVarDim( int fid, const std::string &var ); /*! @@ -91,9 +90,9 @@ std::vector getVarDim( int fid, const std::string& var ); * @details This function reads a variable with the given name from the file * @param fid Handle to the open file * @param var Variable to read -*/ + */ template -Array getVar( int fid, const std::string& var ); +Array getVar( int fid, const std::string &var ); /*! 
@@ -104,10 +103,10 @@ Array getVar( int fid, const std::string& var ); * @param start Starting corner for the read * @param count Number of elements to read * @param stride Stride size for the read -*/ + */ template -Array getVar( int fid, const std::string& var, const std::vector& start, - const std::vector& count, const std::vector& stride ); +Array getVar( int fid, const std::string &var, const std::vector &start, + const std::vector &count, const std::vector &stride ); /*! @@ -115,27 +114,29 @@ Array getVar( int fid, const std::string& var, const std::vector& sta * @details This function reads an attribute with the given name from the file * @param fid Handle to the open file * @param att Attribute to read -*/ + */ template -Array getAtt( int fid, const std::string& att ); +Array getAtt( int fid, const std::string &att ); /*! * @brief Write the dimensions - * @details This function writes the grid dimensions to netcdf. + * @details This function writes the grid dimensions to netcdf. * @param fid Handle to the open file -*/ -std::vector defDim( int fid, const std::vector& names, const std::vector& dims ); + */ +std::vector defDim( + int fid, const std::vector &names, const std::vector &dims ); /*! * @brief Write a variable - * @details This function writes a variable to netcdf. + * @details This function writes a variable to netcdf. 
* @param fid Handle to the open file -*/ + */ template -void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& rank_info ); +void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &rank_info ); -}; // netcdf namespace +}; // namespace netcdf #endif diff --git a/IO/silo.cpp b/IO/silo.cpp index ddf3646a..6b6a2c39 100644 --- a/IO/silo.cpp +++ b/IO/silo.cpp @@ -1,6 +1,6 @@ #include "IO/silo.h" -#include "common/Utilities.h" #include "common/MPI.h" +#include "common/Utilities.h" #include "ProfilerApp.h" @@ -10,14 +10,13 @@ #include - namespace silo { /**************************************************** -* Open/close a file * -****************************************************/ -DBfile* open( const std::string& filename, FileMode mode ) + * Open/close a file * + ****************************************************/ +DBfile *open( const std::string &filename, FileMode mode ) { DBfile *fid = nullptr; if ( mode == CREATE ) { @@ -29,18 +28,15 @@ DBfile* open( const std::string& filename, FileMode mode ) } return fid; } -void close( DBfile* fid ) -{ - DBClose( fid ); -} +void close( DBfile *fid ) { DBClose( fid ); } /**************************************************** -* Helper functions * -****************************************************/ -VariableDataType varDataType( DBfile *fid, const std::string& name ) + * Helper functions * + ****************************************************/ +VariableDataType varDataType( DBfile *fid, const std::string &name ) { - auto type = DBGetVarType( fid, name.c_str() ); + auto type = DBGetVarType( fid, name.c_str() ); VariableDataType type2 = VariableDataType::UNKNOWN; if ( type == DB_DOUBLE ) type2 = VariableDataType::DOUBLE; @@ -53,58 +49,57 @@ VariableDataType varDataType( DBfile *fid, const std::string& name ) /**************************************************** -* Write/read a uniform mesh to silo * 
-****************************************************/ -void readUniformMesh( DBfile* fid, const std::string& meshname, - std::vector& range, std::vector& N ) + * Write/read a uniform mesh to silo * + ****************************************************/ +void readUniformMesh( + DBfile *fid, const std::string &meshname, std::vector &range, std::vector &N ) { - DBquadmesh* mesh = DBGetQuadmesh( fid, meshname.c_str() ); - int ndim = mesh->ndims; - range.resize(2*ndim); - N.resize(ndim); - for (int d=0; ddims[d]-1; - range[2*d+0] = mesh->min_extents[d]; - range[2*d+1] = mesh->max_extents[d]; + DBquadmesh *mesh = DBGetQuadmesh( fid, meshname.c_str() ); + int ndim = mesh->ndims; + range.resize( 2 * ndim ); + N.resize( ndim ); + for ( int d = 0; d < ndim; d++ ) { + N[d] = mesh->dims[d] - 1; + range[2 * d + 0] = mesh->min_extents[d]; + range[2 * d + 1] = mesh->max_extents[d]; } DBFreeQuadmesh( mesh ); } /**************************************************** -* Write a multimesh * -****************************************************/ -void writeMultiMesh( DBfile* fid, const std::string& meshname, - const std::vector& meshNames, - const std::vector& meshTypes ) + * Write a multimesh * + ****************************************************/ +void writeMultiMesh( DBfile *fid, const std::string &meshname, + const std::vector &meshNames, const std::vector &meshTypes ) { - std::vector meshnames(meshNames.size()); + std::vector meshnames( meshNames.size() ); for ( size_t i = 0; i < meshNames.size(); ++i ) meshnames[i] = (char *) meshNames[i].c_str(); std::string tree_name = meshname + "_tree"; DBoptlist *optList = DBMakeOptlist( 1 ); DBAddOption( optList, DBOPT_MRGTREE_NAME, (char *) tree_name.c_str() ); - DBPutMultimesh( fid, meshname.c_str(), meshNames.size(), meshnames.data(), (int*) meshTypes.data(), nullptr ); + DBPutMultimesh( fid, meshname.c_str(), meshNames.size(), meshnames.data(), + (int *) meshTypes.data(), nullptr ); DBFreeOptlist( optList ); } 
/**************************************************** -* Write a multivariable * -****************************************************/ -void writeMultiVar( DBfile* fid, const std::string& varname, - const std::vector& varNames, - const std::vector& varTypes ) + * Write a multivariable * + ****************************************************/ +void writeMultiVar( DBfile *fid, const std::string &varname, + const std::vector &varNames, const std::vector &varTypes ) { - std::vector varnames(varNames.size(),nullptr); - for (size_t j=0; j(varNames[j].c_str()); - DBPutMultivar( fid, varname.c_str(), varNames.size(), varnames.data(), (int*) varTypes.data(), nullptr ); + std::vector varnames( varNames.size(), nullptr ); + for ( size_t j = 0; j < varNames.size(); j++ ) + varnames[j] = const_cast( varNames[j].c_str() ); + DBPutMultivar( + fid, varname.c_str(), varNames.size(), varnames.data(), (int *) varTypes.data(), nullptr ); } - -}; // silo namespace +}; // namespace silo #else diff --git a/IO/silo.h b/IO/silo.h index 40a023d7..309746f3 100644 --- a/IO/silo.h +++ b/IO/silo.h @@ -1,29 +1,34 @@ #ifndef SILO_INTERFACE #define SILO_INTERFACE +#include #include #include -#include #include "common/Array.h" -#include "common/MPI.h" #include "common/Communication.h" +#include "common/MPI.h" #ifdef USE_SILO - #include +#include #else - typedef int DBfile; +typedef int DBfile; #endif - namespace silo { enum FileMode { READ, WRITE, CREATE }; -enum class VariableType : int { NodeVariable=1, EdgeVariable=2, SurfaceVariable=2, VolumeVariable=3, NullVariable=0 }; +enum class VariableType : int { + NodeVariable = 1, + EdgeVariable = 2, + SurfaceVariable = 2, + VolumeVariable = 3, + NullVariable = 0 +}; enum class VariableDataType { DOUBLE, FLOAT, INT, UNKNOWN }; @@ -34,16 +39,16 @@ enum class VariableDataType { DOUBLE, FLOAT, INT, UNKNOWN }; * @param[in] filename File to open * @param[in] mode Open the file for reading or writing * @return This function returns a handle to the file -*/ 
-DBfile* open( const std::string& filename, FileMode mode ); + */ +DBfile *open( const std::string &filename, FileMode mode ); /*! * @brief Close silo file * @details This function closes a silo file * @param[in] fid Handle to the open file -*/ -void close( DBfile* fid ); + */ +void close( DBfile *fid ); /*! @@ -51,8 +56,8 @@ void close( DBfile* fid ); * @details This function returns the type of variable data * @param[in] fid Handle to the open file * @param[in] name Name of variable -*/ -VariableDataType varDataType( DBfile *dbfile, const std::string& name ); + */ +VariableDataType varDataType( DBfile *dbfile, const std::string &name ); /*! @@ -61,9 +66,9 @@ VariableDataType varDataType( DBfile *dbfile, const std::string& name ); * @param[in] fid Handle to the open file * @param[in] varname Variable name * @param[in] data Data to write -*/ + */ template -void write( DBfile* fid, const std::string& varname, const std::vector& data ); +void write( DBfile *fid, const std::string &varname, const std::vector &data ); /*! @@ -72,9 +77,9 @@ void write( DBfile* fid, const std::string& varname, const std::vector& da * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Data read -*/ + */ template -std::vector read( DBfile* fid, const std::string& varname ); +std::vector read( DBfile *fid, const std::string &varname ); /*! @@ -84,10 +89,10 @@ std::vector read( DBfile* fid, const std::string& varname ); * @param[in] meshname Mesh name * @param[in] range Range of mesh { xmin, xmax, ymin, ymax, zmin, zmax } * @param[in] N Number of cells in each direction -*/ + */ template -void writeUniformMesh( DBfile* fid, const std::string& meshname, - const std::array& range, const std::array& N ); +void writeUniformMesh( DBfile *fid, const std::string &meshname, + const std::array &range, const std::array &N ); /*! 
@@ -97,9 +102,9 @@ void writeUniformMesh( DBfile* fid, const std::string& meshname, * @param[in] meshname Mesh name * @param[out] range Range of mesh { xmin, xmax, ymin, ymax, zmin, zmax } * @param[out] N Number of cells in each direction -*/ -void readUniformMesh( DBfile* fid, const std::string& meshname, - std::vector& range, std::vector& N ); + */ +void readUniformMesh( + DBfile *fid, const std::string &meshname, std::vector &range, std::vector &N ); /*! @@ -111,10 +116,11 @@ void readUniformMesh( DBfile* fid, const std::string& meshname, * @param[in] varname Variable name * @param[in] data Variable data * @param[in] type Variable type -*/ -template< int NDIM, class TYPE > -void writeUniformMeshVariable( DBfile* fid, const std::string& meshname, const std::array& N, - const std::string& varname, const Array& data, VariableType type ); + */ +template +void writeUniformMeshVariable( DBfile *fid, const std::string &meshname, + const std::array &N, const std::string &varname, const Array &data, + VariableType type ); /*! @@ -123,9 +129,9 @@ void writeUniformMeshVariable( DBfile* fid, const std::string& meshname, const s * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data -*/ + */ template -Array readUniformMeshVariable( DBfile* fid, const std::string& varname ); +Array readUniformMeshVariable( DBfile *fid, const std::string &varname ); /*! @@ -136,10 +142,10 @@ Array readUniformMeshVariable( DBfile* fid, const std::string& varname ); * @param[in] ndim Number of dimensions * @param[in] N Number of points * @param[in] coords Coordinates of the points -*/ + */ template -void writePointMesh( DBfile* fid, const std::string& meshname, - int ndim, int N, const TYPE *coords[] ); +void writePointMesh( + DBfile *fid, const std::string &meshname, int ndim, int N, const TYPE *coords[] ); /*! 
@@ -147,10 +153,10 @@ void writePointMesh( DBfile* fid, const std::string& meshname, * @details This function reads a pointmesh from silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name - * @return Returns the coordinates as a N x ndim array -*/ + * @return Returns the coordinates as a N x ndim array + */ template -Array readPointMesh( DBfile* fid, const std::string& meshname ); +Array readPointMesh( DBfile *fid, const std::string &meshname ); /*! @@ -160,10 +166,10 @@ Array readPointMesh( DBfile* fid, const std::string& meshname ); * @param[in] meshname Mesh name * @param[in] varname Variable name * @param[in] data Variable data -*/ + */ template -void writePointMeshVariable( DBfile* fid, const std::string& meshname, - const std::string& varname, const Array& data ); +void writePointMeshVariable( + DBfile *fid, const std::string &meshname, const std::string &varname, const Array &data ); /*! @@ -172,9 +178,9 @@ void writePointMeshVariable( DBfile* fid, const std::string& meshname, * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data -*/ + */ template -Array readPointMeshVariable( DBfile* fid, const std::string& varname ); +Array readPointMeshVariable( DBfile *fid, const std::string &varname ); /*! @@ -188,10 +194,10 @@ Array readPointMeshVariable( DBfile* fid, const std::string& varname ); * @param[in] coords Coordinates of the points * @param[in] N_tri Number of triangles * @param[in] tri Coordinates of the points -*/ + */ template -void writeTriMesh( DBfile* fid, const std::string& meshname, - int ndim, int ndim_tri, int N, const TYPE *coords[], int N_tri, const int *tri[] ); +void writeTriMesh( DBfile *fid, const std::string &meshname, int ndim, int ndim_tri, int N, + const TYPE *coords[], int N_tri, const int *tri[] ); /*! 
@@ -201,9 +207,9 @@ void writeTriMesh( DBfile* fid, const std::string& meshname, * @param[in] meshname Mesh name * @param[in] coords Coordinates of the points * @param[in] tri Coordinates of the points -*/ + */ template -void readTriMesh( DBfile* fid, const std::string& meshname, Array& coords, Array& tri ); +void readTriMesh( DBfile *fid, const std::string &meshname, Array &coords, Array &tri ); /*! @@ -215,10 +221,10 @@ void readTriMesh( DBfile* fid, const std::string& meshname, Array& coords, * @param[in] varname Variable name * @param[in] data Variable data * @param[in] type Variable type -*/ + */ template -void writeTriMeshVariable( DBfile* fid, int ndim, const std::string& meshname, - const std::string& varname, const Array& data, VariableType type ); +void writeTriMeshVariable( DBfile *fid, int ndim, const std::string &meshname, + const std::string &varname, const Array &data, VariableType type ); /*! @@ -227,9 +233,9 @@ void writeTriMeshVariable( DBfile* fid, int ndim, const std::string& meshname, * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data -*/ + */ template -Array readTriMeshVariable( DBfile* fid, const std::string& varname ); +Array readTriMeshVariable( DBfile *fid, const std::string &varname ); /*! @@ -239,10 +245,9 @@ Array readTriMeshVariable( DBfile* fid, const std::string& varname ); * @param[in] meshname Mesh name * @param[in] subMeshNames Names of the sub meshes in the form "filename:meshname" * @param[in] subMeshTypes Type of each submesh -*/ -void writeMultiMesh( DBfile* fid, const std::string& meshname, - const std::vector& subMeshNames, - const std::vector& subMeshTypes ); + */ +void writeMultiMesh( DBfile *fid, const std::string &meshname, + const std::vector &subMeshNames, const std::vector &subMeshTypes ); /*! 
@@ -255,14 +260,12 @@ void writeMultiMesh( DBfile* fid, const std::string& meshname, * @param[in] subVarTypes Type of each submesh * @param[in] ndim Dimension of variable (used to determine suffix) * @param[in] nvar Number of subvariables (used to determine suffix) -*/ -void writeMultiVar( DBfile* fid, const std::string& varname, - const std::vector& subVarNames, - const std::vector& subVarTypes ); + */ +void writeMultiVar( DBfile *fid, const std::string &varname, + const std::vector &subVarNames, const std::vector &subVarTypes ); -}; // silo namespace +}; // namespace silo #endif #include "IO/silo.hpp" - diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8df4e6bd..2405b463 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -35,7 +35,7 @@ ADD_LBPM_EXECUTABLE( GenerateSphereTest ) #ADD_LBPM_EXECUTABLE( BlobAnalysis ) #ADD_LBPM_EXECUTABLE( BlobIdentify ) #ADD_LBPM_EXECUTABLE( BlobIdentifyParallel ) -#ADD_LBPM_EXECUTABLE( convertIO ) +ADD_LBPM_EXECUTABLE( convertIO ) #ADD_LBPM_EXECUTABLE( DataAggregator ) #ADD_LBPM_EXECUTABLE( BlobAnalyzeParallel )( ADD_LBPM_EXECUTABLE( lbpm_minkowski_scalar ) diff --git a/tests/TestWriter.cpp b/tests/TestWriter.cpp index 4030930c..9b7e381c 100644 --- a/tests/TestWriter.cpp +++ b/tests/TestWriter.cpp @@ -159,7 +159,7 @@ void testWriter( const std::string& format, std::vector& mes // Test the simple read interface bool pass = true; for ( const auto& timestep : timesteps ) { - auto data = IO::readData( path, timestep ); + auto data = IO::readData( path, timestep, comm.getRank() ); pass = pass && data.size() == meshData.size(); for ( size_t i=0; i #include -#include "common/MPI_Helpers.h" -#include "common/Communication.h" +#include "common/MPI.h" #include "common/Utilities.h" #include "IO/Mesh.h" #include "IO/Reader.h" @@ -16,74 +15,56 @@ int main(int argc, char **argv) { - // Initialize MPI - Utilities::startup( argc, argv ); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = 
comm.getSize(); - Utilities::setErrorHandlers(); - PROFILE_ENABLE(2); - PROFILE_ENABLE_TRACE(); - PROFILE_START("Main"); - { // Limit scope + // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::setErrorHandlers(); + PROFILE_ENABLE(2); + PROFILE_ENABLE_TRACE(); + PROFILE_START("Main"); - // Get inputs - if ( argc != 3 ) { - std::cerr << "Error calling convertIO:\n"; - std::cerr << " convertIO input_file format\n"; - return -1; - } - std::string filename = argv[1]; - std::string format = argv[2]; - std::string path = IO::getPath( filename ); + { // Limit scope - // Read the timesteps - auto timesteps = IO::readTimesteps( filename, "old" ); - - // Loop through the timesteps, reading/writing the data - IO::initialize( "", format, false ); - for ( auto timestep : timesteps ) { - - // Read the list of MeshDatabase - auto databases = IO::getMeshList( path, timestep ); - - // Build the MeshDataStruct - std::vector meshData(databases.size()); - - // Loop through the database - int i = 0; - PROFILE_START("Read"); - for ( const auto& database : databases ) { - - // Read the appropriate mesh domain - ASSERT( (int) database.domains.size() == nprocs ); - meshData[i].meshName = database.name; - meshData[i].mesh = IO::getMesh( path, timestep, database, rank ); - - // Read the variables - for ( auto var : database.variables ) { - auto varData = IO::getVariable( path, timestep, database, rank, var.name ); - IO::reformatVariable( *meshData[i].mesh, *varData ); - meshData[i].vars.push_back( varData ); - } - - i++; + Utilities::MPI comm( MPI_COMM_WORLD ); + // Get inputs + if ( argc != 5 ) { + std::cerr << "Error calling convertIO:\n"; + std::cerr << " convertIO \n"; + return -1; } - MPI_Barrier(comm); - PROFILE_STOP("Read"); + std::string path_in = argv[1]; + std::string format_in = argv[2]; + std::string path_out = argv[3]; + std::string format_out = argv[4]; - // Save the mesh data to a new file - PROFILE_START("Write"); - IO::writeData( timestep, meshData, 
MPI_COMM_WORLD ); - MPI_Barrier(comm); - PROFILE_STOP("Write"); - } + // Check that we have enough ranks to load and write the data + // This is really only a bottleneck for the writer + int N_domains = IO::maxDomains( path_in, format_in, comm ); + ASSERT( comm.getSize() == N_domains ); - } // Limit scope - PROFILE_STOP("Main"); - PROFILE_SAVE("convertData",true); - comm.barrier(); - Utilities::shutdown(); - return 0; + // Read the timesteps + auto timesteps = IO::readTimesteps( path_in, format_in ); + + // Loop through the timesteps, reading/writing the data + IO::initialize( path_out, format_out, false ); + for ( auto timestep : timesteps ) { + + // Set the domain to read (needs to be the current rank for the writer to be valid) + int domain = comm.getRank(); + + // Get the maximum number of domains for the + auto data = IO::readData( path_in, timestep, domain ); + + // Save the mesh data to a new file + IO::writeData( timestep, data, comm ); + + } + + } // Limit scope + + // shutdown + PROFILE_STOP("Main"); + PROFILE_SAVE("convertData",true); + Utilities::shutdown(); + return 0; } From dfa97a013251e78728ead35f8e18820a91b2b6b9 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Wed, 17 Mar 2021 13:34:38 -0400 Subject: [PATCH 191/205] Fixing minor build error without timer --- CMakeLists.txt | 2 +- cmake/Find_TIMER.cmake | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d479391..8f500927 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,7 +150,7 @@ IF ( NOT ONLY_BUILD_DOCS ) CONFIGURE_NETCDF() CONFIGURE_SILO() CONFIGURE_LBPM() - CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" ) + CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" FALSE ) CONFIGURE_LINE_COVERAGE() # Set the external library link list SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} ) diff --git a/cmake/Find_TIMER.cmake b/cmake/Find_TIMER.cmake index 7ebc7aea..a4d7bdb2 100644 --- 
a/cmake/Find_TIMER.cmake +++ b/cmake/Find_TIMER.cmake @@ -4,7 +4,7 @@ # CONFIGURE_TIMER( DEFAULT_USE_TIMER NULL_TIMER_DIR ) # This function assumes that USE_TIMER is set to indicate if the timer should be used # If USE_TIMER is set, TIMER_DIRECTORY specifies the install path for the timer -# If USE_TIMER is not set we will create a summy timer that does nothing. +# If USE_TIMER is not set we will create a dummy timer that does nothing. # The input argument DEFAULT_USE_TIMER specifies if the timer library is included by default. # The input argument NULL_TIMER_DIR specifies the location to install the dummy timer. # If it is an empty string, the default install path "${CMAKE_CURRENT_BINARY_DIR}/null_timer" is used. @@ -13,7 +13,7 @@ # TIMER_CXXFLAGS - C++ flags for the timer library # TIMER_LDFLAGS - Linker flags to link the timer library # TIMER_LDLIBS - Linker libraries to link the timer library -FUNCTION( CONFIGURE_TIMER DEFAULT_USE_TIMER NULL_TIMER_DIR ) +FUNCTION( CONFIGURE_TIMER DEFAULT_USE_TIMER NULL_TIMER_DIR QUIET ) # Determine if we want to use the timer utility CHECK_ENABLE_FLAG( USE_TIMER ${DEFAULT_USE_TIMER} ) SET( TIMER_INCLUDE ) @@ -33,20 +33,23 @@ FUNCTION( CONFIGURE_TIMER DEFAULT_USE_TIMER NULL_TIMER_DIR ) FIND_LIBRARY( TIMER_LIBS NAMES timerutility PATHS ${TIMER_DIRECTORY}/lib NO_DEFAULT_PATH ) SET( TIMER_INCLUDE ${TIMER_DIRECTORY}/include ) SET( TIMER_CXXFLAGS "-DUSE_TIMER -I${TIMER_DIRECTORY}/include" ) - SET( TIMER_LDFLAGS -L${TIMER_DIRECTORY}/lib ) - SET( TIMER_LDLIBS -ltimerutility ) + SET( TIMER_LDFLAGS ) + SET( TIMER_LDLIBS "${TIMER_LIBS}" ) ELSE() MESSAGE( FATAL_ERROR "Default search for TIMER is not yet supported. 
Use -D TIMER_DIRECTORY=" ) ENDIF() - SET(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} "${TIMER_DIRECTORY}/lib" PARENT_SCOPE ) + SET( CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} "${TIMER_DIRECTORY}/lib" PARENT_SCOPE ) INCLUDE_DIRECTORIES( "${TIMER_INCLUDE}" ) ADD_DEFINITIONS( -DUSE_TIMER ) - MESSAGE( "Using timer utility" ) - MESSAGE( " TIMER_LIBRARIES = ${TIMER_LIBS}" ) + IF ( NOT QUIET ) + MESSAGE( STATUS "Using timer utility" ) + MESSAGE( STATUS " TIMER_LIBRARIES = ${TIMER_LIBS}" ) + ENDIF() ELSE() IF ( "${NULL_TIMER_DIR}" STREQUAL "" ) SET( NULL_TIMER_DIR "${CMAKE_CURRENT_BINARY_DIR}/null_timer" ) ENDIF() + # Write ProfilerApp.h FILE(WRITE "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_START(...) do {} while(0)\n" ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_STOP(...) do {} while(0)\n" ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_START2(...) do {} while(0)\n" ) @@ -61,9 +64,25 @@ FUNCTION( CONFIGURE_TIMER DEFAULT_USE_TIMER NULL_TIMER_DIR ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_DISABLE_TRACE() do {} while(0)\n" ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_ENABLE_MEMORY() do {} while(0)\n" ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_DISABLE_MEMORY() do {} while(0)\n" ) + # Write MemoryApp.h + FILE(WRITE "${NULL_TIMER_DIR}/MemoryApp.h" "#include \n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" "class MemoryApp final {\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" "public:\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " struct MemoryStats {\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " size_t bytes_new, bytes_delete, N_new, N_delete, tot_bytes_used, system_memory, stack_used, stack_size;\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " MemoryStats() { memset(this,0,sizeof(MemoryStats)); }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " };\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline void print( std::ostream& ) 
{}\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline size_t getMemoryUsage() { return 0; }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline size_t getTotalMemoryUsage() { return 0; }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline size_t getSystemMemory() { return 0; }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline MemoryStats getMemoryStats() { return MemoryStats(); }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" "};\n" ) SET( TIMER_INCLUDE "${NULL_TIMER_DIR}" ) INCLUDE_DIRECTORIES( "${TIMER_INCLUDE}" ) - MESSAGE( "Disabling timer utility" ) + IF ( NOT QUIET ) + MESSAGE( STATUS "Disabling timer utility" ) + ENDIF() ENDIF() SET( TIMER_INCLUDE "${TIMER_INCLUDE}" PARENT_SCOPE ) SET( TIMER_CXXFLAGS "${TIMER_CXXFLAGS}" PARENT_SCOPE ) @@ -88,12 +107,12 @@ MACRO( CHECK_ENABLE_FLAG FLAG DEFAULT ) SET( ${FLAG} ${DEFAULT} ) ELSEIF( ${FLAG} STREQUAL "" ) SET( ${FLAG} ${DEFAULT} ) - ELSEIF( ( ${${FLAG}} STREQUAL "false" ) OR ( ${${FLAG}} STREQUAL "0" ) OR ( ${${FLAG}} STREQUAL "OFF" ) ) + ELSEIF( ( ${${FLAG}} STREQUAL "FALSE" ) OR ( ${${FLAG}} STREQUAL "false" ) OR ( ${${FLAG}} STREQUAL "0" ) OR ( ${${FLAG}} STREQUAL "OFF" ) ) SET( ${FLAG} 0 ) - ELSEIF( ( ${${FLAG}} STREQUAL "true" ) OR ( ${${FLAG}} STREQUAL "1" ) OR ( ${${FLAG}} STREQUAL "ON" ) ) + ELSEIF( ( ${${FLAG}} STREQUAL "TRUE" ) OR ( ${${FLAG}} STREQUAL "true" ) OR ( ${${FLAG}} STREQUAL "1" ) OR ( ${${FLAG}} STREQUAL "ON" ) ) SET( ${FLAG} 1 ) ELSE() - MESSAGE( "Bad value for ${FLAG} (${${FLAG}}); use true or false" ) + MESSAGE( FATAL_ERROR "Bad value for ${FLAG} (${${FLAG}}); use true or false" ) ENDIF () ENDMACRO() From 59b7b9a0fee0b5fe498b6c56776d37c0c82ecc45 Mon Sep 17 00:00:00 2001 From: Mark Berrill Date: Thu, 18 Mar 2021 09:41:39 -0400 Subject: [PATCH 192/205] Fixing issues with enums and the summary files --- IO/Mesh.cpp | 170 ++++++++++++++++-- IO/Mesh.h | 31 +++- IO/MeshDatabase.cpp | 76 ++++---- IO/MeshDatabase.h | 9 +- 
IO/Reader.cpp | 18 +- IO/Writer.cpp | 90 +++++----- IO/silo.cpp | 16 +- IO/silo.h | 17 +- IO/silo.hpp | 4 +- tests/TestWriter.cpp | 400 ++++++++++++++++++++++--------------------- 10 files changed, 480 insertions(+), 351 deletions(-) diff --git a/IO/Mesh.cpp b/IO/Mesh.cpp index 91c78e03..9966bf52 100644 --- a/IO/Mesh.cpp +++ b/IO/Mesh.cpp @@ -1,4 +1,5 @@ #include "Mesh.h" +#include "IO/IOHelpers.h" #include "common/Utilities.h" #include @@ -28,16 +29,23 @@ Mesh::~Mesh() {} /**************************************************** * MeshDataStruct * ****************************************************/ -bool MeshDataStruct::check() const +#define checkResult( pass, msg ) \ + do { \ + if ( !( pass ) ) { \ + if ( abort ) \ + ERROR( msg ); \ + return false; \ + } \ + } while ( 0 ) +bool MeshDataStruct::check( bool abort ) const { - bool pass = mesh != nullptr; for ( const auto &var : vars ) { - pass = pass && static_cast( var->type ) >= 1 && static_cast( var->type ) <= 3; - pass = pass && !var->data.empty(); - } - if ( !pass ) { - std::cerr << "Invalid variable detected\n"; - return false; + checkResult( var->type == VariableType::NodeVariable || + var->type == VariableType::EdgeVariable || + var->type == VariableType::SurfaceVariable || + var->type == VariableType::VolumeVariable, + "Invalid data type" ); + checkResult( !var->data.empty(), "Variable data is empty" ); } const std::string &meshClass = mesh->className(); if ( meshClass == "PointList" ) { @@ -45,7 +53,9 @@ bool MeshDataStruct::check() const ASSERT( mesh2 ); for ( const auto &var : vars ) { if ( var->type == IO::VariableType::NodeVariable ) { - pass = pass && var->data.size() == ArraySize( mesh2->points.size(), var->dim ); + size_t N_points = mesh2->points.size(); + checkResult( var->data.size( 0 ) == N_points, "sizeof NodeVariable" ); + checkResult( var->data.size( 1 ) == var->dim, "sizeof NodeVariable" ); } else if ( var->type == IO::VariableType::EdgeVariable ) { ERROR( "Invalid type for PointList" ); } 
else if ( var->type == IO::VariableType::SurfaceVariable ) { @@ -61,15 +71,16 @@ bool MeshDataStruct::check() const ASSERT( mesh2 ); for ( const auto &var : vars ) { if ( var->type == IO::VariableType::NodeVariable ) { - pass = pass && - var->data.size() == ArraySize( mesh2->vertices->points.size(), var->dim ); + size_t N_points = mesh2->vertices->points.size(); + checkResult( var->data.size( 0 ) == N_points, "sizeof NodeVariable" ); + checkResult( var->data.size( 1 ) == var->dim, "sizeof NodeVariable" ); } else if ( var->type == IO::VariableType::EdgeVariable ) { ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::SurfaceVariable ) { ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::VolumeVariable ) { - pass = pass && var->data.size( 0 ) == mesh2->A.size() && - var->data.size( 1 ) == var->dim; + checkResult( var->data.size( 0 ) == mesh2->A.size(), "sizeof VolumeVariable" ); + checkResult( var->data.size( 1 ) == var->dim, "sizeof VolumeVariable" ); } else { ERROR( "Invalid variable type" ); } @@ -90,14 +101,16 @@ bool MeshDataStruct::check() const } else { ERROR( "Invalid variable type" ); } - if ( var->data.size() == ArraySize( varSize[0] * varSize[1] * varSize[2], varSize[3] ) ) + if ( var->data.size( 0 ) == varSize[0] * varSize[1] * varSize[2] && + var->data.size( 1 ) == varSize[3] ) var->data.resize( varSize ); - pass = pass && var->data.size() == varSize; + for ( int d = 0; d < 4; d++ ) + checkResult( var->data.size( d ) == varSize[d], "DomainMesh Variable" ); } } else { ERROR( "Unknown mesh class: " + mesh->className() ); } - return pass; + return true; } @@ -479,4 +492,129 @@ std::shared_ptr getTriList( std::shared_ptr mesh ) } +/**************************************************** + * Convert enum values * + ****************************************************/ +std::string getString( VariableType type ) +{ + if ( type == VariableType::NodeVariable ) + return "node"; + else if ( type == VariableType::EdgeVariable ) + return 
"edge"; + else if ( type == VariableType::SurfaceVariable ) + return "face"; + else if ( type == VariableType::VolumeVariable ) + return "cell"; + else if ( type == VariableType::NullVariable ) + return "null"; + else + ERROR( "Invalid type" ); + return ""; +} +VariableType getVariableType( const std::string &type_in ) +{ + auto type = deblank( type_in ); + if ( type == "node" ) + return VariableType::NodeVariable; + else if ( type == "edge" || type == "1" ) + return VariableType::EdgeVariable; + else if ( type == "face" ) + return VariableType::SurfaceVariable; + else if ( type == "cell" || type == "3" ) + return VariableType::VolumeVariable; + else if ( type == "null" ) + return VariableType::NullVariable; + else + ERROR( "Invalid type: " + type ); + return VariableType::NullVariable; +} +std::string getString( DataType type ) +{ + if ( type == DataType::Double ) + return "double"; + else if ( type == DataType::Float ) + return "float"; + else if ( type == DataType::Int ) + return "int"; + else if ( type == DataType::Null ) + return "null"; + else + ERROR( "Invalid type" ); + return ""; +} +DataType getDataType( const std::string &type_in ) +{ + auto type = deblank( type_in ); + if ( type == "double" ) + return DataType::Double; + else if ( type == "float" ) + return DataType::Float; + else if ( type == "int" ) + return DataType::Int; + else if ( type == "null" ) + return DataType::Null; + else + ERROR( "Invalid type: " + type ); + return DataType::Null; +} +std::string getString( MeshType type ) +{ + if ( type == MeshType::PointMesh ) + return "PointMesh"; + else if ( type == MeshType::SurfaceMesh ) + return "SurfaceMesh"; + else if ( type == MeshType::VolumeMesh ) + return "VolumeMesh"; + else if ( type == MeshType::Unknown ) + return "unknown"; + else + ERROR( "Invalid type" ); + return ""; +} +MeshType getMeshType( const std::string &type_in ) +{ + auto type = deblank( type_in ); + if ( type == "PointMesh" || type == "1" ) + return MeshType::PointMesh; + else 
if ( type == "SurfaceMesh" || type == "2" ) + return MeshType::SurfaceMesh; + else if ( type == "VolumeMesh" || type == "3" ) + return MeshType::VolumeMesh; + else if ( type == "unknown" || type == "-1" ) + return MeshType::Unknown; + else + ERROR( "Invalid type: " + type ); + return MeshType::Unknown; +} +std::string getString( FileFormat type ) +{ + if ( type == FileFormat::OLD ) + return "old"; + else if ( type == FileFormat::NEW ) + return "new"; + else if ( type == FileFormat::NEW_SINGLE ) + return "new(single)"; + else if ( type == FileFormat::SILO ) + return "silo"; + else + ERROR( "Invalid type" ); + return ""; +} +FileFormat getFileFormat( const std::string &type_in ) +{ + auto type = deblank( type_in ); + if ( type == "old" || type == "1" ) + return FileFormat::OLD; + else if ( type == "new" || type == "2" ) + return FileFormat::NEW; + else if ( type == "new(single)" || type == "3" ) + return FileFormat::NEW_SINGLE; + else if ( type == "silo" || type == "4" ) + return FileFormat::SILO; + else + ERROR( "Invalid type: " + type ); + return FileFormat::SILO; +} + + } // namespace IO diff --git a/IO/Mesh.h b/IO/Mesh.h index a60e14c9..a420f95d 100644 --- a/IO/Mesh.h +++ b/IO/Mesh.h @@ -14,15 +14,28 @@ namespace IO { -//! Possible variable types -enum class VariableType : unsigned char { - NodeVariable = 1, - EdgeVariable = 2, - SurfaceVariable = 3, - VolumeVariable = 4, - NullVariable = 0 +//! Enums to define types +enum class VariableType { + NodeVariable, + EdgeVariable, + SurfaceVariable, + VolumeVariable, + NullVariable }; -enum class DataType : unsigned char { Double = 1, Float = 2, Int = 2, Null = 0 }; +enum class DataType { Double, Float, Int, Null }; +enum class MeshType { PointMesh, SurfaceMesh, VolumeMesh, Unknown }; +enum class FileFormat { OLD, NEW, NEW_SINGLE, SILO }; + + +//! 
Convert enums to/from strings (more future-proof than static_cast) +std::string getString( VariableType ); +std::string getString( DataType ); +std::string getString( MeshType ); +std::string getString( FileFormat ); +VariableType getVariableType( const std::string & ); +DataType getDataType( const std::string & ); +MeshType getMeshType( const std::string & ); +FileFormat getFileFormat( const std::string & ); /*! \class Mesh @@ -216,7 +229,7 @@ struct MeshDataStruct { //! Empty constructor MeshDataStruct() : precision( DataType::Double ) {} //! Check the data - bool check() const; + bool check( bool abort = true ) const; }; diff --git a/IO/MeshDatabase.cpp b/IO/MeshDatabase.cpp index 70b9acc3..63702c7b 100644 --- a/IO/MeshDatabase.cpp +++ b/IO/MeshDatabase.cpp @@ -13,38 +13,31 @@ #include -// MeshType -template<> -size_t packsize( const IO::MeshType &rhs ) -{ - return sizeof( IO::MeshType ); -} -template<> -void pack( const IO::MeshType &rhs, char *buffer ) -{ - memcpy( buffer, &rhs, sizeof( IO::MeshType ) ); -} -template<> -void unpack( IO::MeshType &data, const char *buffer ) -{ - memcpy( &data, buffer, sizeof( IO::MeshType ) ); -} -// Variable::VariableType -template<> -size_t packsize( const IO::VariableType &rhs ) -{ - return sizeof( IO::VariableType ); -} -template<> -void pack( const IO::VariableType &rhs, char *buffer ) -{ - memcpy( buffer, &rhs, sizeof( IO::VariableType ) ); -} -template<> -void unpack( IO::VariableType &data, const char *buffer ) -{ - memcpy( &data, buffer, sizeof( IO::VariableType ) ); -} +// Default pack/unpack +// clang-format off +#define INSTANTIATE_PACK( TYPE ) \ + template<> \ + size_t packsize( const TYPE &rhs ) \ + { \ + return sizeof( TYPE ); \ + } \ + template<> \ + void pack( const TYPE &rhs, char *buffer ) \ + { \ + memcpy( buffer, &rhs, sizeof( IO::MeshType ) ); \ + } \ + template<> \ + void unpack( TYPE &data, const char *buffer ) \ + { \ + memcpy( &data, buffer, sizeof( IO::MeshType ) ); \ + } +INSTANTIATE_PACK( 
IO::VariableType ) +INSTANTIATE_PACK( IO::DataType ) +INSTANTIATE_PACK( IO::MeshType ) +INSTANTIATE_PACK( IO::FileFormat ) +// clang-format on + + // DatabaseEntry template<> size_t packsize( const IO::DatabaseEntry &rhs ) @@ -327,8 +320,7 @@ std::vector gatherAll( // Return the results std::vector data2( data.size() ); size_t i = 0; - for ( std::map::iterator it = data.begin(); it != data.end(); - ++it, ++i ) + for ( auto it = data.begin(); it != data.end(); ++it, ++i ) data2[i] = it->second; PROFILE_STOP( "gatherAll-unpack", 2 ); PROFILE_STOP( "gatherAll" ); @@ -343,19 +335,19 @@ void write( const std::vector &meshes, const std::string &filename FILE *fid = fopen( filename.c_str(), "wb" ); for ( size_t i = 0; i < meshes.size(); i++ ) { fprintf( fid, "%s\n", meshes[i].name.c_str() ); - fprintf( fid, " type: %i\n", static_cast( meshes[i].type ) ); + fprintf( fid, " type: %s\n", getString( meshes[i].type ).data() ); fprintf( fid, " meshClass: %s\n", meshes[i].meshClass.c_str() ); - fprintf( fid, " format: %i\n", static_cast( meshes[i].format ) ); + fprintf( fid, " format: %s\n", getString( meshes[i].format ).data() ); for ( size_t j = 0; j < meshes[i].domains.size(); j++ ) fprintf( fid, " domain: %s\n", meshes[i].domains[j].write().c_str() ); fprintf( fid, " variables: " ); for ( size_t j = 0; j < meshes[i].variables.size(); j++ ) { const VariableDatabase &var = meshes[i].variables[j]; - fprintf( fid, "%s|%i|%i; ", var.name.c_str(), static_cast( var.type ), var.dim ); + fprintf( fid, "%s|%s|%i; ", var.name.data(), getString( var.type ).data(), var.dim ); } fprintf( fid, "\n" ); - std::map, DatabaseEntry>::const_iterator it; - for ( it = meshes[i].variable_data.begin(); it != meshes[i].variable_data.end(); ++it ) { + for ( auto it = meshes[i].variable_data.begin(); it != meshes[i].variable_data.end(); + ++it ) { const char *domain = it->first.first.c_str(); const char *variable = it->first.second.c_str(); fprintf( @@ -386,9 +378,9 @@ std::vector read( const 
std::string &filename ) name.resize( name.size() - 1 ); meshes.back().name = name; } else if ( strncmp( line, " format:", 10 ) == 0 ) { - meshes.back().format = static_cast( atoi( &line[10] ) ); + meshes.back().format = getFileFormat( &line[10] ); } else if ( strncmp( line, " type:", 8 ) == 0 ) { - meshes.back().type = static_cast( atoi( &line[8] ) ); + meshes.back().type = getMeshType( &line[8] ); } else if ( strncmp( line, " meshClass:", 13 ) == 0 ) { meshes.back().meshClass = deblank( std::string( &line[13] ) ); } else if ( strncmp( line, " domain:", 10 ) == 0 ) { @@ -402,7 +394,7 @@ std::vector read( const std::string &filename ) std::vector tmp = splitList( variables[i].c_str(), '|' ); ASSERT( tmp.size() == 3 ); mesh.variables[i].name = tmp[0]; - mesh.variables[i].type = static_cast( atoi( tmp[1].c_str() ) ); + mesh.variables[i].type = getVariableType( tmp[1] ); mesh.variables[i].dim = atoi( tmp[2].c_str() ); } } else if ( strncmp( line, " variable(", 12 ) == 0 ) { diff --git a/IO/MeshDatabase.h b/IO/MeshDatabase.h index 0dfd968c..508f85d8 100644 --- a/IO/MeshDatabase.h +++ b/IO/MeshDatabase.h @@ -13,13 +13,6 @@ namespace IO { -class Mesh; - - -//! Enum to identify mesh type -// enum class MeshType : char { PointMesh=1, SurfaceMesh=2, VolumeMesh=3, Unknown=-1 }; -enum class MeshType { PointMesh = 1, SurfaceMesh = 2, VolumeMesh = 3, Unknown = -1 }; - //! 
Helper struct for containing offsets for the mesh info struct DatabaseEntry { @@ -56,7 +49,7 @@ struct MeshDatabase { std::string name; //!< Name of the mesh MeshType type; //!< Mesh type std::string meshClass; //!< Mesh class - unsigned char format; //!< Data format (1: old, 2: new, 3: new (single), 4: silo) + FileFormat format; //!< Data format (1: old, 2: new, 3: new (single), 4: silo) std::vector domains; //!< List of the domains std::vector variables; //!< List of the variables std::map variable_data; //!< Data for the variables diff --git a/IO/Reader.cpp b/IO/Reader.cpp index e5fae5bc..e63b8dd3 100644 --- a/IO/Reader.cpp +++ b/IO/Reader.cpp @@ -155,7 +155,7 @@ std::shared_ptr IO::getMesh( const std::string &path, const std::strin { PROFILE_START( "getMesh" ); std::shared_ptr mesh; - if ( meshDatabase.format == 1 ) { + if ( meshDatabase.format == FileFormat::OLD ) { // Old format (binary doubles) std::string filename = path + "/" + timestep + "/" + meshDatabase.domains[domain].file; FILE *fid = fopen( filename.c_str(), "rb" ); @@ -206,7 +206,8 @@ std::shared_ptr IO::getMesh( const std::string &path, const std::strin ERROR( "Unknown mesh type" ); } delete[] data; - } else if ( meshDatabase.format == 2 ) { + } else if ( meshDatabase.format == FileFormat::NEW || + meshDatabase.format == FileFormat::NEW_SINGLE ) { const DatabaseEntry &database = meshDatabase.domains[domain]; std::string filename = path + "/" + timestep + "/" + database.file; FILE *fid = fopen( filename.c_str(), "rb" ); @@ -233,7 +234,7 @@ std::shared_ptr IO::getMesh( const std::string &path, const std::strin } mesh->unpack( std::pair( bytes, data ) ); delete[] data; - } else if ( meshDatabase.format == 4 ) { + } else if ( meshDatabase.format == FileFormat::SILO ) { // Reading a silo file #ifdef USE_SILO const DatabaseEntry &database = meshDatabase.domains[domain]; @@ -301,12 +302,11 @@ std::shared_ptr IO::getVariable( const std::string &path, const st const MeshDatabase &meshDatabase, int domain, 
const std::string &variable ) { std::pair key( meshDatabase.domains[domain].name, variable ); - std::map, DatabaseEntry>::const_iterator it; - it = meshDatabase.variable_data.find( key ); + auto it = meshDatabase.variable_data.find( key ); if ( it == meshDatabase.variable_data.end() ) return std::shared_ptr(); std::shared_ptr var; - if ( meshDatabase.format == 2 ) { + if ( meshDatabase.format == FileFormat::NEW || meshDatabase.format == FileFormat::NEW_SINGLE ) { const DatabaseEntry &database = it->second; std::string filename = path + "/" + timestep + "/" + database.file; FILE *fid = fopen( filename.c_str(), "rb" ); @@ -318,13 +318,13 @@ std::shared_ptr IO::getVariable( const std::string &path, const st std::vector values = splitList( &line[i2 + 1], ',' ); ASSERT( values.size() == 5 ); int dim = atoi( values[0].c_str() ); - int type = atoi( values[1].c_str() ); + auto type = values[1]; size_t N = atol( values[2].c_str() ); size_t bytes = atol( values[3].c_str() ); std::string precision = values[4]; var = std::shared_ptr( new IO::Variable() ); var->dim = dim; - var->type = static_cast( type ); + var->type = getVariableType( type ); var->name = variable; var->data.resize( N, dim ); if ( precision == "double" ) { @@ -334,7 +334,7 @@ std::shared_ptr IO::getVariable( const std::string &path, const st ERROR( "Format not implimented" ); } fclose( fid ); - } else if ( meshDatabase.format == 4 ) { + } else if ( meshDatabase.format == FileFormat::SILO ) { // Reading a silo file #ifdef USE_SILO const auto &database = meshDatabase.domains[domain]; diff --git a/IO/Writer.cpp b/IO/Writer.cpp index d3f9d991..051db47d 100644 --- a/IO/Writer.cpp +++ b/IO/Writer.cpp @@ -110,7 +110,7 @@ static std::vector writeMeshesOrigFormat( mesh_entry.name = meshData[i].meshName; mesh_entry.type = meshType( *mesh ); mesh_entry.meshClass = meshData[i].mesh->className(); - mesh_entry.format = 1; + mesh_entry.format = IO::FileFormat::OLD; IO::DatabaseEntry domain; domain.name = domainname; 
domain.file = filename; @@ -171,7 +171,7 @@ static std::vector writeMeshesOrigFormat( // Create the database entry for the mesh data static IO::MeshDatabase getDatabase( - const std::string &filename, const IO::MeshDataStruct &mesh, int format, int rank ) + const std::string &filename, const IO::MeshDataStruct &mesh, IO::FileFormat format, int rank ) { char domainname[100]; sprintf( domainname, "%s_%05i", mesh.meshName.c_str(), rank ); @@ -209,8 +209,8 @@ static IO::MeshDatabase getDatabase( // Write a mesh (and variables) to a file -static IO::MeshDatabase write_domain( - FILE *fid, const std::string &filename, const IO::MeshDataStruct &mesh, int format, int rank ) +static IO::MeshDatabase write_domain( FILE *fid, const std::string &filename, + const IO::MeshDataStruct &mesh, IO::FileFormat format, int rank ) { const int level = 0; // Create the MeshDatabase @@ -225,19 +225,17 @@ static IO::MeshDatabase write_domain( delete[]( char * ) data.second; // Write the variables for ( size_t i = 0; i < mesh.vars.size(); i++ ) { + ASSERT( mesh.vars[i]->type != IO::VariableType::NullVariable ); std::pair key( domain.name, mesh.vars[i]->name ); - IO::DatabaseEntry &variable = database.variable_data[key]; - variable.offset = ftell( fid ); - int dim = mesh.vars[i]->dim; - int type = static_cast( mesh.vars[i]->type ); - size_t N = mesh.vars[i]->data.length(); - if ( type == static_cast( IO::VariableType::NullVariable ) ) { - ERROR( "Variable type not set" ); - } - size_t N_mesh = mesh.mesh->numberPointsVar( mesh.vars[i]->type ); + auto &variable = database.variable_data[key]; + variable.offset = ftell( fid ); + int dim = mesh.vars[i]->dim; + auto type = getString( mesh.vars[i]->type ); + size_t N = mesh.vars[i]->data.length(); + size_t N_mesh = mesh.mesh->numberPointsVar( mesh.vars[i]->type ); ASSERT( N == dim * N_mesh ); - fprintf( fid, "Var: %s-%05i-%s: %i, %i, %lu, %lu, double\n", database.name.c_str(), rank, - variable.name.c_str(), dim, type, N_mesh, N * sizeof( double ) 
); + fprintf( fid, "Var: %s-%05i-%s: %i, %s, %lu, %lu, double\n", database.name.c_str(), rank, + variable.name.c_str(), dim, type.data(), N_mesh, N * sizeof( double ) ); fwrite( mesh.vars[i]->data.data(), sizeof( double ), N, fid ); fprintf( fid, "\n" ); } @@ -259,7 +257,7 @@ static void writeSiloPointMesh( z[i] = points[i].z; } const TYPE *coords[] = { x.data(), y.data(), z.data() }; - silo::writePointMesh( fid, meshname, 3, points.size(), coords ); + IO::silo::writePointMesh( fid, meshname, 3, points.size(), coords ); } static void writeSiloPointList( DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) @@ -281,19 +279,19 @@ static void writeSiloPointList( z[i] = points[i].z; } const double *coords[] = { x.data(), y.data(), z.data() }; - silo::writePointMesh( fid, meshname, 3, points.size(), coords ); + IO::silo::writePointMesh( fid, meshname, 3, points.size(), coords ); for ( size_t i = 0; i < meshData.vars.size(); i++ ) { const IO::Variable &var = *meshData.vars[i]; if ( var.precision == IO::DataType::Double ) { - silo::writePointMeshVariable( fid, meshname, var.name, var.data ); + IO::silo::writePointMeshVariable( fid, meshname, var.name, var.data ); } else if ( var.precision == IO::DataType::Float ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writePointMeshVariable( fid, meshname, var.name, data2 ); + IO::silo::writePointMeshVariable( fid, meshname, var.name, data2 ); } else if ( var.precision == IO::DataType::Int ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writePointMeshVariable( fid, meshname, var.name, data2 ); + IO::silo::writePointMeshVariable( fid, meshname, var.name, data2 ); } else { ERROR( "Unsupported format" ); } @@ -312,7 +310,7 @@ static void writeSiloTriMesh( DBfile *fid, const IO::TriMesh &mesh, const std::s } const TYPE *coords[] = { x.data(), y.data(), z.data() }; const int *tri[] = { mesh.A.data(), mesh.B.data(), mesh.C.data() }; - silo::writeTriMesh( fid, meshname, 3, 2, 
points.size(), coords, mesh.A.size(), tri ); + IO::silo::writeTriMesh( fid, meshname, 3, 2, points.size(), coords, mesh.A.size(), tri ); } static void writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct &meshData, const IO::TriMesh &mesh, IO::MeshDatabase database ) @@ -327,17 +325,16 @@ static void writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct &meshData, } for ( size_t i = 0; i < meshData.vars.size(); i++ ) { const IO::Variable &var = *meshData.vars[i]; - auto type = static_cast( var.type ); if ( var.precision == IO::DataType::Double ) { - silo::writeTriMeshVariable( fid, 3, meshname, var.name, var.data, type ); + IO::silo::writeTriMeshVariable( fid, 3, meshname, var.name, var.data, var.type ); } else if ( var.precision == IO::DataType::Float ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, type ); + IO::silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, var.type ); } else if ( var.precision == IO::DataType::Int ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, type ); + IO::silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, var.type ); } else { ERROR( "Unsupported format" ); } @@ -367,30 +364,29 @@ static void writeSiloDomainMesh( ( info.kz + 1 ) * mesh.Lz / info.nz }; std::array N = { mesh.nx, mesh.ny, mesh.nz }; auto meshname = database.domains[0].name; - silo::writeUniformMesh<3>( fid, meshname, range, N ); - silo::write( + IO::silo::writeUniformMesh<3>( fid, meshname, range, N ); + IO::silo::write( fid, meshname + "_rankinfo", { mesh.rank, mesh.nprocx, mesh.nprocy, mesh.nprocz } ); for ( size_t i = 0; i < meshData.vars.size(); i++ ) { const auto &var = *meshData.vars[i]; - auto type = static_cast( var.type ); if ( var.precision == IO::DataType::Double ) { - silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, var.data, type ); + 
IO::silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, var.data, var.type ); } else if ( var.precision == IO::DataType::Float ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, type ); + IO::silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, var.type ); } else if ( var.precision == IO::DataType::Int ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, type ); + IO::silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, var.type ); } else { ERROR( "Unsupported format" ); } } } // Write a mesh (and variables) to a file -static IO::MeshDatabase write_domain_silo( - DBfile *fid, const std::string &filename, const IO::MeshDataStruct &mesh, int format, int rank ) +static IO::MeshDatabase write_domain_silo( DBfile *fid, const std::string &filename, + const IO::MeshDataStruct &mesh, IO::FileFormat format, int rank ) { // Create the MeshDatabase auto database = getDatabase( filename, mesh, format, rank ); @@ -432,7 +428,7 @@ std::pair getSiloMeshType( const std::string &meshClass ) void writeSiloSummary( const std::vector &meshes_written, const std::string &filename ) { - auto fid = silo::open( filename, silo::CREATE ); + auto fid = IO::silo::open( filename, IO::silo::CREATE ); for ( const auto &data : meshes_written ) { auto type = getSiloMeshType( data.meshClass ); std::vector meshTypes( data.domains.size(), type.first ); @@ -440,22 +436,23 @@ void writeSiloSummary( std::vector meshNames; for ( const auto &tmp : data.domains ) meshNames.push_back( tmp.file + ":" + tmp.name ); - silo::writeMultiMesh( fid, data.name, meshNames, meshTypes ); + IO::silo::writeMultiMesh( fid, data.name, meshNames, meshTypes ); for ( const auto &variable : data.variables ) { std::vector varnames; for ( const auto &tmp : data.domains ) varnames.push_back( tmp.file + ":" + variable.name ); - 
silo::writeMultiVar( fid, variable.name, varnames, varTypes ); + IO::silo::writeMultiVar( fid, variable.name, varnames, varTypes ); } } - silo::close( fid ); + IO::silo::close( fid ); } #endif // Write the mesh data in the new format static std::vector writeMeshesNewFormat( - const std::vector &meshData, const std::string &path, int format, int rank ) + const std::vector &meshData, const std::string &path, IO::FileFormat format, + int rank ) { std::vector meshes_written; char filename[100], fullpath[200]; @@ -473,19 +470,20 @@ static std::vector writeMeshesNewFormat( // Write the mesh data to silo static std::vector writeMeshesSilo( - const std::vector &meshData, const std::string &path, int format, int rank ) + const std::vector &meshData, const std::string &path, IO::FileFormat format, + int rank ) { #ifdef USE_SILO std::vector meshes_written; char filename[100], fullpath[200]; sprintf( filename, "%05i.silo", rank ); sprintf( fullpath, "%s/%s", path.c_str(), filename ); - auto fid = silo::open( fullpath, silo::CREATE ); + auto fid = IO::silo::open( fullpath, IO::silo::CREATE ); for ( size_t i = 0; i < meshData.size(); i++ ) { auto mesh = meshData[i].mesh; meshes_written.push_back( write_domain_silo( fid, filename, meshData[i], format, rank ) ); } - silo::close( fid ); + IO::silo::close( fid ); return meshes_written; #else NULL_USE( meshData ); @@ -509,10 +507,8 @@ void IO::writeData( const std::string &subdir, const std::vector -namespace silo { +namespace IO::silo { /**************************************************** @@ -34,16 +34,16 @@ void close( DBfile *fid ) { DBClose( fid ); } /**************************************************** * Helper functions * ****************************************************/ -VariableDataType varDataType( DBfile *fid, const std::string &name ) +DataType varDataType( DBfile *fid, const std::string &name ) { - auto type = DBGetVarType( fid, name.c_str() ); - VariableDataType type2 = VariableDataType::UNKNOWN; + auto type = 
DBGetVarType( fid, name.c_str() ); + DataType type2 = DataType::Null; if ( type == DB_DOUBLE ) - type2 = VariableDataType::DOUBLE; + type2 = DataType::Double; else if ( type == DB_FLOAT ) - type2 = VariableDataType::FLOAT; + type2 = DataType::Float; else if ( type == DB_INT ) - type2 = VariableDataType::INT; + type2 = DataType::Int; return type2; } @@ -99,7 +99,7 @@ void writeMultiVar( DBfile *fid, const std::string &varname, } -}; // namespace silo +}; // namespace IO::silo #else diff --git a/IO/silo.h b/IO/silo.h index 309746f3..5e1068fe 100644 --- a/IO/silo.h +++ b/IO/silo.h @@ -5,6 +5,7 @@ #include #include +#include "IO/Mesh.h" #include "common/Array.h" #include "common/Communication.h" #include "common/MPI.h" @@ -17,21 +18,11 @@ typedef int DBfile; #endif -namespace silo { +namespace IO::silo { enum FileMode { READ, WRITE, CREATE }; -enum class VariableType : int { - NodeVariable = 1, - EdgeVariable = 2, - SurfaceVariable = 2, - VolumeVariable = 3, - NullVariable = 0 -}; - -enum class VariableDataType { DOUBLE, FLOAT, INT, UNKNOWN }; - /*! * @brief Open silo file @@ -57,7 +48,7 @@ void close( DBfile *fid ); * @param[in] fid Handle to the open file * @param[in] name Name of variable */ -VariableDataType varDataType( DBfile *dbfile, const std::string &name ); +DataType varDataType( DBfile *dbfile, const std::string &name ); /*! 
@@ -265,7 +256,7 @@ void writeMultiVar( DBfile *fid, const std::string &varname, const std::vector &subVarNames, const std::vector &subVarTypes ); -}; // namespace silo +}; // namespace IO::silo #endif #include "IO/silo.hpp" diff --git a/IO/silo.hpp b/IO/silo.hpp index 1e17aa5c..b76ebd28 100644 --- a/IO/silo.hpp +++ b/IO/silo.hpp @@ -13,7 +13,7 @@ #include -namespace silo { +namespace IO::silo { /**************************************************** @@ -413,7 +413,7 @@ Array readTriMeshVariable( DBfile *fid, const std::string &varname ) } -}; // namespace silo +}; // namespace IO::silo #endif diff --git a/tests/TestWriter.cpp b/tests/TestWriter.cpp index 9b7e381c..652c3d4c 100644 --- a/tests/TestWriter.cpp +++ b/tests/TestWriter.cpp @@ -1,115 +1,117 @@ +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include "common/UnitTest.h" -#include "common/Utilities.h" -#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" #include "ProfilerApp.h" +#include "common/MPI.h" +#include "common/UnitTest.h" +#include "common/Utilities.h" -inline bool approx_equal( const Point& A, const Point& B ) +inline bool approx_equal( const Point &A, const Point &B ) { - double tol = 1e-7*sqrt(A.x*A.x+A.y*A.y+A.z*A.z); - return fabs(A.x-B.x)<=tol && fabs(A.y-B.y)<=tol && fabs(A.z-B.z)<=tol; + double tol = 1e-7 * sqrt( A.x * A.x + A.y * A.y + A.z * A.z ); + return fabs( A.x - B.x ) <= tol && fabs( A.y - B.y ) <= tol && fabs( A.z - B.z ) <= tol; } -inline bool approx_equal( const double& A, const double& B ) +inline bool approx_equal( const double &A, const double &B ) { - return fabs(A-B) <= std::max(1e-7*fabs(A+B),1e-20); + return fabs( A - B ) <= std::max( 1e-7 * fabs( A + B ), 1e-20 ); } -inline double distance( const Point& p ) -{ - return sqrt(p.x*p.x+p.y*p.y+p.z*p.z); -} +inline double distance( const Point &p ) { return sqrt( p.x * p.x + p.y * p.y + p.z * p.z ); } -bool 
checkMesh( const std::vector& meshData, const std::string& format, std::shared_ptr mesh ) +bool checkMesh( const std::vector &meshData, const std::string &format, + std::shared_ptr mesh ) { // Get direct access to the meshes used to test the reader - const auto pointmesh = dynamic_cast( meshData[0].mesh.get() ); - const auto trimesh = dynamic_cast( meshData[1].mesh.get() ); - const auto trilist = dynamic_cast( meshData[2].mesh.get() ); - const auto domain = dynamic_cast( meshData[3].mesh.get() ); - const size_t N_tri = trimesh->A.size(); + const auto pointmesh = dynamic_cast( meshData[0].mesh.get() ); + const auto trimesh = dynamic_cast( meshData[1].mesh.get() ); + const auto trilist = dynamic_cast( meshData[2].mesh.get() ); + const auto domain = dynamic_cast( meshData[3].mesh.get() ); + const size_t N_tri = trimesh->A.size(); if ( mesh->className() == "pointmesh" ) { // Check the pointmesh - auto pmesh = IO::getPointList(mesh); - if ( pmesh.get()==NULL ) + auto pmesh = IO::getPointList( mesh ); + if ( pmesh.get() == NULL ) return false; if ( pmesh->points.size() != pointmesh->points.size() ) return false; } if ( mesh->className() == "trimesh" || mesh->className() == "trilist" ) { // Check the trimesh/trilist - auto mesh1 = IO::getTriMesh(mesh); - auto mesh2 = IO::getTriList(mesh); - if ( mesh1.get()==NULL || mesh2.get()==NULL ) - return false; - if ( mesh1->A.size()!=N_tri || mesh1->B.size()!=N_tri || mesh1->C.size()!=N_tri || - mesh2->A.size()!=N_tri || mesh2->B.size()!=N_tri || mesh2->C.size()!=N_tri ) - return false; - const std::vector& P1 = mesh1->vertices->points; - const std::vector& A1 = mesh1->A; - const std::vector& B1 = mesh1->B; - const std::vector& C1 = mesh1->C; - const std::vector& A2 = mesh2->A; - const std::vector& B2 = mesh2->B; - const std::vector& C2 = mesh2->C; - const std::vector& A = trilist->A; - const std::vector& B = trilist->B; - const std::vector& C = trilist->C; - for (size_t i=0; iA.size() != N_tri || mesh1->B.size() != N_tri || 
mesh1->C.size() != N_tri || + mesh2->A.size() != N_tri || mesh2->B.size() != N_tri || mesh2->C.size() != N_tri ) + return false; + const std::vector &P1 = mesh1->vertices->points; + const std::vector &A1 = mesh1->A; + const std::vector &B1 = mesh1->B; + const std::vector &C1 = mesh1->C; + const std::vector &A2 = mesh2->A; + const std::vector &B2 = mesh2->B; + const std::vector &C2 = mesh2->C; + const std::vector &A = trilist->A; + const std::vector &B = trilist->B; + const std::vector &C = trilist->C; + for ( size_t i = 0; i < N_tri; i++ ) { + if ( !approx_equal( P1[A1[i]], A[i] ) || !approx_equal( P1[B1[i]], B[i] ) || + !approx_equal( P1[C1[i]], C[i] ) ) return false; - if ( !approx_equal(A2[i],A[i]) || !approx_equal(B2[i],B[i]) || !approx_equal(C2[i],C[i]) ) + if ( !approx_equal( A2[i], A[i] ) || !approx_equal( B2[i], B[i] ) || + !approx_equal( C2[i], C[i] ) ) return false; } } - if ( mesh->className() == "domain" && format!="old" ) { + if ( mesh->className() == "domain" && format != "old" ) { // Check the domain mesh - const IO::DomainMesh& mesh1 = *std::dynamic_pointer_cast(mesh); - if ( mesh1.nprocx!=domain->nprocx || mesh1.nprocy!=domain->nprocy || mesh1.nprocz!=domain->nprocz ) + const IO::DomainMesh &mesh1 = *std::dynamic_pointer_cast( mesh ); + if ( mesh1.nprocx != domain->nprocx || mesh1.nprocy != domain->nprocy || + mesh1.nprocz != domain->nprocz ) return false; - if ( mesh1.nx!=domain->nx || mesh1.ny!=domain->ny || mesh1.nz!=domain->nz ) + if ( mesh1.nx != domain->nx || mesh1.ny != domain->ny || mesh1.nz != domain->nz ) return false; - if ( mesh1.Lx!=domain->Lx || mesh1.Ly!=domain->Ly || mesh1.Lz!=domain->Lz ) + if ( mesh1.Lx != domain->Lx || mesh1.Ly != domain->Ly || mesh1.Lz != domain->Lz ) return false; } return true; } -bool checkVar( const std::string& format, std::shared_ptr mesh, +bool checkVar( const std::string &format, std::shared_ptr mesh, std::shared_ptr variable1, std::shared_ptr variable2 ) { - if ( format=="new" ) + if ( format == "new" ) 
IO::reformatVariable( *mesh, *variable2 ); - bool pass = true; - const IO::Variable& var1 = *variable1; - const IO::Variable& var2 = *variable2; - pass = var1.name == var2.name; - pass = pass && var1.dim == var2.dim; - pass = pass && var1.type == var2.type; - pass = pass && var1.data.length() == var2.data.length(); + bool pass = true; + const IO::Variable &var1 = *variable1; + const IO::Variable &var2 = *variable2; + pass = var1.name == var2.name; + pass = pass && var1.dim == var2.dim; + pass = pass && var1.type == var2.type; + pass = pass && var1.data.length() == var2.data.length(); if ( pass ) { - for (size_t m=0; m& meshData, UnitTest& ut ) +void testWriter( + const std::string &format, std::vector &meshData, UnitTest &ut ) { PROFILE_SCOPED( path, 0, timer ); @@ -124,20 +126,20 @@ void testWriter( const std::string& format, std::vector& mes // Get the format std::string format2 = format; - auto precision = IO::DataType::Double; + auto precision = IO::DataType::Double; if ( format == "silo-double" ) { - format2 = "silo"; + format2 = "silo"; precision = IO::DataType::Double; } else if ( format == "silo-float" ) { - format2 = "silo"; + format2 = "silo"; precision = IO::DataType::Float; } // Set the precision for the variables - for ( auto& data : meshData ) { + for ( auto &data : meshData ) { data.precision = precision; - for ( auto& var : data.vars ) + for ( auto &var : data.vars ) var->precision = precision; } @@ -150,18 +152,18 @@ void testWriter( const std::string& format, std::vector& mes // Get a list of the timesteps auto timesteps = IO::readTimesteps( path, format2 ); - if ( timesteps.size()==2 ) - ut.passes(format+": Corrent number of timesteps"); + if ( timesteps.size() == 2 ) + ut.passes( format + ": Corrent number of timesteps" ); else - ut.failure(format+": Incorrent number of timesteps"); + ut.failure( format + ": Incorrent number of timesteps" ); // Test the simple read interface bool pass = true; - for ( const auto& timestep : timesteps ) { + for ( 
const auto ×tep : timesteps ) { auto data = IO::readData( path, timestep, comm.getRank() ); - pass = pass && data.size() == meshData.size(); - for ( size_t i=0; i& mes // Test reading each mesh domain - for ( const auto& timestep : timesteps ) { + for ( const auto ×tep : timesteps ) { // Load the list of meshes and check its size - auto databaseList = IO::getMeshList(path,timestep); - if ( databaseList.size()==meshData.size() ) - ut.passes(format+": Corrent number of meshes found"); + auto databaseList = IO::getMeshList( path, timestep ); + if ( databaseList.size() == meshData.size() ) + ut.passes( format + ": Corrent number of meshes found" ); else - ut.failure(format+": Incorrent number of meshes found"); + ut.failure( format + ": Incorrent number of meshes found" ); // Check the number of domains for each mesh - for ( const auto& database : databaseList ) { + for ( const auto &database : databaseList ) { int N_domains = database.domains.size(); if ( N_domains != nprocs ) { ut.failure( format + ": Incorrent number of domains for mesh" ); @@ -188,8 +190,8 @@ void testWriter( const std::string& format, std::vector& mes } // For each domain, load the mesh and check its data bool pass = true; - for (int k=0; k& mes } } if ( pass ) { - ut.passes(format+": Mesh \"" + database.name + "\" loaded correctly"); + ut.passes( format + ": Mesh \"" + database.name + "\" loaded correctly" ); } else { - ut.failure(format+": Mesh \"" + database.name + "\" did not load correctly"); + ut.failure( format + ": Mesh \"" + database.name + "\" did not load correctly" ); continue; } // Load the variables and check their data - if ( format=="old" ) - continue; // Old format does not support variables - const IO::MeshDataStruct* mesh0 = nullptr; - for (size_t k=0; kvars.size(); v++) { - PROFILE_START(format+"-read-getVariable"); - auto variable = IO::getVariable(path,timestep,database,k,mesh0->vars[v]->name); + for ( int k = 0; k < N_domains; k++ ) { + auto mesh = IO::getMesh( path, 
timestep, database, k ); + for ( size_t v = 0; v < mesh0->vars.size(); v++ ) { + PROFILE_START( format + "-read-getVariable" ); + auto variable = + IO::getVariable( path, timestep, database, k, mesh0->vars[v]->name ); pass = checkVar( format, mesh, mesh0->vars[v], variable ); if ( pass ) { - ut.passes(format+": Variable \"" + variable->name + "\" matched"); + ut.passes( format + ": Variable \"" + variable->name + "\" matched" ); } else { - ut.failure(format+": Variable \"" + variable->name + "\" did not match"); + ut.failure( + format + ": Variable \"" + variable->name + "\" did not match" ); break; } } @@ -233,157 +237,161 @@ void testWriter( const std::string& format, std::vector& mes // Main -int main(int argc, char **argv) +int main( int argc, char **argv ) { Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + int rank = comm.getRank(); int nprocs = comm.getSize(); - Utilities::setAbortBehavior(true,2); + Utilities::setAbortBehavior( true, 2 ); Utilities::setErrorHandlers(); UnitTest ut; // Create some points const int N_points = 8; - const int N_tri = 12; - double x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - double y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 }; - double z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 }; - int tri[N_tri][3] = { - {0,1,3}, {0,3,2}, {4,5,7}, {4,7,6}, // z faces - {0,1,4}, {1,4,5}, {2,3,6}, {3,6,7}, // y faces - {0,2,4}, {2,4,6}, {1,3,5}, {3,5,7} // x faces + const int N_tri = 12; + double x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + double y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 }; + double z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 }; + int tri[N_tri][3] = { + { 0, 1, 3 }, { 0, 3, 2 }, { 4, 5, 7 }, { 4, 7, 6 }, // z faces + { 0, 1, 4 }, { 1, 4, 5 }, { 2, 3, 6 }, { 3, 6, 7 }, // y faces + { 0, 2, 4 }, { 2, 4, 6 }, { 1, 3, 5 }, { 3, 5, 7 } // x faces }; // Create the meshes - auto set1 = std::make_shared(N_points); - for (int i=0; i( N_points ); + for ( int i = 0; i < N_points; i++ ) { set1->points[i].x = x[i]; set1->points[i].y = y[i]; 
set1->points[i].z = z[i]; } - auto trimesh = std::make_shared(N_tri,set1); - for (int i=0; i( N_tri, set1 ); + for ( int i = 0; i < N_tri; i++ ) { trimesh->A[i] = tri[i][0]; trimesh->B[i] = tri[i][1]; trimesh->C[i] = tri[i][2]; } - auto trilist = std::make_shared(*trimesh); - for (int i=0; iA[i],A) || !approx_equal(trilist->B[i],B) || !approx_equal(trilist->C[i],C) ) - { - printf("Failed to create trilist\n"); + auto trilist = std::make_shared( *trimesh ); + for ( int i = 0; i < N_tri; i++ ) { + Point A( x[tri[i][0]], y[tri[i][0]], z[tri[i][0]] ); + Point B( x[tri[i][1]], y[tri[i][1]], z[tri[i][1]] ); + Point C( x[tri[i][2]], y[tri[i][2]], z[tri[i][2]] ); + if ( !approx_equal( trilist->A[i], A ) || !approx_equal( trilist->B[i], B ) || + !approx_equal( trilist->C[i], C ) ) { + printf( "Failed to create trilist\n" ); return -1; } } RankInfoStruct rank_data( rank, nprocs, 1, 1 ); - auto domain = std::make_shared(rank_data,6,7,8,1.0,1.0,1.0); + auto domain = std::make_shared( rank_data, 6, 7, 8, 1.0, 1.0, 1.0 ); // Create the variables const auto NodeVar = IO::VariableType::NodeVariable; const auto VolVar = IO::VariableType::VolumeVariable; - auto set_node_mag = std::make_shared(1,NodeVar,"Node_set_mag"); - auto set_node_vec = std::make_shared(3,NodeVar,"Node_set_vec"); - auto list_node_mag = std::make_shared(1,NodeVar,"Node_list_mag"); - auto list_node_vec = std::make_shared(3,NodeVar,"Node_list_vec"); - auto point_node_mag = std::make_shared(1,NodeVar,"Node_point_mag"); - auto point_node_vec = std::make_shared(3,NodeVar,"Node_point_vec"); - auto domain_node_mag = std::make_shared(1,NodeVar,"Node_domain_mag"); - auto domain_node_vec = std::make_shared(3,NodeVar,"Node_domain_vec"); - auto set_cell_mag = std::make_shared(1,VolVar,"Cell_set_mag"); - auto set_cell_vec = std::make_shared(3,VolVar,"Cell_set_vec"); - auto list_cell_mag = std::make_shared(1,VolVar,"Cell_list_mag"); - auto list_cell_vec = std::make_shared(3,VolVar,"Cell_list_vec"); - auto domain_cell_mag = 
std::make_shared(1,VolVar,"Cell_domain_mag"); - auto domain_cell_vec = std::make_shared(3,VolVar,"Cell_domain_vec"); + auto set_node_mag = std::make_shared( 1, NodeVar, "Node_set_mag" ); + auto set_node_vec = std::make_shared( 3, NodeVar, "Node_set_vec" ); + auto list_node_mag = std::make_shared( 1, NodeVar, "Node_list_mag" ); + auto list_node_vec = std::make_shared( 3, NodeVar, "Node_list_vec" ); + auto point_node_mag = std::make_shared( 1, NodeVar, "Node_point_mag" ); + auto point_node_vec = std::make_shared( 3, NodeVar, "Node_point_vec" ); + auto domain_node_mag = std::make_shared( 1, NodeVar, "Node_domain_mag" ); + auto domain_node_vec = std::make_shared( 3, NodeVar, "Node_domain_vec" ); + auto set_cell_mag = std::make_shared( 1, VolVar, "Cell_set_mag" ); + auto set_cell_vec = std::make_shared( 3, VolVar, "Cell_set_vec" ); + auto list_cell_mag = std::make_shared( 1, VolVar, "Cell_list_mag" ); + auto list_cell_vec = std::make_shared( 3, VolVar, "Cell_list_vec" ); + auto domain_cell_mag = std::make_shared( 1, VolVar, "Cell_domain_mag" ); + auto domain_cell_vec = std::make_shared( 3, VolVar, "Cell_domain_vec" ); point_node_mag->data.resize( N_points ); point_node_vec->data.resize( N_points, 3 ); - for (int i=0; idata(i) = distance(set1->points[i]); - point_node_vec->data(i,0) = set1->points[i].x; - point_node_vec->data(i,1) = set1->points[i].y; - point_node_vec->data(i,2) = set1->points[i].z; + for ( int i = 0; i < N_points; i++ ) { + point_node_mag->data( i ) = distance( set1->points[i] ); + point_node_vec->data( i, 0 ) = set1->points[i].x; + point_node_vec->data( i, 1 ) = set1->points[i].y; + point_node_vec->data( i, 2 ) = set1->points[i].z; } set_node_mag->data = point_node_mag->data; set_node_vec->data = point_node_vec->data; - list_node_mag->data.resize( 3*N_tri ); - list_node_vec->data.resize( 3*N_tri, 3 ); - for (int i=0; idata(3*i+0) = distance(trilist->A[i]); - list_node_mag->data(3*i+1) = distance(trilist->B[i]); - list_node_mag->data(3*i+2) = 
distance(trilist->C[i]); - list_node_vec->data(3*i+0,0) = trilist->A[i].x; - list_node_vec->data(3*i+0,1) = trilist->A[i].y; - list_node_vec->data(3*i+0,2) = trilist->A[i].z; - list_node_vec->data(3*i+1,0) = trilist->B[i].x; - list_node_vec->data(3*i+1,1) = trilist->B[i].y; - list_node_vec->data(3*i+1,2) = trilist->B[i].z; - list_node_vec->data(3*i+2,0) = trilist->C[i].x; - list_node_vec->data(3*i+2,1) = trilist->C[i].y; - list_node_vec->data(3*i+2,2) = trilist->C[i].z; + list_node_mag->data.resize( 3 * N_tri ); + list_node_vec->data.resize( 3 * N_tri, 3 ); + for ( int i = 0; i < N_points; i++ ) { + list_node_mag->data( 3 * i + 0 ) = distance( trilist->A[i] ); + list_node_mag->data( 3 * i + 1 ) = distance( trilist->B[i] ); + list_node_mag->data( 3 * i + 2 ) = distance( trilist->C[i] ); + list_node_vec->data( 3 * i + 0, 0 ) = trilist->A[i].x; + list_node_vec->data( 3 * i + 0, 1 ) = trilist->A[i].y; + list_node_vec->data( 3 * i + 0, 2 ) = trilist->A[i].z; + list_node_vec->data( 3 * i + 1, 0 ) = trilist->B[i].x; + list_node_vec->data( 3 * i + 1, 1 ) = trilist->B[i].y; + list_node_vec->data( 3 * i + 1, 2 ) = trilist->B[i].z; + list_node_vec->data( 3 * i + 2, 0 ) = trilist->C[i].x; + list_node_vec->data( 3 * i + 2, 1 ) = trilist->C[i].y; + list_node_vec->data( 3 * i + 2, 2 ) = trilist->C[i].z; } - domain_node_mag->data.resize(domain->nx+1,domain->ny+1,domain->nz+1); - domain_node_vec->data.resize({(size_t)domain->nx+1,(size_t)domain->ny+1,(size_t)domain->nz+1,3}); - for (int i=0; inx+1; i++) { - for (int j=0; jny+1; j++) { - for (int k=0; knz+1; k++) { - domain_node_mag->data(i,j,k) = distance(Point(i,j,k)); - domain_node_vec->data(i,j,k,0) = Point(i,j,k).x; - domain_node_vec->data(i,j,k,1) = Point(i,j,k).y; - domain_node_vec->data(i,j,k,2) = Point(i,j,k).z; + domain_node_mag->data.resize( domain->nx + 1, domain->ny + 1, domain->nz + 1 ); + domain_node_vec->data.resize( + { (size_t) domain->nx + 1, (size_t) domain->ny + 1, (size_t) domain->nz + 1, 3 } ); + for ( int i = 
0; i < domain->nx + 1; i++ ) { + for ( int j = 0; j < domain->ny + 1; j++ ) { + for ( int k = 0; k < domain->nz + 1; k++ ) { + domain_node_mag->data( i, j, k ) = distance( Point( i, j, k ) ); + domain_node_vec->data( i, j, k, 0 ) = Point( i, j, k ).x; + domain_node_vec->data( i, j, k, 1 ) = Point( i, j, k ).y; + domain_node_vec->data( i, j, k, 2 ) = Point( i, j, k ).z; } } } set_cell_mag->data.resize( N_tri ); set_cell_vec->data.resize( N_tri, 3 ); - for (int i=0; idata(i) = i; - set_cell_vec->data(i,0) = 3*i+0; - set_cell_vec->data(i,1) = 3*i+1; - set_cell_vec->data(i,2) = 3*i+2; + for ( int i = 0; i < N_tri; i++ ) { + set_cell_mag->data( i ) = i; + set_cell_vec->data( i, 0 ) = 3 * i + 0; + set_cell_vec->data( i, 1 ) = 3 * i + 1; + set_cell_vec->data( i, 2 ) = 3 * i + 2; } list_cell_mag->data = set_cell_mag->data; list_cell_vec->data = set_cell_vec->data; - domain_cell_mag->data.resize(domain->nx,domain->ny,domain->nz); - domain_cell_vec->data.resize({(size_t)domain->nx,(size_t)domain->ny,(size_t)domain->nz,3}); - for (int i=0; inx; i++) { - for (int j=0; jny; j++) { - for (int k=0; knz; k++) { - domain_cell_mag->data(i,j,k) = distance(Point(i,j,k)); - domain_cell_vec->data(i,j,k,0) = Point(i,j,k).x; - domain_cell_vec->data(i,j,k,1) = Point(i,j,k).y; - domain_cell_vec->data(i,j,k,2) = Point(i,j,k).z; + domain_cell_mag->data.resize( domain->nx, domain->ny, domain->nz ); + domain_cell_vec->data.resize( + { (size_t) domain->nx, (size_t) domain->ny, (size_t) domain->nz, 3 } ); + for ( int i = 0; i < domain->nx; i++ ) { + for ( int j = 0; j < domain->ny; j++ ) { + for ( int k = 0; k < domain->nz; k++ ) { + domain_cell_mag->data( i, j, k ) = distance( Point( i, j, k ) ); + domain_cell_vec->data( i, j, k, 0 ) = Point( i, j, k ).x; + domain_cell_vec->data( i, j, k, 1 ) = Point( i, j, k ).y; + domain_cell_vec->data( i, j, k, 2 ) = Point( i, j, k ).z; } } } // Create the MeshDataStruct - std::vector meshData(4); + std::vector meshData( 4 ); meshData[0].meshName = 
"pointmesh"; - meshData[0].mesh = set1; - meshData[0].vars.push_back(point_node_mag); - meshData[0].vars.push_back(point_node_vec); + meshData[0].mesh = set1; + meshData[0].vars.push_back( point_node_mag ); + meshData[0].vars.push_back( point_node_vec ); meshData[1].meshName = "trimesh"; - meshData[1].mesh = trimesh; - meshData[1].vars.push_back(set_node_mag); - meshData[1].vars.push_back(set_node_vec); - meshData[1].vars.push_back(set_cell_mag); - meshData[1].vars.push_back(set_cell_vec); + meshData[1].mesh = trimesh; + meshData[1].vars.push_back( set_node_mag ); + meshData[1].vars.push_back( set_node_vec ); + meshData[1].vars.push_back( set_cell_mag ); + meshData[1].vars.push_back( set_cell_vec ); meshData[2].meshName = "trilist"; - meshData[2].mesh = trilist; - meshData[2].vars.push_back(list_node_mag); - meshData[2].vars.push_back(list_node_vec); - meshData[2].vars.push_back(list_cell_mag); - meshData[2].vars.push_back(list_cell_vec); + meshData[2].mesh = trilist; + meshData[2].vars.push_back( list_node_mag ); + meshData[2].vars.push_back( list_node_vec ); + meshData[2].vars.push_back( list_cell_mag ); + meshData[2].vars.push_back( list_cell_vec ); meshData[3].meshName = "domain"; - meshData[3].mesh = domain; - meshData[3].vars.push_back(domain_node_mag); - meshData[3].vars.push_back(domain_node_vec); - meshData[3].vars.push_back(domain_cell_mag); - meshData[3].vars.push_back(domain_cell_vec); + meshData[3].mesh = domain; + meshData[3].vars.push_back( domain_node_mag ); + meshData[3].vars.push_back( domain_node_vec ); + meshData[3].vars.push_back( domain_cell_mag ); + meshData[3].vars.push_back( domain_cell_vec ); + for ( const auto &data : meshData ) + ASSERT( data.check( true ) ); // Run the tests testWriter( "old", meshData, ut ); @@ -393,11 +401,9 @@ int main(int argc, char **argv) // Finished ut.report(); - PROFILE_SAVE("TestWriter",true); + PROFILE_SAVE( "TestWriter", true ); int N_errors = ut.NumFailGlobal(); comm.barrier(); Utilities::shutdown(); return 
N_errors; } - - From c1cd959da61968df006756b5ca30bff5b3bd5179 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 18 Mar 2021 14:51:26 -0400 Subject: [PATCH 193/205] D3Q7 Lee model (looks anisotropic) --- common/ScaLBL.cpp | 40 +++-- common/ScaLBL.h | 15 +- cpu/FreeLee.cpp | 339 +++++++++++++++++++++++++--------------- models/FreeLeeModel.cpp | 74 +++++++-- 4 files changed, 300 insertions(+), 168 deletions(-) diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index dcadb08e..4726bae6 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -516,9 +516,9 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis n = k*Nx*Ny+j*Nx+i; if (id[n] > 0){ // Counts for the six faces - if (i>0 && i<=width) Map(n)=idx++; - else if (j>0 && j<=width) Map(n)=idx++; - else if (k>0 && k<=width) Map(n)=idx++; + if (i>0 && i<=width) Map(n)=idx++; + else if (j>0 && j<=width) Map(n)=idx++; + else if (k>0 && k<=width) Map(n)=idx++; else if (i>Nx-width-2 && iNy-width-2 && jNz-width-2 && k +#include #define STOKES @@ -70,6 +71,8 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double gqbar[17*Np+n] = 0.0277777777777778*(p-0.5*(Fy-Fz)); ; //double(100*n)+17.f; gqbar[18*Np+n] = 0.0277777777777778*(p-0.5*(-Fy+Fz));; //double(100*n)+18.f; } + + } extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, @@ -101,7 +104,8 @@ extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, doubl nz = nz/ColorMag_temp; theta = M*cs2_inv*(1-4.0*phi*phi)/W; - + theta = 0; // try more diffusive initial condition + hq[0*Np+idx]=0.3333333333333333*(phi); hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); @@ -116,7 +120,7 @@ extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, doubl extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, 
double rhoA, double rhoB, int start, int finish, int Np){ - int idx,n,nread; + int idx,nread; double fq,phi; for (int n=start; nSendD3Q7AA(hq,0); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(NeighborList, dvcMap, hq, Den, Phi, ColorGrad, Velocity, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); - ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(NeighborList, dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(NeighborList, dvcMap, hq, Den, Phi, ColorGrad, Velocity, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL + // Halo exchange for phase field + ScaLBL_D3Q7_ComputePhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_WideHalo->Send(Phi); + ScaLBL_Comm_WideHalo->Recv(Phi); if (BoundaryCondition > 0 && BoundaryCondition < 5){ //TODO to be revised + // Need to add BC for hq!!! 
ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } - // Halo exchange for phase field - ScaLBL_Comm_WideHalo->Send(Phi); + printf("write debug strideY=%i strideZ = %i \n",Nxh, Nxh*Nyh); + WriteDebug_TwoFluid(); + + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_WideHalo->Recv(Phi); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); // Set BCs @@ -793,33 +799,34 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); - printf("write debug strideY=%i strideZ = %i \n",Nxh, Nxh*Nyh); - WriteDebug_TwoFluid(); - + // *************EVEN TIMESTEP************* timestep++; // Compute the Phase indicator field ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMA - ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_D3Q7_AAeven_FreeLee_PhaseField(dvcMap, hq, Den, Phi, ColorGrad, Velocity, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); - ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_D3Q7_AAeven_FreeLee_PhaseField(dvcMap, hq, Den, Phi, ColorGrad, Velocity, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); // 
Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL // Halo exchange for phase field + ScaLBL_D3Q7_ComputePhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_WideHalo->Send(Phi); + ScaLBL_Comm_WideHalo->Recv(Phi); if (BoundaryCondition > 0 && BoundaryCondition < 5){ ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } - ScaLBL_Comm_WideHalo->Send(Phi); + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_WideHalo->Recv(Phi); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); // Set boundary conditions @@ -964,6 +971,32 @@ void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ DoubleArray PhaseData(Nxh,Nyh,Nzh); //ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); ScaLBL_CopyToHost(PhaseData.data(), Phi, sizeof(double)*Nh); + /* + IntArray MapData(Np); + ScaLBL_CopyToHost(MapData.data(), dvcMap, sizeof(int)*Np); + FILE *MAP; + sprintf(LocalRankFilename,"Map.%05i.raw",rank); + MAP = fopen(LocalRankFilename,"wb"); + fwrite(MapData.data(),4,Np,MAP); + fclose(MAP); + + FILE *NB; + //IntArray Neighbors(18,Np); + //ScaLBL_CopyToHost(Neighbors.data(), NeighborList, sizeof(int)*Np*18); + sprintf(LocalRankFilename,"neighbors.%05i.raw",rank); + NB = fopen(LocalRankFilename,"wb"); + fwrite(NeighborList,4,18*Np,NB); + fclose(NB); + + FILE *DIST; + DoubleArray DistData(7, Np); + ScaLBL_CopyToHost(DistData.data(), hq, 7*sizeof(double)*Np); + sprintf(LocalRankFilename,"h.%05i.raw",rank); + DIST = fopen(LocalRankFilename,"wb"); + fwrite(DistData.data(),8,7*Np,DIST); + fclose(DIST); + + */ FILE *OUTFILE; 
sprintf(LocalRankFilename,"Phase.%05i.raw",rank); @@ -972,6 +1005,17 @@ void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ fclose(OUTFILE); DoubleArray PhaseField(Nx,Ny,Nz); + FILE *DIST; + for (int q=0; q<7; q++){ + ScaLBL_Comm->RegularLayout(Map,&hq[q*Np],PhaseField); + + sprintf(LocalRankFilename,"h%i.%05i.raw",q,rank); + DIST = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,Nx*Ny*Nz,DIST); + fclose(DIST); + + } + ScaLBL_Comm->RegularLayout(Map,Den,PhaseField); FILE *AFILE; sprintf(LocalRankFilename,"Density.%05i.raw",rank); @@ -1082,7 +1126,7 @@ void ScaLBL_FreeLeeModel::Create_DummyPhase_MGTest(){ if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); Map.resize(Nx,Ny,Nz); Map.fill(-2); auto neighborList= new int[18*Npad]; - Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,2); + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1); comm.barrier(); //........................................................................... 
From eb62dbd38ecc9b6680ad72038b52555573992b74 Mon Sep 17 00:00:00 2001 From: James McClure Date: Thu, 18 Mar 2021 21:06:15 -0400 Subject: [PATCH 194/205] standalone d3q7 mass collision for color model --- common/ScaLBL.h | 6 ++ cpu/Color.cpp | 194 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 198 insertions(+), 2 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 89965f4e..4eb6dd7a 100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -155,6 +155,12 @@ extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *NeighborList, int *Map, double extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np); +extern "C" void ScaLBL_D3Q7_AAodd_Color(int *neighborList, int *Map, double *Aq, double *Bq, double *Den, + double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double beta, int start, int finish, int Np); + +extern "C" void ScaLBL_D3Q7_AAeven_Color(int *Map, double *Aq, double *Bq, double *Den, + double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double beta, int start, int finish, int Np); + extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *Phi, double *ColorGrad, int start, int finish, int Np, int Nx, int Ny, int Nz); extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz); diff --git a/cpu/Color.cpp b/cpu/Color.cpp index 35cbd5fd..6f67a6fc 100644 --- a/cpu/Color.cpp +++ b/cpu/Color.cpp @@ -2489,10 +2489,200 @@ extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *di } } + +extern "C" void ScaLBL_D3Q7_AAodd_Color(int *neighborList, int *Map, double *Aq, double *Bq, double *Den, + double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double beta, int start, int finish, int Np){ + + int nr1,nr2,nr3,nr4,nr5,nr6; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; 
//color gradient magnitude and direction + double ux,uy,uz; + double phi; + // Instantiate mass transport distributions + // Stationary value - distribution 0 + for (int n=start; n0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + //nread = neighborList[n+Np]; + Aq[nr2] = a1; + Bq[nr2] = b1; + // q=2 + //nread = neighborList[n]; + Aq[nr1] = a2; + Bq[nr1] = b2; + + //............................................... + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + //nread = neighborList[n+3*Np]; + Aq[nr4] = a1; + Bq[nr4] = b1; + // q = 4 + //nread = neighborList[n+2*Np]; + Aq[nr3] = a2; + Bq[nr3] = b2; + + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + //nread = neighborList[n+5*Np]; + Aq[nr6] = a1; + Bq[nr6] = b1; + // q = 6 + //nread = neighborList[n+4*Np]; + Aq[nr5] = a2; + Bq[nr5] = b2; + //............................................... 
+ } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Color(int *Map, double *Aq, double *Bq, double *Den, + double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double beta, int start, int finish, int Np){ + + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi; + // Instantiate mass transport distributions + // Stationary value - distribution 0 + for (int n=start; n0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... 
+ + } +} + + extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np){ - int idx,n,nread; + int idx,nread; double fq,nA,nB; for (int n=start; n Date: Fri, 19 Mar 2021 20:54:29 -0400 Subject: [PATCH 195/205] working bubble for FreeLee --- cpu/FreeLee.cpp | 27 +++++++++++++++------------ models/FreeLeeModel.cpp | 4 +--- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index e32b4ca4..be9b903b 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -223,6 +223,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map double ux,uy,uz; double phi; double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + double factor = 1.0; for (int n=start; nColor_BC_z(dvcMap, Phi, Den, inletA, inletB); ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); } - - printf("write debug strideY=%i strideZ = %i \n",Nxh, Nxh*Nyh); - WriteDebug_TwoFluid(); ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, @@ -865,6 +862,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); if (rank==0) printf("********************************************************\n"); + WriteDebug_TwoFluid(); // ************************************************************************ } From dadc9709474ac25f9cb37e08f570b427d9bd820d Mon Sep 17 00:00:00 2001 From: James McClure Date: Sat, 20 Mar 2021 13:12:49 -0400 Subject: [PATCH 196/205] clean up arguments for Free Lee --- common/ScaLBL.h | 8 +- cpu/FreeLee.cpp | 32 +- cuda/FreeLee.cu | 1406 ++++++++++++++++++++++++----------------------- 3 files changed, 734 insertions(+), 712 deletions(-) diff --git a/common/ScaLBL.h b/common/ScaLBL.h index 4eb6dd7a..3bc752dc 
100644 --- a/common/ScaLBL.h +++ b/common/ScaLBL.h @@ -206,12 +206,12 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, dou extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); -extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np); extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure, diff --git a/cpu/FreeLee.cpp b/cpu/FreeLee.cpp index be9b903b..16a9b3bd 100644 --- a/cpu/FreeLee.cpp +++ b/cpu/FreeLee.cpp @@ -244,7 +244,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int 
*neighborList, int *Map //Normalize the Color Gradient C = sqrt(nx*nx+ny*ny+nz*nz); double ColorMag = C; - if (C < 1.0e-8) ColorMag=1.0; + if (C < 1.0e-12) ColorMag=1.0; nx = nx/ColorMag; ny = ny/ColorMag; nz = nz/ColorMag; @@ -292,15 +292,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map hq[nr6] = h5; hq[nr5] = h6; //........................................................................ - - //phi = h0+h1+h2+h3+h4+h5+h6; - - // save the number densities - //Den[n] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); - - // save the phase indicator field - //Phi[idx] = phi; - + } } @@ -335,7 +327,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, dou //Normalize the Color Gradient C = sqrt(nx*nx+ny*ny+nz*nz); double ColorMag = C; - if (C < 1.0e-8) ColorMag=1.0; + if (C < 1.0e-12) ColorMag=1.0; nx = nx/ColorMag; ny = ny/ColorMag; nz = nz/ColorMag; @@ -368,15 +360,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, dou hq[5*Np+n] = h5; hq[6*Np+n] = h6; //........................................................................ 
- - //phi = h0+h1+h2+h3+h4+h5+h6; - - // save the number densities - //Den[n] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); - - // save the phase indicator field - //Phi[idx] = phi; - + } } @@ -408,8 +392,8 @@ extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den } -extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ int nn,nn2x,ijk; @@ -960,8 +944,8 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou } } -extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ int nn,nn2x,ijk; diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu index e37a92a3..3d9afccb 100644 --- a/cuda/FreeLee.cu +++ b/cuda/FreeLee.cu @@ -185,10 +185,16 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int 
*neighborList, } } -__global__ void dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, - double rhoA, double rhoB, int start, int finish, int Np){ - int idx,n; - double fq,phi; +__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ + + int idx,nr1,nr2,nr3,nr4,nr5,nr6; + double h0,h1,h2,h3,h4,h5,h6; + double nx,ny,nz,C; + double ux,uy,uz; + double phi; + double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + double factor = 1.0; // for (int n=start; n Date: Sat, 20 Mar 2021 13:13:00 -0400 Subject: [PATCH 197/205] clean up arguments for Free Lee --- models/FreeLeeModel.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index 209a36c9..a2c7dcda 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -778,7 +778,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ } ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE @@ -797,7 +797,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } - ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, 
tauB, kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); @@ -822,7 +822,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ } ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL - ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); @@ -839,7 +839,7 @@ void ScaLBL_FreeLeeModel::Run_TwoFluid(){ ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } - ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, hq, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, tauM, + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); //************************************************************************ From 68b881bdb03d964417a86dbda6db67bed3b62157 Mon Sep 17 00:00:00 2001 From: James McClure Date: Sat, 20 Mar 2021 18:36:09 -0400 Subject: [PATCH 198/205] draft for cuda version of free lee model --- cuda/FreeLee.cu | 78 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu index 3d9afccb..7352585a 100644 --- a/cuda/FreeLee.cu +++ b/cuda/FreeLee.cu @@ -1,4 +1,5 @@ #include +#include #define STOKES @@ -2001,54 +2002,119 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dis } extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np){ - 
+ + dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init<<>>( gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init: %s \n",cudaGetErrorString(err)); + } } extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np){ - + + ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init<<>>( gqbar, Fx, Fy, Fz, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init: %s \n",cudaGetErrorString(err)); + } } extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ + ScaLBL_FreeLeeModel_PhaseField_Init<<>>(Map, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_FreeLeeModel_PhaseField_Init: %s \n",cudaGetErrorString(err)); + } + + } extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np) { - /* need to add launcher */ + cudaFuncSetCacheConfig(ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, cudaFuncCachePreferL1); + ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel, + rhoA, rhoB, tauM, W, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_FreeLee_PhaseField: %s \n",cudaGetErrorString(err)); + } } extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ - /****/ + 
+ cudaFuncSetCacheConfig(ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, cudaFuncCachePreferL1); + ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_FreeLee_PhaseField: %s \n",cudaGetErrorString(err)); + } } extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){ + cudaFuncSetCacheConfig(ScaLBL_D3Q7_ComputePhaseField, cudaFuncCachePreferL1); + ScaLBL_D3Q7_ComputePhaseField<<>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_ComputePhaseField: %s \n",cudaGetErrorString(err)); + } + } extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + cudaFuncSetCacheConfig(ScaLBL_D3Q19_AAodd_FreeLeeModel, cudaFuncCachePreferL1); + ScaLBL_D3Q19_AAodd_FreeLeeModel<<>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel: %s \n",cudaGetErrorString(err)); + } } extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int 
finish, int Np){ + cudaFuncSetCacheConfig(ScaLBL_D3Q19_AAeven_FreeLeeModel, cudaFuncCachePreferL1); + ScaLBL_D3Q19_AAeven_FreeLeeModel<<>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel: %s \n",cudaGetErrorString(err)); + } + } extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure, double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ - + cudaFuncSetCacheConfig(ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, cudaFuncCachePreferL1); + ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<>>(neighborList, dist, Vel, Pressure, + tau, Fx, Fy, Fz, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK: %s \n",cudaGetErrorString(err)); + } } extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ - + + cudaFuncSetCacheConfig(ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, cudaFuncCachePreferL1); + ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<>>(dist, Vel, Pressure, + tau, Fx, Fy, Fz, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK: %s \n",cudaGetErrorString(err)); + } } From f3b7f6ee7e953f998acceb18160ac967298fdc65 Mon Sep 17 00:00:00 2001 From: James McClure Date: Sat, 20 Mar 2021 20:39:33 -0400 Subject: [PATCH 199/205] fix ubuntu config --- sample_scripts/configure_ubuntu | 1 - 1 file changed, 1 deletion(-) diff --git a/sample_scripts/configure_ubuntu b/sample_scripts/configure_ubuntu 
index fab18662..290e8078 100755 --- a/sample_scripts/configure_ubuntu +++ b/sample_scripts/configure_ubuntu @@ -6,7 +6,6 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D CMAKE_CXX_FLAGS="-O3 -fPIC " \ -D CMAKE_CXX_STANDARD=14 \ -D MPIEXEC=mpirun \ - -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ -D CUDA_FLAGS="-arch sm_35" \ -D CUDA_HOST_COMPILER="/usr/bin/gcc" \ From 9f9b0dbffe45868e6ec23493b89a610836bb3b6d Mon Sep 17 00:00:00 2001 From: James McClure Date: Sat, 20 Mar 2021 22:24:28 -0400 Subject: [PATCH 200/205] adding functions to get data for analysis routines --- models/FreeLeeModel.cpp | 39 +++++++++++++++++++++++++++++++++++++++ models/FreeLeeModel.h | 6 +++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index a2c7dcda..ef6f0772 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -20,6 +20,45 @@ Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0), ScaLBL_FreeLeeModel::~ScaLBL_FreeLeeModel(){ } + + +void ScaLBL_FreeLeeModel::getPhase(DoubleArray &PhaseValues){ + + DoubleArray PhaseWideHalo(Nxh,Nyh,Nzh); + ScaLBL_CopyToHost(PhaseWideHalo.data(), Phi, sizeof(double)*Nh); + + // use halo width = 1 for analysis data + for (int k=1; kRegularLayout(Map,Pressure,PressureValues); + ScaLBL_Comm->Barrier(); comm.barrier(); + + ScaLBL_Comm->RegularLayout(Map,mu_phi,MuValues); + ScaLBL_Comm->Barrier(); comm.barrier(); + +} + +void ScaLBL_FreeLeeModel::getVelocity(DoubleArray &Vel_x, DoubleArray &Vel_y, DoubleArray &Vel_z){ + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Vel_x); + ScaLBL_Comm->Barrier(); comm.barrier(); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Vel_y); + ScaLBL_Comm->Barrier(); comm.barrier(); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Vel_z); + ScaLBL_Comm->Barrier(); comm.barrier(); +} + void ScaLBL_FreeLeeModel::ReadParams(string filename){ // read the input database 
db = std::make_shared( filename ); diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 23afe39b..af1906db 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -76,8 +76,12 @@ public: double *Velocity; double *Pressure; + void getPhase(DoubleArray &PhaseValues); + void getPotential(DoubleArray &PressureValues, DoubleArray &MuValues); + void getVelocity(DoubleArray &Vx, DoubleArray &Vy, DoubleArray &Vz); + DoubleArray SignDist; - + private: Utilities::MPI comm; From d913b9bdc8c091537c06cdd083b376cab864e330 Mon Sep 17 00:00:00 2001 From: James McClure Date: Sun, 21 Mar 2021 00:15:23 -0400 Subject: [PATCH 201/205] added visualization capability for Lee model --- analysis/FreeEnergy.cpp | 181 +++++++++++++++++++++++++++++++ analysis/FreeEnergy.h | 54 +++++++++ models/FreeLeeModel.cpp | 28 ++--- models/FreeLeeModel.h | 9 +- tests/lbpm_freelee_simulator.cpp | 30 ++++- tests/testGlobalMassFreeLee.cpp | 2 +- 6 files changed, 278 insertions(+), 26 deletions(-) create mode 100644 analysis/FreeEnergy.cpp create mode 100644 analysis/FreeEnergy.h diff --git a/analysis/FreeEnergy.cpp b/analysis/FreeEnergy.cpp new file mode 100644 index 00000000..6a641a95 --- /dev/null +++ b/analysis/FreeEnergy.cpp @@ -0,0 +1,181 @@ +#include "analysis/FreeEnergy.h" + +FreeEnergyAnalyzer::FreeEnergyAnalyzer(std::shared_ptr dm): + Dm(dm) +{ + + Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz; + Volume=(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz()*1.0; + + ChemicalPotential.resize(Nx,Ny,Nz); ChemicalPotential.fill(0); + Phi.resize(Nx,Ny,Nz); Phi.fill(0); + Pressure.resize(Nx,Ny,Nz); Pressure.fill(0); + Rho.resize(Nx,Ny,Nz); Rho.fill(0); + Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Velocity components (x, y, z) + Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0); + Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0); + SDs.resize(Nx,Ny,Nz); SDs.fill(0); + + if (Dm->rank()==0){ + bool WriteHeader=false; + TIMELOG = fopen("free.csv","r"); + if (TIMELOG != NULL) + fclose(TIMELOG); + else + 
WriteHeader=true; + + TIMELOG = fopen("free.csv","a+"); + if (WriteHeader) + { + // If timelog is empty, write a short header to list the averages + //fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); + fprintf(TIMELOG,"timestep\n"); + } + } + +} + +FreeEnergyAnalyzer::~FreeEnergyAnalyzer(){ + if (Dm->rank()==0){ + fclose(TIMELOG); + } +} + +void FreeEnergyAnalyzer::SetParams(){ + +} + +void FreeEnergyAnalyzer::Basic(ScaLBL_FreeLeeModel &LeeModel, int timestep){ + + int i,j,k; + + if (Dm->rank()==0){ + fprintf(TIMELOG,"%i ",timestep); + /*for (int ion=0; ion input_db, int timestep){ + + auto vis_db = input_db->getDatabase( "Visualization" ); + char VisName[40]; + + std::vector visData; + fillHalo fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1); + + IO::initialize("","silo","false"); + // Create the MeshDataStruct + visData.resize(1); + + visData[0].meshName = "domain"; + visData[0].mesh = std::make_shared( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz ); + auto VisPhase = std::make_shared(); + auto VisPressure = std::make_shared(); + auto VisChemicalPotential = std::make_shared(); + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); + + + if (vis_db->getWithDefault( "save_phase_field", true )){ + VisPhase->name = "Phase"; + VisPhase->type = IO::VariableType::VolumeVariable; + VisPhase->dim = 1; + VisPhase->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VisPhase); + } + + if (vis_db->getWithDefault( "save_potential", true )){ + + VisPressure->name = "Pressure"; + VisPressure->type = IO::VariableType::VolumeVariable; + VisPressure->dim = 1; + VisPressure->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VisPressure); + + VisChemicalPotential->name = "ChemicalPotential"; + VisChemicalPotential->type = IO::VariableType::VolumeVariable; + VisChemicalPotential->dim = 1; + 
VisChemicalPotential->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VisChemicalPotential); + } + + if (vis_db->getWithDefault( "save_velocity", false )){ + VxVar->name = "Velocity_x"; + VxVar->type = IO::VariableType::VolumeVariable; + VxVar->dim = 1; + VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VxVar); + VyVar->name = "Velocity_y"; + VyVar->type = IO::VariableType::VolumeVariable; + VyVar->dim = 1; + VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VyVar); + VzVar->name = "Velocity_z"; + VzVar->type = IO::VariableType::VolumeVariable; + VzVar->dim = 1; + VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VzVar); + } + + if (vis_db->getWithDefault( "save_phase", true )){ + ASSERT(visData[0].vars[0]->name=="Phase"); + LeeModel.getPhase(Phi); + Array& PhaseData = visData[0].vars[0]->data; + fillData.copy(Phi,PhaseData); + } + + if (vis_db->getWithDefault( "save_potential", true )){ + ASSERT(visData[0].vars[1]->name=="Pressure"); + LeeModel.getPotential(Pressure, ChemicalPotential); + Array& PressureData = visData[0].vars[1]->data; + fillData.copy(Pressure,PressureData); + + ASSERT(visData[0].vars[2]->name=="ChemicalPotential"); + Array& ChemicalPotentialData = visData[0].vars[2]->data; + fillData.copy(ChemicalPotential,ChemicalPotentialData); + } + + if (vis_db->getWithDefault( "save_velocity", false )){ + ASSERT(visData[0].vars[3]->name=="Velocity_x"); + ASSERT(visData[0].vars[4]->name=="Velocity_y"); + ASSERT(visData[0].vars[5]->name=="Velocity_z"); + LeeModel.getVelocity(Vel_x,Vel_y,Vel_z); + Array& VelxData = visData[0].vars[3]->data; + Array& VelyData = visData[0].vars[4]->data; + Array& VelzData = visData[0].vars[5]->data; + fillData.copy(Vel_x,VelxData); + fillData.copy(Vel_y,VelyData); + fillData.copy(Vel_z,VelzData); + } + + if (vis_db->getWithDefault( "write_silo", true )) + IO::writeData( timestep, visData, Dm->Comm ); + +/* if 
(vis_db->getWithDefault( "save_8bit_raw", true )){ + char CurrentIDFilename[40]; + sprintf(CurrentIDFilename,"id_t%d.raw",timestep); + Averages.AggregateLabels(CurrentIDFilename); + } +*/ +} diff --git a/analysis/FreeEnergy.h b/analysis/FreeEnergy.h new file mode 100644 index 00000000..fbb1ba31 --- /dev/null +++ b/analysis/FreeEnergy.h @@ -0,0 +1,54 @@ +/* + * analysis and visualization tools for the free-energy (Lee) lattice Boltzmann model + */ + +#ifndef FreeEnergyAnalyzer_INC +#define FreeEnergyAnalyzer_INC + +#include +#include "common/Domain.h" +#include "common/Utilities.h" +#include "common/MPI.h" +#include "common/Communication.h" +#include "analysis/analysis.h" +#include "analysis/distance.h" +#include "analysis/Minkowski.h" +#include "analysis/SubPhase.h" +#include "IO/MeshDatabase.h" +#include "IO/Reader.h" +#include "IO/Writer.h" +#include "models/FreeLeeModel.h" + +class FreeEnergyAnalyzer{ +public: + std::shared_ptr Dm; + double Volume; + // input variables + double rho_n, rho_w; + double nu_n, nu_w; + double gamma_wn, beta; + double Fx, Fy, Fz; + + //........................................................................... 
+ int Nx,Ny,Nz; + DoubleArray Rho; + DoubleArray Phi; + DoubleArray ChemicalPotential; + DoubleArray Pressure; + DoubleArray Vel_x; + DoubleArray Vel_y; + DoubleArray Vel_z; + DoubleArray SDs; + + FreeEnergyAnalyzer(std::shared_ptr Dm); + ~FreeEnergyAnalyzer(); + + void SetParams(); + void Basic( ScaLBL_FreeLeeModel &LeeModel, int timestep); + void WriteVis( ScaLBL_FreeLeeModel &LeeModel, std::shared_ptr input_db, int timestep); + +private: + FILE *TIMELOG; +}; +#endif + diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index ef6f0772..428db40f 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -31,7 +31,7 @@ void ScaLBL_FreeLeeModel::getPhase(DoubleArray &PhaseValues){ for (int k=1; k( t2 - t1 ).count() / timestep; + double cputime = std::chrono::duration( t2 - t1 ).count() / (EXIT_TIME-START_TIME); // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; - if (rank==0) printf("********************************************************\n"); - if (rank==0) printf("CPU time = %f \n", cputime); - if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); - MLUPS *= nprocs; - if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); - if (rank==0) printf("********************************************************\n"); - - WriteDebug_TwoFluid(); - // ************************************************************************ + return MLUPS; } void ScaLBL_FreeLeeModel::Run_SingleFluid(){ diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index af1906db..17cc6323 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -16,6 +16,9 @@ Implementation of Lee et al JCP 2016 lattice boltzmann model #include "common/ScaLBL.h" #include "common/WideHalo.h" +#ifndef ScaLBL_FreeLeeModel_INC +#define ScaLBL_FreeLeeModel_INC + class ScaLBL_FreeLeeModel{ public: ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM); @@ -28,11 +31,13 @@ public: void ReadInput(); void 
Create_TwoFluid(); void Initialize_TwoFluid(); - void Run_TwoFluid(); + double Run_TwoFluid(int returntime); + void WriteDebug_TwoFluid(); void Create_SingleFluid(); void Initialize_SingleFluid(); void Run_SingleFluid(); + void WriteDebug_SingleFluid(); // test utilities void Create_DummyPhase_MGTest(); @@ -97,4 +102,4 @@ private: void AssignComponentLabels_ChemPotential_ColorGrad(); }; - +#endif diff --git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp index 3663c4e9..0508c43a 100644 --- a/tests/lbpm_freelee_simulator.cpp +++ b/tests/lbpm_freelee_simulator.cpp @@ -8,6 +8,7 @@ #include "common/Utilities.h" #include "models/FreeLeeModel.h" +#include "analysis/FreeEnergy.h" //******************************************************************* // Implementation of Free-Energy Two-Phase LBM (Lee model) @@ -52,10 +53,33 @@ int main( int argc, char **argv ) LeeModel.SetDomain(); LeeModel.ReadInput(); LeeModel.Create_TwoFluid(); + + FreeEnergyAnalyzer Analysis(LeeModel.Dm); + LeeModel.Initialize_TwoFluid(); - LeeModel.Run_TwoFluid(); - LeeModel.WriteDebug_TwoFluid(); - + + /*** RUN MAIN TIMESTEPS HERE ************/ + double MLUPS=0.0; + int timestep = 0; + int visualization_time = LeeModel.timestepMax; + if (LeeModel.vis_db->keyExists( "visualizataion_interval" )){ + visualization_time = LeeModel.vis_db->getScalar( "visualizataion_interval" ); + timestep += visualization_time; + } + while (LeeModel.timestep < LeeModel.timestepMax){ + MLUPS = LeeModel.Run_TwoFluid(timestep); + if (rank==0) printf("Lattice update rate (per MPI process)= %f MLUPS \n", MLUPS); + Analysis.WriteVis(LeeModel,LeeModel.db, timestep); + timestep += visualization_time; + } + //LeeModel.WriteDebug_TwoFluid(); + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) 
printf("********************************************************\n"); + // ************************************************************************ + PROFILE_STOP("Main"); auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); auto level = db->getWithDefault( "TimerLevel", 1 ); diff --git a/tests/testGlobalMassFreeLee.cpp b/tests/testGlobalMassFreeLee.cpp index c9073b9a..2e976854 100644 --- a/tests/testGlobalMassFreeLee.cpp +++ b/tests/testGlobalMassFreeLee.cpp @@ -63,7 +63,7 @@ int main( int argc, char **argv ) DoubleArray DensityInit(Nx,Ny,Nz); LeeModel.ScaLBL_Comm->RegularLayout(LeeModel.Map,LeeModel.Den,DensityInit); - LeeModel.Run_TwoFluid(); + double MLUPS = LeeModel.Run_TwoFluid(LeeModel.timestepMax); DoubleArray DensityFinal(Nx,Ny,Nz); LeeModel.ScaLBL_Comm->RegularLayout(LeeModel.Map,LeeModel.Den,DensityFinal); From 32d750a04cfd7b75bca83ca79f9c25571836a8e1 Mon Sep 17 00:00:00 2001 From: James McClure Date: Mon, 22 Mar 2021 10:45:30 -0400 Subject: [PATCH 202/205] GPU build for lee model --- cuda/FreeLee.cu | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu index 7352585a..45bbf65b 100644 --- a/cuda/FreeLee.cu +++ b/cuda/FreeLee.cu @@ -1,4 +1,5 @@ #include +#include #include #define STOKES @@ -189,7 +190,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, __global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ - int idx,nr1,nr2,nr3,nr4,nr5,nr6; + int n,idx,nr1,nr2,nr3,nr4,nr5,nr6; double h0,h1,h2,h3,h4,h5,h6; double nx,ny,nz,C; double ux,uy,uz; @@ -380,7 +381,7 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, 
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - int nn,nn2x,ijk; + int n,nn,nn2x,ijk; int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; double ux,uy,uz;//fluid velocity double p;//pressure @@ -936,8 +937,7 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, dou double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - int nn,nn2x,ijk; - //int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + int n,nn,nn2x,ijk; double ux,uy,uz;//fluid velocity double p;//pressure double chem;//chemical potential @@ -2013,7 +2013,7 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *m extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np){ - ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init<<>>( gqbar, Fx, Fy, Fz, Np); + dvc_ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init<<>>( gqbar, Fx, Fy, Fz, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init: %s \n",cudaGetErrorString(err)); @@ -2023,7 +2023,7 @@ extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ - ScaLBL_FreeLeeModel_PhaseField_Init<<>>(Map, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, start, finish, Np); + dvc_ScaLBL_FreeLeeModel_PhaseField_Init<<>>(Map, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_FreeLeeModel_PhaseField_Init: %s \n",cudaGetErrorString(err)); @@ -2034,8 
+2034,8 @@ extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, doubl extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np) { - cudaFuncSetCacheConfig(ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, cudaFuncCachePreferL1); - ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel, + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ @@ -2046,8 +2046,8 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ - cudaFuncSetCacheConfig(ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, cudaFuncCachePreferL1); - ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np); + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q7_AAeven_FreeLee_PhaseField: %s \n",cudaGetErrorString(err)); @@ -2057,13 +2057,12 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, dou extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){ - 
cudaFuncSetCacheConfig(ScaLBL_D3Q7_ComputePhaseField, cudaFuncCachePreferL1); - ScaLBL_D3Q7_ComputePhaseField<<>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np); + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_ComputePhaseField, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q7_ComputePhaseField<<>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q7_ComputePhaseField: %s \n",cudaGetErrorString(err)); } - } @@ -2071,8 +2070,8 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - cudaFuncSetCacheConfig(ScaLBL_D3Q19_AAodd_FreeLeeModel, cudaFuncCachePreferL1); - ScaLBL_D3Q19_AAodd_FreeLeeModel<<>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel<<>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ @@ -2084,8 +2083,8 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - cudaFuncSetCacheConfig(ScaLBL_D3Q19_AAeven_FreeLeeModel, cudaFuncCachePreferL1); - ScaLBL_D3Q19_AAeven_FreeLeeModel<<>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel<<>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, kappa, beta, 
W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ @@ -2097,9 +2096,9 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure, double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ - cudaFuncSetCacheConfig(ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, cudaFuncCachePreferL1); - ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<>>(neighborList, dist, Vel, Pressure, - tau, Fx, Fy, Fz, start, finish, Np); + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<>>(neighborList, dist, Vel, Pressure, + tau, rho0, Fx, Fy, Fz, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK: %s \n",cudaGetErrorString(err)); @@ -2109,12 +2108,15 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborLis extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ - cudaFuncSetCacheConfig(ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, cudaFuncCachePreferL1); - ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<>>(dist, Vel, Pressure, - tau, Fx, Fy, Fz, start, finish, Np); + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<>>(dist, Vel, Pressure, + tau, rho0, Fx, Fy, Fz, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK: %s \n",cudaGetErrorString(err)); } } + +extern 
"C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad,int strideY, int strideZ, int start, int finish, int Np){ +} \ No newline at end of file From f44167d2ef8ef7d449389c25d7741509feb1f50f Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 24 Mar 2021 07:57:27 -0400 Subject: [PATCH 203/205] add hip version of Lee model --- hip/FreeLee.cu | 1483 ++++++++++++++++++++++++++---------------------- 1 file changed, 794 insertions(+), 689 deletions(-) diff --git a/hip/FreeLee.cu b/hip/FreeLee.cu index 558bd2f1..09bc8689 100644 --- a/hip/FreeLee.cu +++ b/hip/FreeLee.cu @@ -1,11 +1,12 @@ #include +#include #include "hip/hip_runtime.h" +#define STOKES + #define NBLOCKS 1024 #define NTHREADS 256 -#define STOKES - __global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) { int n; @@ -186,10 +187,16 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, } } -__global__ void dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, - double rhoA, double rhoB, int start, int finish, int Np){ - int idx,n; - double fq,phi; +__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ + + int n,idx,nr1,nr2,nr3,nr4,nr5,nr6; + double h0,h1,h2,h3,h4,h5,h6; + double nx,ny,nz,C; + double ux,uy,uz; + double phi; + double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + double factor = 1.0; // for (int n=start; n>>( gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init: %s \n",hipGetErrorString(err)); + } } extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double 
Fy, double Fz, int Np){ - + + dvc_ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init<<>>( gqbar, Fx, Fy, Fz, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init: %s \n",hipGetErrorString(err)); + } } extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ -} -extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, - double rhoA, double rhoB, int start, int finish, int Np){ + dvc_ScaLBL_FreeLeeModel_PhaseField_Init<<>>(Map, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_FreeLeeModel_PhaseField_Init: %s \n",hipGetErrorString(err)); + } + } - -extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, - double rhoA, double rhoB, int start, int finish, int Np){ - +extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np) +{ + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel, + rhoA, rhoB, tauM, W, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_FreeLee_PhaseField: %s \n",hipGetErrorString(err)); + } } -extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, 
double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_FreeLee_PhaseField: %s \n",hipGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_ComputePhaseField, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q7_ComputePhaseField<<>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_ComputePhaseField: %s \n",hipGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ -} - + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel<<>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel: %s \n",hipGetErrorString(err)); + } +} 
-extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, - double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz, +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel<<>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel: %s \n",hipGetErrorString(err)); + } + } extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure, double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ - + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<>>(neighborList, dist, Vel, Pressure, + tau, rho0, Fx, Fy, Fz, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK: %s \n",hipGetErrorString(err)); + } } extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ - + + 
hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<>>(dist, Vel, Pressure, + tau, rho0, Fx, Fy, Fz, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK: %s \n",hipGetErrorString(err)); + } } + +extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad,int strideY, int strideZ, int start, int finish, int Np){ +} \ No newline at end of file From aba4ca8455576b277776fd8b708af30757a08872 Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 24 Mar 2021 21:18:50 -0400 Subject: [PATCH 204/205] add extra check on wall displacement --- analysis/morphology.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index 37f58d0c..f21767dd 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -702,12 +702,14 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, if (rank == 0) printf(" delta=%f, growth=%f, max. 
displacement = %f \n",morph_delta, GrowthEstimate, MAX_DISPLACEMENT); // Now adjust morph_delta - double step_size = (TargetGrowth - GrowthEstimate)*(morph_delta - morph_delta_previous) / (GrowthEstimate - GrowthPrevious); - GrowthPrevious = GrowthEstimate; - morph_delta_previous = morph_delta; - morph_delta += step_size; + if (fabs(GrowthEstimate - GrowthPrevious) > 0.0) { + double step_size = (TargetGrowth - GrowthEstimate)*(morph_delta - morph_delta_previous) / (GrowthEstimate - GrowthPrevious); + GrowthPrevious = GrowthEstimate; + morph_delta_previous = morph_delta; + morph_delta += step_size; + } if (morph_delta / morph_delta_previous > 2.0 ) morph_delta = morph_delta_previous*2.0; - + //MAX_DISPLACEMENT *= max(TargetGrowth/GrowthEstimate,1.25); if (morph_delta > 0.0 ){ From 1ddf5e709e3fd7de732d5323c62e0769ecda1aea Mon Sep 17 00:00:00 2001 From: James McClure Date: Wed, 24 Mar 2021 21:19:53 -0400 Subject: [PATCH 205/205] refactor analysis to take color model as argument --- analysis/runAnalysis.cpp | 133 +++++++++++++++++++++++++++++++ analysis/runAnalysis.h | 3 + models/ColorModel.cpp | 126 ++++++++++++++++++++++++++++- models/ColorModel.h | 6 ++ tests/lbpm_color_simulator.cpp | 36 +++++++-- tests/lbpm_freelee_simulator.cpp | 4 +- 6 files changed, 297 insertions(+), 11 deletions(-) diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index f43a26ff..ab40ae4c 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -706,6 +706,139 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, const RankInfoStru } + // Initialize the comms + for ( int i = 0; i < 1024; i++ ) + d_comm_used[i] = false; + // Initialize the threads + int N_threads = db->getWithDefault( "N_threads", 4 ); + auto method = db->getWithDefault( "load_balance", "default" ); + createThreads( method, N_threads ); +} + +runAnalysis::runAnalysis( ScaLBL_ColorModel &ColorModel) +/* std::shared_ptr input_db, const RankInfoStruct &rank_info, + std::shared_ptr ScaLBL_Comm, 
std::shared_ptr Dm, int Np, + bool Regular, IntArray Map ) + : d_Np( Np ), + d_regular( Regular ), + d_rank_info( rank_info ), + d_Map( Map ), + d_comm( Dm->Comm.dup() ), + d_ScaLBL_Comm( ScaLBL_Comm )*/ +{ + + d_comm = ColorModel.Dm->Comm.dup(); + d_Np = ColorModel.Np; + bool Regular = false; + + auto input_db = ColorModel.db; + auto db = input_db->getDatabase( "Analysis" ); + auto vis_db = input_db->getDatabase( "Visualization" ); + + // Ids of work items to use for dependencies + ThreadPool::thread_id_t d_wait_blobID; + ThreadPool::thread_id_t d_wait_analysis; + ThreadPool::thread_id_t d_wait_vis; + ThreadPool::thread_id_t d_wait_restart; + ThreadPool::thread_id_t d_wait_subphase; + + char rankString[20]; + sprintf( rankString, "%05d", ColorModel.Dm->rank() ); + d_n[0] = ColorModel.Dm->Nx - 2; + d_n[1] = ColorModel.Dm->Ny - 2; + d_n[2] = ColorModel.Dm->Nz - 2; + d_N[0] = ColorModel.Dm->Nx; + d_N[1] = ColorModel.Dm->Ny; + d_N[2] = ColorModel.Dm->Nz; + + d_restart_interval = db->getScalar( "restart_interval" ); + d_analysis_interval = db->getScalar( "analysis_interval" ); + d_subphase_analysis_interval = INT_MAX; + d_visualization_interval = INT_MAX; + d_blobid_interval = INT_MAX; + if ( db->keyExists( "blobid_interval" ) ) { + d_blobid_interval = db->getScalar( "blobid_interval" ); + } + if ( db->keyExists( "visualization_interval" ) ) { + d_visualization_interval = db->getScalar( "visualization_interval" ); + } + if ( db->keyExists( "subphase_analysis_interval" ) ) { + d_subphase_analysis_interval = db->getScalar( "subphase_analysis_interval" ); + } + + auto restart_file = db->getScalar( "restart_file" ); + d_restartFile = restart_file + "." 
+ rankString; + + + d_rank = d_comm.getRank(); + writeIDMap( ID_map_struct(), 0, id_map_filename ); + // Initialize IO for silo + IO::initialize( "", "silo", "false" ); + // Create the MeshDataStruct + d_meshData.resize( 1 ); + + d_meshData[0].meshName = "domain"; + d_meshData[0].mesh = std::make_shared( + d_rank_info, d_n[0], d_n[1], d_n[2], ColorModel.Dm->Lx, ColorModel.Dm->Ly, ColorModel.Dm->Lz ); + auto PhaseVar = std::make_shared(); + auto PressVar = std::make_shared(); + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); + auto SignDistVar = std::make_shared(); + auto BlobIDVar = std::make_shared(); + + if ( vis_db->getWithDefault( "save_phase_field", true ) ) { + PhaseVar->name = "phase"; + PhaseVar->type = IO::VariableType::VolumeVariable; + PhaseVar->dim = 1; + PhaseVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( PhaseVar ); + } + + if ( vis_db->getWithDefault( "save_pressure", false ) ) { + PressVar->name = "Pressure"; + PressVar->type = IO::VariableType::VolumeVariable; + PressVar->dim = 1; + PressVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( PressVar ); + } + + if ( vis_db->getWithDefault( "save_velocity", false ) ) { + VxVar->name = "Velocity_x"; + VxVar->type = IO::VariableType::VolumeVariable; + VxVar->dim = 1; + VxVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VxVar ); + VyVar->name = "Velocity_y"; + VyVar->type = IO::VariableType::VolumeVariable; + VyVar->dim = 1; + VyVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VyVar ); + VzVar->name = "Velocity_z"; + VzVar->type = IO::VariableType::VolumeVariable; + VzVar->dim = 1; + VzVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VzVar ); + } + + if ( vis_db->getWithDefault( "save_distance", false ) ) { + SignDistVar->name = "SignDist"; + SignDistVar->type = IO::VariableType::VolumeVariable; + SignDistVar->dim = 1; 
+ SignDistVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( SignDistVar ); + } + + if ( vis_db->getWithDefault( "save_connected_components", false ) ) { + BlobIDVar->name = "BlobID"; + BlobIDVar->type = IO::VariableType::VolumeVariable; + BlobIDVar->dim = 1; + BlobIDVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( BlobIDVar ); + } + + // Initialize the comms for ( int i = 0; i < 1024; i++ ) d_comm_used[i] = false; diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h index a82c4ba0..c7c4ce71 100644 --- a/analysis/runAnalysis.h +++ b/analysis/runAnalysis.h @@ -7,6 +7,7 @@ #include "common/Communication.h" #include "common/ScaLBL.h" #include "threadpool/thread_pool.h" +#include "models/ColorModel.h" #include @@ -31,6 +32,8 @@ public: runAnalysis( std::shared_ptr db, const RankInfoStruct &rank_info, std::shared_ptr ScaLBL_Comm, std::shared_ptr dm, int Np, bool Regular, IntArray Map ); + + runAnalysis( ScaLBL_ColorModel &ColorModel); //! 
Destructor ~runAnalysis(); diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 1fe00824..204fd1d6 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -531,6 +531,121 @@ void ScaLBL_ColorModel::Initialize(){ ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); } +double ScaLBL_ColorModel::Run(int returntime){ + int nprocs=nprocx*nprocy*nprocz; + + //************ MAIN ITERATION LOOP ***************************************/ + comm.barrier(); + PROFILE_START("Loop"); + //std::shared_ptr analysis_db; + bool Regular = false; + auto current_db = db->cloneDatabase(); + auto t1 = std::chrono::system_clock::now(); + int START_TIMESTEP = timestep; + int EXIT_TIMESTEP = min(timestepMax,returntime); + while (timestep < EXIT_TIMESTEP ) { + //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } + PROFILE_START("Update"); + // *************ODD TIMESTEP************* + timestep++; + // Compute the Phase indicator field + // Read for Aq, Bq happens in this routine (requires communication) + ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + // Halo exchange for phase field + ScaLBL_Comm_Regular->SendHalo(Phi); + + ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), 
ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Phi); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set BCs + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + + // *************EVEN TIMESTEP************* + timestep++; + // Compute the Phase indicator field + ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + // Halo exchange for phase field + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + ScaLBL_Comm_Regular->SendHalo(Phi); + ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Phi); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set boundary conditions + if 
(BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + //************************************************************************ + } + PROFILE_STOP("Update"); + + PROFILE_STOP("Loop"); + PROFILE_SAVE("lbpm_color_simulator",1); + //************************************************************************ + // Compute the walltime per timestep + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / (timestep - START_TIMESTEP); + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + return(MLUPS); + MLUPS *= nprocs; + +} + void ScaLBL_ColorModel::Run(){ int nprocs=nprocx*nprocy*nprocz; const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); @@ -580,7 +695,6 @@ void ScaLBL_ColorModel::Run(){ if (color_db->keyExists( "krA_morph_factor" )){ KRA_MORPH_FACTOR = color_db->getScalar( "krA_morph_factor" ); } - /* defaults for simulation protocols */ auto protocol = color_db->getWithDefault( "protocol", "none" ); if (protocol == "image sequence"){ @@ -625,7 +739,7 @@ void ScaLBL_ColorModel::Run(){ if (analysis_db->keyExists( "seed_water" )){ seed_water = 
analysis_db->getScalar( "seed_water" ); if (rank == 0) printf("Seed water in oil %f (seed_water) \n",seed_water); - USE_SEED = true; + ASSERT(protocol == "seed water"); } if (analysis_db->keyExists( "morph_delta" )){ morph_delta = analysis_db->getScalar( "morph_delta" ); @@ -656,7 +770,6 @@ void ScaLBL_ColorModel::Run(){ MAX_MORPH_TIMESTEPS = analysis_db->getScalar( "max_morph_timesteps" ); } - if (rank==0){ printf("********************************************************\n"); if (protocol == "image sequence"){ @@ -1320,7 +1433,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta double vF = 0.f; double vS = 0.f; double delta_volume; - double WallFactor = 0.0; + double WallFactor = 1.0; bool USE_CONNECTED_NWP = false; DoubleArray phase(Nx,Ny,Nz); @@ -1343,6 +1456,11 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } double volume_initial = Dm->Comm.sumReduce( count); + double PoreVolume = Dm->Volume*Dm->Porosity(); + /*ensure target isn't an absurdly small fraction of pore volume */ + if (volume_initial < target_delta_volume*PoreVolume){ + volume_initial = target_delta_volume*PoreVolume; + } /* sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank); FILE *INPUT = fopen(LocalRankFilename,"wb"); diff --git a/models/ColorModel.h b/models/ColorModel.h index b2a9c1d1..7d3c858a 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -16,6 +16,10 @@ Implementation of color lattice boltzmann model #include "ProfilerApp.h" #include "threadpool/thread_pool.h" + +#ifndef ScaLBL_ColorModel_INC +#define ScaLBL_ColorModel_INC + class ScaLBL_ColorModel{ public: ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM); @@ -29,6 +33,7 @@ public: void Create(); void Initialize(); void Run(); + double Run(int returntime); void WriteDebug(); void getPhaseField(DoubleArray &f); @@ -99,4 +104,5 @@ private: int timestep; int timestep_previous; }; +#endif diff --git a/tests/lbpm_color_simulator.cpp 
b/tests/lbpm_color_simulator.cpp index 590d5b8e..d62bef0f 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -27,19 +27,24 @@ int main( int argc, char **argv ) // Initialize Utilities::startup( argc, argv ); - // Load the input database - auto db = std::make_shared( argv[1] ); - { // Limit scope so variables that contain communicators will free before MPI_Finialize Utilities::MPI comm( MPI_COMM_WORLD ); int rank = comm.getRank(); int nprocs = comm.getSize(); + std::string SimulationMode = "production"; + // Load the input database + auto db = std::make_shared( argv[1] ); + if (argc > 2) { + SimulationMode = "development"; + } if ( rank == 0 ) { printf( "********************************************************\n" ); printf( "Running Color LBM \n" ); printf( "********************************************************\n" ); + if (SimulationMode == "development") + printf("**** DEVELOPMENT MODE ENABLED *************\n"); } // Initialize compute device int device = ScaLBL_SetDevice( rank ); @@ -62,8 +67,29 @@ int main( int argc, char **argv ) ColorModel.Create(); // creating the model will create data structure to match the pore // structure and allocate variables ColorModel.Initialize(); // initializing the model will set initial conditions for variables - ColorModel.Run(); - // ColorModel.WriteDebug(); + + if (SimulationMode == "development"){ + double MLUPS=0.0; + int timestep = 0; + int analysis_interval = ColorModel.timestepMax; + if (ColorModel.analysis_db->keyExists( "" )){ + analysis_interval = ColorModel.analysis_db->getScalar( "analysis_interval" ); + } + FlowAdaptor Adapt(ColorModel); + runAnalysis analysis(ColorModel); + while (ColorModel.timestep < ColorModel.timestepMax){ + timestep += analysis_interval; + MLUPS = ColorModel.Run(timestep); + if (rank==0) printf("Lattice update rate (per MPI process)= %f MLUPS \n", MLUPS); + + Adapt.MoveInterface(ColorModel); + } + } //Analysis.WriteVis(LeeModel,LeeModel.db, timestep); + + 
else + ColorModel.Run(); + + ColorModel.WriteDebug(); PROFILE_STOP( "Main" ); auto file = db->getWithDefault( "TimerFile", "lbpm_color_simulator" ); diff --git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp index 0508c43a..0f003baa 100644 --- a/tests/lbpm_freelee_simulator.cpp +++ b/tests/lbpm_freelee_simulator.cpp @@ -62,8 +62,8 @@ int main( int argc, char **argv ) double MLUPS=0.0; int timestep = 0; int visualization_time = LeeModel.timestepMax; - if (LeeModel.vis_db->keyExists( "visualizataion_interval" )){ - visualization_time = LeeModel.vis_db->getScalar( "visualizataion_interval" ); + if (LeeModel.vis_db->keyExists( "visualization_interval" )){ + visualization_time = LeeModel.vis_db->getScalar( "visualization_interval" ); timestep += visualization_time; } while (LeeModel.timestep < LeeModel.timestepMax){